/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22 #include "coretypes.h"
28 #include "hard-reg-set.h"
29 #include "insn-config.h"
30 #include "conditions.h"
32 #include "insn-codes.h"
33 #include "insn-attr.h"
40 #include "diagnostic-core.h"
42 #include "basic-block.h"
45 #include "target-def.h"
46 #include "common/common-target.h"
47 #include "langhooks.h"
53 #include "tm-constrs.h"
57 #include "sched-int.h"
61 #include "diagnostic.h"
63 #include "tree-pass.h"
64 #include "tree-flow.h"
66 static rtx legitimize_dllimport_symbol (rtx, bool);
67 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
68 static rtx legitimize_pe_coff_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return the index of the given mode in the mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
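/* A minimal usage sketch, not part of the original file: assuming the cost
   tables below are reached through the ix86_cost pointer and the field names
   of struct processor_costs (i386.h), the cost of starting an SImode multiply
   on the active tuning would be looked up roughly as

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   and an SImode divide/mod as ix86_cost->divide[MODE_INDEX (SImode)].  */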
82 /* Processor costs (relative to an add) */
83 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
84 #define COSTS_N_BYTES(N) ((N) * 2)
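/* Under the stated assumptions the two scales line up: COSTS_N_INSNS (1) == 4
   and a two-byte add costs COSTS_N_BYTES (2) == 4, so "one simple insn" and
   "two bytes of code" carry the same weight when tuning for size.  */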
86 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
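/* How to read the stringop descriptors used throughout the tables below (a
   paraphrase of struct stringop_algs in i386.h, so treat the details as an
   assumption rather than a definition): the first field is the algorithm for
   blocks of unknown size; the {max, alg, noalign} entries that follow are
   scanned in order for known sizes, and the first entry whose MAX is at least
   the block size (-1 meaning "no upper bound") selects the algorithm.  Each
   cost table carries one pair of descriptors for memcpy and one for memset;
   the first of each pair appears to be used for 32-bit code and the second
   for 64-bit code, which is why 32-bit-only processors fill the second slot
   with DUMMY_STRINGOP_ALGS.  */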
89 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
90 COSTS_N_BYTES (2), /* cost of an add instruction */
91 COSTS_N_BYTES (3), /* cost of a lea instruction */
92 COSTS_N_BYTES (2), /* variable shift costs */
93 COSTS_N_BYTES (3), /* constant shift costs */
94 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
95 COSTS_N_BYTES (3), /* HI */
96 COSTS_N_BYTES (3), /* SI */
97 COSTS_N_BYTES (3), /* DI */
98 COSTS_N_BYTES (5)}, /* other */
99 0, /* cost of multiply per each bit set */
100 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
101 COSTS_N_BYTES (3), /* HI */
102 COSTS_N_BYTES (3), /* SI */
103 COSTS_N_BYTES (3), /* DI */
104 COSTS_N_BYTES (5)}, /* other */
105 COSTS_N_BYTES (3), /* cost of movsx */
106 COSTS_N_BYTES (3), /* cost of movzx */
107 0, /* "large" insn */
109 2, /* cost for loading QImode using movzbl */
110 {2, 2, 2}, /* cost of loading integer registers
111 in QImode, HImode and SImode.
112 Relative to reg-reg move (2). */
113 {2, 2, 2}, /* cost of storing integer registers */
114 2, /* cost of reg,reg fld/fst */
115 {2, 2, 2}, /* cost of loading fp registers
116 in SFmode, DFmode and XFmode */
117 {2, 2, 2}, /* cost of storing fp registers
118 in SFmode, DFmode and XFmode */
119 3, /* cost of moving MMX register */
120 {3, 3}, /* cost of loading MMX registers
121 in SImode and DImode */
122 {3, 3}, /* cost of storing MMX registers
123 in SImode and DImode */
124 3, /* cost of moving SSE register */
125 {3, 3, 3}, /* cost of loading SSE registers
126 in SImode, DImode and TImode */
127 {3, 3, 3}, /* cost of storing SSE registers
128 in SImode, DImode and TImode */
129 3, /* MMX or SSE register to integer */
130 0, /* size of l1 cache */
131 0, /* size of l2 cache */
132 0, /* size of prefetch block */
133 0, /* number of parallel prefetches */
135 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
136 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
137 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
138 COSTS_N_BYTES (2), /* cost of FABS instruction. */
139 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
140 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
141 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
142 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}},
143 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
144 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}},
145 1, /* scalar_stmt_cost. */
146 1, /* scalar load_cost. */
147 1, /* scalar_store_cost. */
148 1, /* vec_stmt_cost. */
149 1, /* vec_to_scalar_cost. */
150 1, /* scalar_to_vec_cost. */
151 1, /* vec_align_load_cost. */
152 1, /* vec_unalign_load_cost. */
153 1, /* vec_store_cost. */
154 1, /* cond_taken_branch_cost. */
155 1, /* cond_not_taken_branch_cost. */
158 /* Processor costs (relative to an add) */
160 struct processor_costs i386_cost = { /* 386 specific costs */
161 COSTS_N_INSNS (1), /* cost of an add instruction */
162 COSTS_N_INSNS (1), /* cost of a lea instruction */
163 COSTS_N_INSNS (3), /* variable shift costs */
164 COSTS_N_INSNS (2), /* constant shift costs */
165 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
166 COSTS_N_INSNS (6), /* HI */
167 COSTS_N_INSNS (6), /* SI */
168 COSTS_N_INSNS (6), /* DI */
169 COSTS_N_INSNS (6)}, /* other */
170 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
171 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
172 COSTS_N_INSNS (23), /* HI */
173 COSTS_N_INSNS (23), /* SI */
174 COSTS_N_INSNS (23), /* DI */
175 COSTS_N_INSNS (23)}, /* other */
176 COSTS_N_INSNS (3), /* cost of movsx */
177 COSTS_N_INSNS (2), /* cost of movzx */
178 15, /* "large" insn */
180 4, /* cost for loading QImode using movzbl */
181 {2, 4, 2}, /* cost of loading integer registers
182 in QImode, HImode and SImode.
183 Relative to reg-reg move (2). */
184 {2, 4, 2}, /* cost of storing integer registers */
185 2, /* cost of reg,reg fld/fst */
186 {8, 8, 8}, /* cost of loading fp registers
187 in SFmode, DFmode and XFmode */
188 {8, 8, 8}, /* cost of storing fp registers
189 in SFmode, DFmode and XFmode */
190 2, /* cost of moving MMX register */
191 {4, 8}, /* cost of loading MMX registers
192 in SImode and DImode */
193 {4, 8}, /* cost of storing MMX registers
194 in SImode and DImode */
195 2, /* cost of moving SSE register */
196 {4, 8, 16}, /* cost of loading SSE registers
197 in SImode, DImode and TImode */
198 {4, 8, 16}, /* cost of storing SSE registers
199 in SImode, DImode and TImode */
200 3, /* MMX or SSE register to integer */
201 0, /* size of l1 cache */
202 0, /* size of l2 cache */
203 0, /* size of prefetch block */
204 0, /* number of parallel prefetches */
206 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
207 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
208 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
209 COSTS_N_INSNS (22), /* cost of FABS instruction. */
210 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
211 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
212 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
213 DUMMY_STRINGOP_ALGS},
214 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
215 DUMMY_STRINGOP_ALGS},
216 1, /* scalar_stmt_cost. */
217 1, /* scalar load_cost. */
218 1, /* scalar_store_cost. */
219 1, /* vec_stmt_cost. */
220 1, /* vec_to_scalar_cost. */
221 1, /* scalar_to_vec_cost. */
222 1, /* vec_align_load_cost. */
223 2, /* vec_unalign_load_cost. */
224 1, /* vec_store_cost. */
225 3, /* cond_taken_branch_cost. */
226 1, /* cond_not_taken_branch_cost. */
230 struct processor_costs i486_cost = { /* 486 specific costs */
231 COSTS_N_INSNS (1), /* cost of an add instruction */
232 COSTS_N_INSNS (1), /* cost of a lea instruction */
233 COSTS_N_INSNS (3), /* variable shift costs */
234 COSTS_N_INSNS (2), /* constant shift costs */
235 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
236 COSTS_N_INSNS (12), /* HI */
237 COSTS_N_INSNS (12), /* SI */
238 COSTS_N_INSNS (12), /* DI */
239 COSTS_N_INSNS (12)}, /* other */
240 1, /* cost of multiply per each bit set */
241 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
242 COSTS_N_INSNS (40), /* HI */
243 COSTS_N_INSNS (40), /* SI */
244 COSTS_N_INSNS (40), /* DI */
245 COSTS_N_INSNS (40)}, /* other */
246 COSTS_N_INSNS (3), /* cost of movsx */
247 COSTS_N_INSNS (2), /* cost of movzx */
248 15, /* "large" insn */
250 4, /* cost for loading QImode using movzbl */
251 {2, 4, 2}, /* cost of loading integer registers
252 in QImode, HImode and SImode.
253 Relative to reg-reg move (2). */
254 {2, 4, 2}, /* cost of storing integer registers */
255 2, /* cost of reg,reg fld/fst */
256 {8, 8, 8}, /* cost of loading fp registers
257 in SFmode, DFmode and XFmode */
258 {8, 8, 8}, /* cost of storing fp registers
259 in SFmode, DFmode and XFmode */
260 2, /* cost of moving MMX register */
261 {4, 8}, /* cost of loading MMX registers
262 in SImode and DImode */
263 {4, 8}, /* cost of storing MMX registers
264 in SImode and DImode */
265 2, /* cost of moving SSE register */
266 {4, 8, 16}, /* cost of loading SSE registers
267 in SImode, DImode and TImode */
268 {4, 8, 16}, /* cost of storing SSE registers
269 in SImode, DImode and TImode */
270 3, /* MMX or SSE register to integer */
271 4, /* size of l1 cache. 486 has 8kB cache
272 shared for code and data, so 4kB is
273 not really precise. */
274 4, /* size of l2 cache */
275 0, /* size of prefetch block */
276 0, /* number of parallel prefetches */
278 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
279 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
280 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
281 COSTS_N_INSNS (3), /* cost of FABS instruction. */
282 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
283 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
284 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
285 DUMMY_STRINGOP_ALGS},
286 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
287 DUMMY_STRINGOP_ALGS},
288 1, /* scalar_stmt_cost. */
289 1, /* scalar load_cost. */
290 1, /* scalar_store_cost. */
291 1, /* vec_stmt_cost. */
292 1, /* vec_to_scalar_cost. */
293 1, /* scalar_to_vec_cost. */
294 1, /* vec_align_load_cost. */
295 2, /* vec_unalign_load_cost. */
296 1, /* vec_store_cost. */
297 3, /* cond_taken_branch_cost. */
298 1, /* cond_not_taken_branch_cost. */
302 struct processor_costs pentium_cost = {
303 COSTS_N_INSNS (1), /* cost of an add instruction */
304 COSTS_N_INSNS (1), /* cost of a lea instruction */
305 COSTS_N_INSNS (4), /* variable shift costs */
306 COSTS_N_INSNS (1), /* constant shift costs */
307 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
308 COSTS_N_INSNS (11), /* HI */
309 COSTS_N_INSNS (11), /* SI */
310 COSTS_N_INSNS (11), /* DI */
311 COSTS_N_INSNS (11)}, /* other */
312 0, /* cost of multiply per each bit set */
313 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
314 COSTS_N_INSNS (25), /* HI */
315 COSTS_N_INSNS (25), /* SI */
316 COSTS_N_INSNS (25), /* DI */
317 COSTS_N_INSNS (25)}, /* other */
318 COSTS_N_INSNS (3), /* cost of movsx */
319 COSTS_N_INSNS (2), /* cost of movzx */
320 8, /* "large" insn */
322 6, /* cost for loading QImode using movzbl */
323 {2, 4, 2}, /* cost of loading integer registers
324 in QImode, HImode and SImode.
325 Relative to reg-reg move (2). */
326 {2, 4, 2}, /* cost of storing integer registers */
327 2, /* cost of reg,reg fld/fst */
328 {2, 2, 6}, /* cost of loading fp registers
329 in SFmode, DFmode and XFmode */
330 {4, 4, 6}, /* cost of storing fp registers
331 in SFmode, DFmode and XFmode */
332 8, /* cost of moving MMX register */
333 {8, 8}, /* cost of loading MMX registers
334 in SImode and DImode */
335 {8, 8}, /* cost of storing MMX registers
336 in SImode and DImode */
337 2, /* cost of moving SSE register */
338 {4, 8, 16}, /* cost of loading SSE registers
339 in SImode, DImode and TImode */
340 {4, 8, 16}, /* cost of storing SSE registers
341 in SImode, DImode and TImode */
342 3, /* MMX or SSE register to integer */
343 8, /* size of l1 cache. */
344 8, /* size of l2 cache */
345 0, /* size of prefetch block */
346 0, /* number of parallel prefetches */
348 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
349 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
350 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
351 COSTS_N_INSNS (1), /* cost of FABS instruction. */
352 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
353 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
354 {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
355 DUMMY_STRINGOP_ALGS},
356 {{libcall, {{-1, rep_prefix_4_byte, false}}},
357 DUMMY_STRINGOP_ALGS},
358 1, /* scalar_stmt_cost. */
359 1, /* scalar load_cost. */
360 1, /* scalar_store_cost. */
361 1, /* vec_stmt_cost. */
362 1, /* vec_to_scalar_cost. */
363 1, /* scalar_to_vec_cost. */
364 1, /* vec_align_load_cost. */
365 2, /* vec_unalign_load_cost. */
366 1, /* vec_store_cost. */
367 3, /* cond_taken_branch_cost. */
368 1, /* cond_not_taken_branch_cost. */
372 struct processor_costs pentiumpro_cost = {
373 COSTS_N_INSNS (1), /* cost of an add instruction */
374 COSTS_N_INSNS (1), /* cost of a lea instruction */
375 COSTS_N_INSNS (1), /* variable shift costs */
376 COSTS_N_INSNS (1), /* constant shift costs */
377 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
378 COSTS_N_INSNS (4), /* HI */
379 COSTS_N_INSNS (4), /* SI */
380 COSTS_N_INSNS (4), /* DI */
381 COSTS_N_INSNS (4)}, /* other */
382 0, /* cost of multiply per each bit set */
383 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
384 COSTS_N_INSNS (17), /* HI */
385 COSTS_N_INSNS (17), /* SI */
386 COSTS_N_INSNS (17), /* DI */
387 COSTS_N_INSNS (17)}, /* other */
388 COSTS_N_INSNS (1), /* cost of movsx */
389 COSTS_N_INSNS (1), /* cost of movzx */
390 8, /* "large" insn */
392 2, /* cost for loading QImode using movzbl */
393 {4, 4, 4}, /* cost of loading integer registers
394 in QImode, HImode and SImode.
395 Relative to reg-reg move (2). */
396 {2, 2, 2}, /* cost of storing integer registers */
397 2, /* cost of reg,reg fld/fst */
398 {2, 2, 6}, /* cost of loading fp registers
399 in SFmode, DFmode and XFmode */
400 {4, 4, 6}, /* cost of storing fp registers
401 in SFmode, DFmode and XFmode */
402 2, /* cost of moving MMX register */
403 {2, 2}, /* cost of loading MMX registers
404 in SImode and DImode */
405 {2, 2}, /* cost of storing MMX registers
406 in SImode and DImode */
407 2, /* cost of moving SSE register */
408 {2, 2, 8}, /* cost of loading SSE registers
409 in SImode, DImode and TImode */
410 {2, 2, 8}, /* cost of storing SSE registers
411 in SImode, DImode and TImode */
412 3, /* MMX or SSE register to integer */
413 8, /* size of l1 cache. */
414 256, /* size of l2 cache */
415 32, /* size of prefetch block */
416 6, /* number of parallel prefetches */
418 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
419 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
420 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
421 COSTS_N_INSNS (2), /* cost of FABS instruction. */
422 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
423 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
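  /* A worked reading of the first descriptor below, using the interpretation
     sketched near DUMMY_STRINGOP_ALGS above: unknown sizes use rep movsl;
     known sizes up to 128 bytes use an inline loop, up to 1024 an unrolled
     loop, up to 8192 rep movsl, and anything larger rep movsb.  */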
429 {{rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
430 {8192, rep_prefix_4_byte, false},
431 {-1, rep_prefix_1_byte, false}}},
432 DUMMY_STRINGOP_ALGS},
433 {{rep_prefix_4_byte, {{1024, unrolled_loop, false},
434 {8192, rep_prefix_4_byte, false},
435 {-1, libcall, false}}},
436 DUMMY_STRINGOP_ALGS},
437 1, /* scalar_stmt_cost. */
438 1, /* scalar load_cost. */
439 1, /* scalar_store_cost. */
440 1, /* vec_stmt_cost. */
441 1, /* vec_to_scalar_cost. */
442 1, /* scalar_to_vec_cost. */
443 1, /* vec_align_load_cost. */
444 2, /* vec_unalign_load_cost. */
445 1, /* vec_store_cost. */
446 3, /* cond_taken_branch_cost. */
447 1, /* cond_not_taken_branch_cost. */
451 struct processor_costs geode_cost = {
452 COSTS_N_INSNS (1), /* cost of an add instruction */
453 COSTS_N_INSNS (1), /* cost of a lea instruction */
454 COSTS_N_INSNS (2), /* variable shift costs */
455 COSTS_N_INSNS (1), /* constant shift costs */
456 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
457 COSTS_N_INSNS (4), /* HI */
458 COSTS_N_INSNS (7), /* SI */
459 COSTS_N_INSNS (7), /* DI */
460 COSTS_N_INSNS (7)}, /* other */
461 0, /* cost of multiply per each bit set */
462 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
463 COSTS_N_INSNS (23), /* HI */
464 COSTS_N_INSNS (39), /* SI */
465 COSTS_N_INSNS (39), /* DI */
466 COSTS_N_INSNS (39)}, /* other */
467 COSTS_N_INSNS (1), /* cost of movsx */
468 COSTS_N_INSNS (1), /* cost of movzx */
469 8, /* "large" insn */
471 1, /* cost for loading QImode using movzbl */
472 {1, 1, 1}, /* cost of loading integer registers
473 in QImode, HImode and SImode.
474 Relative to reg-reg move (2). */
475 {1, 1, 1}, /* cost of storing integer registers */
476 1, /* cost of reg,reg fld/fst */
477 {1, 1, 1}, /* cost of loading fp registers
478 in SFmode, DFmode and XFmode */
479 {4, 6, 6}, /* cost of storing fp registers
480 in SFmode, DFmode and XFmode */
482 1, /* cost of moving MMX register */
483 {1, 1}, /* cost of loading MMX registers
484 in SImode and DImode */
485 {1, 1}, /* cost of storing MMX registers
486 in SImode and DImode */
487 1, /* cost of moving SSE register */
488 {1, 1, 1}, /* cost of loading SSE registers
489 in SImode, DImode and TImode */
490 {1, 1, 1}, /* cost of storing SSE registers
491 in SImode, DImode and TImode */
492 1, /* MMX or SSE register to integer */
493 64, /* size of l1 cache. */
494 128, /* size of l2 cache. */
495 32, /* size of prefetch block */
496 1, /* number of parallel prefetches */
498 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
499 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
500 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
501 COSTS_N_INSNS (1), /* cost of FABS instruction. */
502 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
503 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
504 {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
505 DUMMY_STRINGOP_ALGS},
506 {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
507 DUMMY_STRINGOP_ALGS},
508 1, /* scalar_stmt_cost. */
509 1, /* scalar load_cost. */
510 1, /* scalar_store_cost. */
511 1, /* vec_stmt_cost. */
512 1, /* vec_to_scalar_cost. */
513 1, /* scalar_to_vec_cost. */
514 1, /* vec_align_load_cost. */
515 2, /* vec_unalign_load_cost. */
516 1, /* vec_store_cost. */
517 3, /* cond_taken_branch_cost. */
518 1, /* cond_not_taken_branch_cost. */
522 struct processor_costs k6_cost = {
523 COSTS_N_INSNS (1), /* cost of an add instruction */
524 COSTS_N_INSNS (2), /* cost of a lea instruction */
525 COSTS_N_INSNS (1), /* variable shift costs */
526 COSTS_N_INSNS (1), /* constant shift costs */
527 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
528 COSTS_N_INSNS (3), /* HI */
529 COSTS_N_INSNS (3), /* SI */
530 COSTS_N_INSNS (3), /* DI */
531 COSTS_N_INSNS (3)}, /* other */
532 0, /* cost of multiply per each bit set */
533 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
534 COSTS_N_INSNS (18), /* HI */
535 COSTS_N_INSNS (18), /* SI */
536 COSTS_N_INSNS (18), /* DI */
537 COSTS_N_INSNS (18)}, /* other */
538 COSTS_N_INSNS (2), /* cost of movsx */
539 COSTS_N_INSNS (2), /* cost of movzx */
540 8, /* "large" insn */
542 3, /* cost for loading QImode using movzbl */
543 {4, 5, 4}, /* cost of loading integer registers
544 in QImode, HImode and SImode.
545 Relative to reg-reg move (2). */
546 {2, 3, 2}, /* cost of storing integer registers */
547 4, /* cost of reg,reg fld/fst */
548 {6, 6, 6}, /* cost of loading fp registers
549 in SFmode, DFmode and XFmode */
550 {4, 4, 4}, /* cost of storing fp registers
551 in SFmode, DFmode and XFmode */
552 2, /* cost of moving MMX register */
553 {2, 2}, /* cost of loading MMX registers
554 in SImode and DImode */
555 {2, 2}, /* cost of storing MMX registers
556 in SImode and DImode */
557 2, /* cost of moving SSE register */
558 {2, 2, 8}, /* cost of loading SSE registers
559 in SImode, DImode and TImode */
560 {2, 2, 8}, /* cost of storing SSE registers
561 in SImode, DImode and TImode */
562 6, /* MMX or SSE register to integer */
563 32, /* size of l1 cache. */
564 32, /* size of l2 cache. Some models
565 have integrated l2 cache, but
566 optimizing for k6 is not important
567 enough to worry about that. */
568 32, /* size of prefetch block */
569 1, /* number of parallel prefetches */
571 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
572 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
573 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
574 COSTS_N_INSNS (2), /* cost of FABS instruction. */
575 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
576 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
577 {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
578 DUMMY_STRINGOP_ALGS},
579 {{libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
580 DUMMY_STRINGOP_ALGS},
581 1, /* scalar_stmt_cost. */
582 1, /* scalar load_cost. */
583 1, /* scalar_store_cost. */
584 1, /* vec_stmt_cost. */
585 1, /* vec_to_scalar_cost. */
586 1, /* scalar_to_vec_cost. */
587 1, /* vec_align_load_cost. */
588 2, /* vec_unalign_load_cost. */
589 1, /* vec_store_cost. */
590 3, /* cond_taken_branch_cost. */
591 1, /* cond_not_taken_branch_cost. */
595 struct processor_costs athlon_cost = {
596 COSTS_N_INSNS (1), /* cost of an add instruction */
597 COSTS_N_INSNS (2), /* cost of a lea instruction */
598 COSTS_N_INSNS (1), /* variable shift costs */
599 COSTS_N_INSNS (1), /* constant shift costs */
600 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
601 COSTS_N_INSNS (5), /* HI */
602 COSTS_N_INSNS (5), /* SI */
603 COSTS_N_INSNS (5), /* DI */
604 COSTS_N_INSNS (5)}, /* other */
605 0, /* cost of multiply per each bit set */
606 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
607 COSTS_N_INSNS (26), /* HI */
608 COSTS_N_INSNS (42), /* SI */
609 COSTS_N_INSNS (74), /* DI */
610 COSTS_N_INSNS (74)}, /* other */
611 COSTS_N_INSNS (1), /* cost of movsx */
612 COSTS_N_INSNS (1), /* cost of movzx */
613 8, /* "large" insn */
615 4, /* cost for loading QImode using movzbl */
616 {3, 4, 3}, /* cost of loading integer registers
617 in QImode, HImode and SImode.
618 Relative to reg-reg move (2). */
619 {3, 4, 3}, /* cost of storing integer registers */
620 4, /* cost of reg,reg fld/fst */
621 {4, 4, 12}, /* cost of loading fp registers
622 in SFmode, DFmode and XFmode */
623 {6, 6, 8}, /* cost of storing fp registers
624 in SFmode, DFmode and XFmode */
625 2, /* cost of moving MMX register */
626 {4, 4}, /* cost of loading MMX registers
627 in SImode and DImode */
628 {4, 4}, /* cost of storing MMX registers
629 in SImode and DImode */
630 2, /* cost of moving SSE register */
631 {4, 4, 6}, /* cost of loading SSE registers
632 in SImode, DImode and TImode */
633 {4, 4, 5}, /* cost of storing SSE registers
634 in SImode, DImode and TImode */
635 5, /* MMX or SSE register to integer */
636 64, /* size of l1 cache. */
637 256, /* size of l2 cache. */
638 64, /* size of prefetch block */
639 6, /* number of parallel prefetches */
641 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
642 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
643 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
644 COSTS_N_INSNS (2), /* cost of FABS instruction. */
645 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
646 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and after 128 bytes for memset.  */
650 {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
651 DUMMY_STRINGOP_ALGS},
652 {{libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS},
654 1, /* scalar_stmt_cost. */
655 1, /* scalar load_cost. */
656 1, /* scalar_store_cost. */
657 1, /* vec_stmt_cost. */
658 1, /* vec_to_scalar_cost. */
659 1, /* scalar_to_vec_cost. */
660 1, /* vec_align_load_cost. */
661 2, /* vec_unalign_load_cost. */
662 1, /* vec_store_cost. */
663 3, /* cond_taken_branch_cost. */
664 1, /* cond_not_taken_branch_cost. */
668 struct processor_costs k8_cost = {
669 COSTS_N_INSNS (1), /* cost of an add instruction */
670 COSTS_N_INSNS (2), /* cost of a lea instruction */
671 COSTS_N_INSNS (1), /* variable shift costs */
672 COSTS_N_INSNS (1), /* constant shift costs */
673 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
674 COSTS_N_INSNS (4), /* HI */
675 COSTS_N_INSNS (3), /* SI */
676 COSTS_N_INSNS (4), /* DI */
677 COSTS_N_INSNS (5)}, /* other */
678 0, /* cost of multiply per each bit set */
679 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
680 COSTS_N_INSNS (26), /* HI */
681 COSTS_N_INSNS (42), /* SI */
682 COSTS_N_INSNS (74), /* DI */
683 COSTS_N_INSNS (74)}, /* other */
684 COSTS_N_INSNS (1), /* cost of movsx */
685 COSTS_N_INSNS (1), /* cost of movzx */
686 8, /* "large" insn */
688 4, /* cost for loading QImode using movzbl */
689 {3, 4, 3}, /* cost of loading integer registers
690 in QImode, HImode and SImode.
691 Relative to reg-reg move (2). */
692 {3, 4, 3}, /* cost of storing integer registers */
693 4, /* cost of reg,reg fld/fst */
694 {4, 4, 12}, /* cost of loading fp registers
695 in SFmode, DFmode and XFmode */
696 {6, 6, 8}, /* cost of storing fp registers
697 in SFmode, DFmode and XFmode */
698 2, /* cost of moving MMX register */
699 {3, 3}, /* cost of loading MMX registers
700 in SImode and DImode */
701 {4, 4}, /* cost of storing MMX registers
702 in SImode and DImode */
703 2, /* cost of moving SSE register */
704 {4, 3, 6}, /* cost of loading SSE registers
705 in SImode, DImode and TImode */
706 {4, 4, 5}, /* cost of storing SSE registers
707 in SImode, DImode and TImode */
708 5, /* MMX or SSE register to integer */
709 64, /* size of l1 cache. */
710 512, /* size of l2 cache. */
711 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
717 100, /* number of parallel prefetches */
719 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
720 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
721 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
722 COSTS_N_INSNS (2), /* cost of FABS instruction. */
723 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
724 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline expansion considerably.  */
728 {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
729 {-1, rep_prefix_4_byte, false}}},
730 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
731 {-1, libcall, false}}}},
732 {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
733 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
734 {libcall, {{48, unrolled_loop, false},
735 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
736 4, /* scalar_stmt_cost. */
737 2, /* scalar load_cost. */
738 2, /* scalar_store_cost. */
739 5, /* vec_stmt_cost. */
740 0, /* vec_to_scalar_cost. */
741 2, /* scalar_to_vec_cost. */
742 2, /* vec_align_load_cost. */
743 3, /* vec_unalign_load_cost. */
744 3, /* vec_store_cost. */
745 3, /* cond_taken_branch_cost. */
746 2, /* cond_not_taken_branch_cost. */
749 struct processor_costs amdfam10_cost = {
750 COSTS_N_INSNS (1), /* cost of an add instruction */
751 COSTS_N_INSNS (2), /* cost of a lea instruction */
752 COSTS_N_INSNS (1), /* variable shift costs */
753 COSTS_N_INSNS (1), /* constant shift costs */
754 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
755 COSTS_N_INSNS (4), /* HI */
756 COSTS_N_INSNS (3), /* SI */
757 COSTS_N_INSNS (4), /* DI */
758 COSTS_N_INSNS (5)}, /* other */
759 0, /* cost of multiply per each bit set */
760 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
761 COSTS_N_INSNS (35), /* HI */
762 COSTS_N_INSNS (51), /* SI */
763 COSTS_N_INSNS (83), /* DI */
764 COSTS_N_INSNS (83)}, /* other */
765 COSTS_N_INSNS (1), /* cost of movsx */
766 COSTS_N_INSNS (1), /* cost of movzx */
767 8, /* "large" insn */
769 4, /* cost for loading QImode using movzbl */
770 {3, 4, 3}, /* cost of loading integer registers
771 in QImode, HImode and SImode.
772 Relative to reg-reg move (2). */
773 {3, 4, 3}, /* cost of storing integer registers */
774 4, /* cost of reg,reg fld/fst */
775 {4, 4, 12}, /* cost of loading fp registers
776 in SFmode, DFmode and XFmode */
777 {6, 6, 8}, /* cost of storing fp registers
778 in SFmode, DFmode and XFmode */
779 2, /* cost of moving MMX register */
780 {3, 3}, /* cost of loading MMX registers
781 in SImode and DImode */
782 {4, 4}, /* cost of storing MMX registers
783 in SImode and DImode */
784 2, /* cost of moving SSE register */
785 {4, 4, 3}, /* cost of loading SSE registers
786 in SImode, DImode and TImode */
787 {4, 4, 5}, /* cost of storing SSE registers
788 in SImode, DImode and TImode */
789 3, /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg   Double  FSTORE 4
     MOVD reg32, xmmreg   Double  FSTORE 4
     MOVD reg64, xmmreg   Double  FADD   3
     MOVD reg32, xmmreg   Double  FADD   3  */
798 64, /* size of l1 cache. */
799 512, /* size of l2 cache. */
800 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
806 100, /* number of parallel prefetches */
808 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
809 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
810 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
811 COSTS_N_INSNS (2), /* cost of FABS instruction. */
812 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
813 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     libcall can do nontemporal accesses and beat inline expansion
     considerably.  */
818 {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
819 {-1, rep_prefix_4_byte, false}}},
820 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
821 {-1, libcall, false}}}},
822 {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
823 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
824 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
825 {-1, libcall, false}}}},
826 4, /* scalar_stmt_cost. */
827 2, /* scalar load_cost. */
828 2, /* scalar_store_cost. */
829 6, /* vec_stmt_cost. */
830 0, /* vec_to_scalar_cost. */
831 2, /* scalar_to_vec_cost. */
832 2, /* vec_align_load_cost. */
833 2, /* vec_unalign_load_cost. */
834 2, /* vec_store_cost. */
835 2, /* cond_taken_branch_cost. */
836 1, /* cond_not_taken_branch_cost. */
839 struct processor_costs bdver1_cost = {
840 COSTS_N_INSNS (1), /* cost of an add instruction */
841 COSTS_N_INSNS (1), /* cost of a lea instruction */
842 COSTS_N_INSNS (1), /* variable shift costs */
843 COSTS_N_INSNS (1), /* constant shift costs */
844 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
845 COSTS_N_INSNS (4), /* HI */
846 COSTS_N_INSNS (4), /* SI */
847 COSTS_N_INSNS (6), /* DI */
848 COSTS_N_INSNS (6)}, /* other */
849 0, /* cost of multiply per each bit set */
850 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
851 COSTS_N_INSNS (35), /* HI */
852 COSTS_N_INSNS (51), /* SI */
853 COSTS_N_INSNS (83), /* DI */
854 COSTS_N_INSNS (83)}, /* other */
855 COSTS_N_INSNS (1), /* cost of movsx */
856 COSTS_N_INSNS (1), /* cost of movzx */
857 8, /* "large" insn */
859 4, /* cost for loading QImode using movzbl */
860 {5, 5, 4}, /* cost of loading integer registers
861 in QImode, HImode and SImode.
862 Relative to reg-reg move (2). */
863 {4, 4, 4}, /* cost of storing integer registers */
864 2, /* cost of reg,reg fld/fst */
865 {5, 5, 12}, /* cost of loading fp registers
866 in SFmode, DFmode and XFmode */
867 {4, 4, 8}, /* cost of storing fp registers
868 in SFmode, DFmode and XFmode */
869 2, /* cost of moving MMX register */
870 {4, 4}, /* cost of loading MMX registers
871 in SImode and DImode */
872 {4, 4}, /* cost of storing MMX registers
873 in SImode and DImode */
874 2, /* cost of moving SSE register */
875 {4, 4, 4}, /* cost of loading SSE registers
876 in SImode, DImode and TImode */
877 {4, 4, 4}, /* cost of storing SSE registers
878 in SImode, DImode and TImode */
879 2, /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg   Double  FSTORE 4
     MOVD reg32, xmmreg   Double  FSTORE 4
     MOVD reg64, xmmreg   Double  FADD   3
     MOVD reg32, xmmreg   Double  FADD   3  */
888 16, /* size of l1 cache. */
889 2048, /* size of l2 cache. */
890 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
896 100, /* number of parallel prefetches */
898 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
899 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
900 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
901 COSTS_N_INSNS (2), /* cost of FABS instruction. */
902 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
903 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline expansion considerably.  */
908 {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
909 {-1, rep_prefix_4_byte, false}}},
910 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
911 {-1, libcall, false}}}},
912 {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
913 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
914 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
915 {-1, libcall, false}}}},
916 6, /* scalar_stmt_cost. */
917 4, /* scalar load_cost. */
918 4, /* scalar_store_cost. */
919 6, /* vec_stmt_cost. */
920 0, /* vec_to_scalar_cost. */
921 2, /* scalar_to_vec_cost. */
922 4, /* vec_align_load_cost. */
923 4, /* vec_unalign_load_cost. */
924 4, /* vec_store_cost. */
925 2, /* cond_taken_branch_cost. */
926 1, /* cond_not_taken_branch_cost. */
929 struct processor_costs bdver2_cost = {
930 COSTS_N_INSNS (1), /* cost of an add instruction */
931 COSTS_N_INSNS (1), /* cost of a lea instruction */
932 COSTS_N_INSNS (1), /* variable shift costs */
933 COSTS_N_INSNS (1), /* constant shift costs */
934 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
935 COSTS_N_INSNS (4), /* HI */
936 COSTS_N_INSNS (4), /* SI */
937 COSTS_N_INSNS (6), /* DI */
938 COSTS_N_INSNS (6)}, /* other */
939 0, /* cost of multiply per each bit set */
940 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
941 COSTS_N_INSNS (35), /* HI */
942 COSTS_N_INSNS (51), /* SI */
943 COSTS_N_INSNS (83), /* DI */
944 COSTS_N_INSNS (83)}, /* other */
945 COSTS_N_INSNS (1), /* cost of movsx */
946 COSTS_N_INSNS (1), /* cost of movzx */
947 8, /* "large" insn */
949 4, /* cost for loading QImode using movzbl */
950 {5, 5, 4}, /* cost of loading integer registers
951 in QImode, HImode and SImode.
952 Relative to reg-reg move (2). */
953 {4, 4, 4}, /* cost of storing integer registers */
954 2, /* cost of reg,reg fld/fst */
955 {5, 5, 12}, /* cost of loading fp registers
956 in SFmode, DFmode and XFmode */
957 {4, 4, 8}, /* cost of storing fp registers
958 in SFmode, DFmode and XFmode */
959 2, /* cost of moving MMX register */
960 {4, 4}, /* cost of loading MMX registers
961 in SImode and DImode */
962 {4, 4}, /* cost of storing MMX registers
963 in SImode and DImode */
964 2, /* cost of moving SSE register */
965 {4, 4, 4}, /* cost of loading SSE registers
966 in SImode, DImode and TImode */
967 {4, 4, 4}, /* cost of storing SSE registers
968 in SImode, DImode and TImode */
969 2, /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg   Double  FSTORE 4
     MOVD reg32, xmmreg   Double  FSTORE 4
     MOVD reg64, xmmreg   Double  FADD   3
     MOVD reg32, xmmreg   Double  FADD   3  */
978 16, /* size of l1 cache. */
979 2048, /* size of l2 cache. */
980 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
986 100, /* number of parallel prefetches */
988 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
989 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
990 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
991 COSTS_N_INSNS (2), /* cost of FABS instruction. */
992 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
993 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
  /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline expansion considerably.  */
998 {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
999 {-1, rep_prefix_4_byte, false}}},
1000 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1001 {-1, libcall, false}}}},
1002 {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
1003 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1004 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1005 {-1, libcall, false}}}},
1006 6, /* scalar_stmt_cost. */
1007 4, /* scalar load_cost. */
1008 4, /* scalar_store_cost. */
1009 6, /* vec_stmt_cost. */
1010 0, /* vec_to_scalar_cost. */
1011 2, /* scalar_to_vec_cost. */
1012 4, /* vec_align_load_cost. */
1013 4, /* vec_unalign_load_cost. */
1014 4, /* vec_store_cost. */
1015 2, /* cond_taken_branch_cost. */
1016 1, /* cond_not_taken_branch_cost. */
1019 struct processor_costs bdver3_cost = {
1020 COSTS_N_INSNS (1), /* cost of an add instruction */
1021 COSTS_N_INSNS (1), /* cost of a lea instruction */
1022 COSTS_N_INSNS (1), /* variable shift costs */
1023 COSTS_N_INSNS (1), /* constant shift costs */
1024 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1025 COSTS_N_INSNS (4), /* HI */
1026 COSTS_N_INSNS (4), /* SI */
1027 COSTS_N_INSNS (6), /* DI */
1028 COSTS_N_INSNS (6)}, /* other */
1029 0, /* cost of multiply per each bit set */
1030 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1031 COSTS_N_INSNS (35), /* HI */
1032 COSTS_N_INSNS (51), /* SI */
1033 COSTS_N_INSNS (83), /* DI */
1034 COSTS_N_INSNS (83)}, /* other */
1035 COSTS_N_INSNS (1), /* cost of movsx */
1036 COSTS_N_INSNS (1), /* cost of movzx */
1037 8, /* "large" insn */
1039 4, /* cost for loading QImode using movzbl */
1040 {5, 5, 4}, /* cost of loading integer registers
1041 in QImode, HImode and SImode.
1042 Relative to reg-reg move (2). */
1043 {4, 4, 4}, /* cost of storing integer registers */
1044 2, /* cost of reg,reg fld/fst */
1045 {5, 5, 12}, /* cost of loading fp registers
1046 in SFmode, DFmode and XFmode */
1047 {4, 4, 8}, /* cost of storing fp registers
1048 in SFmode, DFmode and XFmode */
1049 2, /* cost of moving MMX register */
1050 {4, 4}, /* cost of loading MMX registers
1051 in SImode and DImode */
1052 {4, 4}, /* cost of storing MMX registers
1053 in SImode and DImode */
1054 2, /* cost of moving SSE register */
1055 {4, 4, 4}, /* cost of loading SSE registers
1056 in SImode, DImode and TImode */
1057 {4, 4, 4}, /* cost of storing SSE registers
1058 in SImode, DImode and TImode */
1059 2, /* MMX or SSE register to integer */
1060 16, /* size of l1 cache. */
1061 2048, /* size of l2 cache. */
1062 64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
1068 100, /* number of parallel prefetches */
1069 2, /* Branch cost */
1070 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1071 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1072 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1073 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1074 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1075 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
  /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline expansion considerably.  */
1080 {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
1081 {-1, rep_prefix_4_byte, false}}},
1082 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1083 {-1, libcall, false}}}},
1084 {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
1085 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1086 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1087 {-1, libcall, false}}}},
1088 6, /* scalar_stmt_cost. */
1089 4, /* scalar load_cost. */
1090 4, /* scalar_store_cost. */
1091 6, /* vec_stmt_cost. */
1092 0, /* vec_to_scalar_cost. */
1093 2, /* scalar_to_vec_cost. */
1094 4, /* vec_align_load_cost. */
1095 4, /* vec_unalign_load_cost. */
1096 4, /* vec_store_cost. */
1097 2, /* cond_taken_branch_cost. */
1098 1, /* cond_not_taken_branch_cost. */
1101 struct processor_costs btver1_cost = {
1102 COSTS_N_INSNS (1), /* cost of an add instruction */
1103 COSTS_N_INSNS (2), /* cost of a lea instruction */
1104 COSTS_N_INSNS (1), /* variable shift costs */
1105 COSTS_N_INSNS (1), /* constant shift costs */
1106 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1107 COSTS_N_INSNS (4), /* HI */
1108 COSTS_N_INSNS (3), /* SI */
1109 COSTS_N_INSNS (4), /* DI */
1110 COSTS_N_INSNS (5)}, /* other */
1111 0, /* cost of multiply per each bit set */
1112 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1113 COSTS_N_INSNS (35), /* HI */
1114 COSTS_N_INSNS (51), /* SI */
1115 COSTS_N_INSNS (83), /* DI */
1116 COSTS_N_INSNS (83)}, /* other */
1117 COSTS_N_INSNS (1), /* cost of movsx */
1118 COSTS_N_INSNS (1), /* cost of movzx */
1119 8, /* "large" insn */
1121 4, /* cost for loading QImode using movzbl */
1122 {3, 4, 3}, /* cost of loading integer registers
1123 in QImode, HImode and SImode.
1124 Relative to reg-reg move (2). */
1125 {3, 4, 3}, /* cost of storing integer registers */
1126 4, /* cost of reg,reg fld/fst */
1127 {4, 4, 12}, /* cost of loading fp registers
1128 in SFmode, DFmode and XFmode */
1129 {6, 6, 8}, /* cost of storing fp registers
1130 in SFmode, DFmode and XFmode */
1131 2, /* cost of moving MMX register */
1132 {3, 3}, /* cost of loading MMX registers
1133 in SImode and DImode */
1134 {4, 4}, /* cost of storing MMX registers
1135 in SImode and DImode */
1136 2, /* cost of moving SSE register */
1137 {4, 4, 3}, /* cost of loading SSE registers
1138 in SImode, DImode and TImode */
1139 {4, 4, 5}, /* cost of storing SSE registers
1140 in SImode, DImode and TImode */
1141 3, /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg   Double  FSTORE 4
     MOVD reg32, xmmreg   Double  FSTORE 4
     MOVD reg64, xmmreg   Double  FADD   3
     MOVD reg32, xmmreg   Double  FADD   3  */
1150 32, /* size of l1 cache. */
1151 512, /* size of l2 cache. */
1152 64, /* size of prefetch block */
1153 100, /* number of parallel prefetches */
1154 2, /* Branch cost */
1155 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1156 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1157 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1158 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1159 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1160 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline expansion considerably.  */
1165 {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
1166 {-1, rep_prefix_4_byte, false}}},
1167 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1168 {-1, libcall, false}}}},
1169 {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
1170 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1171 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1172 {-1, libcall, false}}}},
1173 4, /* scalar_stmt_cost. */
1174 2, /* scalar load_cost. */
1175 2, /* scalar_store_cost. */
1176 6, /* vec_stmt_cost. */
1177 0, /* vec_to_scalar_cost. */
1178 2, /* scalar_to_vec_cost. */
1179 2, /* vec_align_load_cost. */
1180 2, /* vec_unalign_load_cost. */
1181 2, /* vec_store_cost. */
1182 2, /* cond_taken_branch_cost. */
1183 1, /* cond_not_taken_branch_cost. */
1186 struct processor_costs btver2_cost = {
1187 COSTS_N_INSNS (1), /* cost of an add instruction */
1188 COSTS_N_INSNS (2), /* cost of a lea instruction */
1189 COSTS_N_INSNS (1), /* variable shift costs */
1190 COSTS_N_INSNS (1), /* constant shift costs */
1191 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1192 COSTS_N_INSNS (4), /* HI */
1193 COSTS_N_INSNS (3), /* SI */
1194 COSTS_N_INSNS (4), /* DI */
1195 COSTS_N_INSNS (5)}, /* other */
1196 0, /* cost of multiply per each bit set */
1197 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1198 COSTS_N_INSNS (35), /* HI */
1199 COSTS_N_INSNS (51), /* SI */
1200 COSTS_N_INSNS (83), /* DI */
1201 COSTS_N_INSNS (83)}, /* other */
1202 COSTS_N_INSNS (1), /* cost of movsx */
1203 COSTS_N_INSNS (1), /* cost of movzx */
1204 8, /* "large" insn */
1206 4, /* cost for loading QImode using movzbl */
1207 {3, 4, 3}, /* cost of loading integer registers
1208 in QImode, HImode and SImode.
1209 Relative to reg-reg move (2). */
1210 {3, 4, 3}, /* cost of storing integer registers */
1211 4, /* cost of reg,reg fld/fst */
1212 {4, 4, 12}, /* cost of loading fp registers
1213 in SFmode, DFmode and XFmode */
1214 {6, 6, 8}, /* cost of storing fp registers
1215 in SFmode, DFmode and XFmode */
1216 2, /* cost of moving MMX register */
1217 {3, 3}, /* cost of loading MMX registers
1218 in SImode and DImode */
1219 {4, 4}, /* cost of storing MMX registers
1220 in SImode and DImode */
1221 2, /* cost of moving SSE register */
1222 {4, 4, 3}, /* cost of loading SSE registers
1223 in SImode, DImode and TImode */
1224 {4, 4, 5}, /* cost of storing SSE registers
1225 in SImode, DImode and TImode */
1226 3, /* MMX or SSE register to integer */
  /* MOVD reg64, xmmreg   Double  FSTORE 4
     MOVD reg32, xmmreg   Double  FSTORE 4
     MOVD reg64, xmmreg   Double  FADD   3
     MOVD reg32, xmmreg   Double  FADD   3  */
1235 32, /* size of l1 cache. */
1236 2048, /* size of l2 cache. */
1237 64, /* size of prefetch block */
1238 100, /* number of parallel prefetches */
1239 2, /* Branch cost */
1240 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1241 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1242 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1243 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1244 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1245 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1247 {{libcall, {{6, loop, false}, {14, unrolled_loop, false},
1248 {-1, rep_prefix_4_byte, false}}},
1249 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1250 {-1, libcall, false}}}},
1251 {{libcall, {{8, loop, false}, {24, unrolled_loop, false},
1252 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1253 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1254 {-1, libcall, false}}}},
1255 4, /* scalar_stmt_cost. */
1256 2, /* scalar load_cost. */
1257 2, /* scalar_store_cost. */
1258 6, /* vec_stmt_cost. */
1259 0, /* vec_to_scalar_cost. */
1260 2, /* scalar_to_vec_cost. */
1261 2, /* vec_align_load_cost. */
1262 2, /* vec_unalign_load_cost. */
1263 2, /* vec_store_cost. */
1264 2, /* cond_taken_branch_cost. */
1265 1, /* cond_not_taken_branch_cost. */
1269 struct processor_costs pentium4_cost = {
1270 COSTS_N_INSNS (1), /* cost of an add instruction */
1271 COSTS_N_INSNS (3), /* cost of a lea instruction */
1272 COSTS_N_INSNS (4), /* variable shift costs */
1273 COSTS_N_INSNS (4), /* constant shift costs */
1274 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1275 COSTS_N_INSNS (15), /* HI */
1276 COSTS_N_INSNS (15), /* SI */
1277 COSTS_N_INSNS (15), /* DI */
1278 COSTS_N_INSNS (15)}, /* other */
1279 0, /* cost of multiply per each bit set */
1280 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1281 COSTS_N_INSNS (56), /* HI */
1282 COSTS_N_INSNS (56), /* SI */
1283 COSTS_N_INSNS (56), /* DI */
1284 COSTS_N_INSNS (56)}, /* other */
1285 COSTS_N_INSNS (1), /* cost of movsx */
1286 COSTS_N_INSNS (1), /* cost of movzx */
1287 16, /* "large" insn */
1289 2, /* cost for loading QImode using movzbl */
1290 {4, 5, 4}, /* cost of loading integer registers
1291 in QImode, HImode and SImode.
1292 Relative to reg-reg move (2). */
1293 {2, 3, 2}, /* cost of storing integer registers */
1294 2, /* cost of reg,reg fld/fst */
1295 {2, 2, 6}, /* cost of loading fp registers
1296 in SFmode, DFmode and XFmode */
1297 {4, 4, 6}, /* cost of storing fp registers
1298 in SFmode, DFmode and XFmode */
1299 2, /* cost of moving MMX register */
1300 {2, 2}, /* cost of loading MMX registers
1301 in SImode and DImode */
1302 {2, 2}, /* cost of storing MMX registers
1303 in SImode and DImode */
1304 12, /* cost of moving SSE register */
1305 {12, 12, 12}, /* cost of loading SSE registers
1306 in SImode, DImode and TImode */
1307 {2, 2, 8}, /* cost of storing SSE registers
1308 in SImode, DImode and TImode */
1309 10, /* MMX or SSE register to integer */
1310 8, /* size of l1 cache. */
1311 256, /* size of l2 cache. */
1312 64, /* size of prefetch block */
1313 6, /* number of parallel prefetches */
1314 2, /* Branch cost */
1315 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1316 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1317 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1318 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1319 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1320 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1321 {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1322 DUMMY_STRINGOP_ALGS},
1323 {{libcall, {{6, loop_1_byte, false}, {48, loop, false},
1324 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1325 DUMMY_STRINGOP_ALGS},
1326 1, /* scalar_stmt_cost. */
1327 1, /* scalar load_cost. */
1328 1, /* scalar_store_cost. */
1329 1, /* vec_stmt_cost. */
1330 1, /* vec_to_scalar_cost. */
1331 1, /* scalar_to_vec_cost. */
1332 1, /* vec_align_load_cost. */
1333 2, /* vec_unalign_load_cost. */
1334 1, /* vec_store_cost. */
1335 3, /* cond_taken_branch_cost. */
1336 1, /* cond_not_taken_branch_cost. */
1340 struct processor_costs nocona_cost = {
1341 COSTS_N_INSNS (1), /* cost of an add instruction */
1342 COSTS_N_INSNS (1), /* cost of a lea instruction */
1343 COSTS_N_INSNS (1), /* variable shift costs */
1344 COSTS_N_INSNS (1), /* constant shift costs */
1345 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1346 COSTS_N_INSNS (10), /* HI */
1347 COSTS_N_INSNS (10), /* SI */
1348 COSTS_N_INSNS (10), /* DI */
1349 COSTS_N_INSNS (10)}, /* other */
1350 0, /* cost of multiply per each bit set */
1351 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1352 COSTS_N_INSNS (66), /* HI */
1353 COSTS_N_INSNS (66), /* SI */
1354 COSTS_N_INSNS (66), /* DI */
1355 COSTS_N_INSNS (66)}, /* other */
1356 COSTS_N_INSNS (1), /* cost of movsx */
1357 COSTS_N_INSNS (1), /* cost of movzx */
1358 16, /* "large" insn */
1359 17, /* MOVE_RATIO */
1360 4, /* cost for loading QImode using movzbl */
1361 {4, 4, 4}, /* cost of loading integer registers
1362 in QImode, HImode and SImode.
1363 Relative to reg-reg move (2). */
1364 {4, 4, 4}, /* cost of storing integer registers */
1365 3, /* cost of reg,reg fld/fst */
1366 {12, 12, 12}, /* cost of loading fp registers
1367 in SFmode, DFmode and XFmode */
1368 {4, 4, 4}, /* cost of storing fp registers
1369 in SFmode, DFmode and XFmode */
1370 6, /* cost of moving MMX register */
1371 {12, 12}, /* cost of loading MMX registers
1372 in SImode and DImode */
1373 {12, 12}, /* cost of storing MMX registers
1374 in SImode and DImode */
1375 6, /* cost of moving SSE register */
1376 {12, 12, 12}, /* cost of loading SSE registers
1377 in SImode, DImode and TImode */
1378 {12, 12, 12}, /* cost of storing SSE registers
1379 in SImode, DImode and TImode */
1380 8, /* MMX or SSE register to integer */
1381 8, /* size of l1 cache. */
1382 1024, /* size of l2 cache. */
1383 128, /* size of prefetch block */
1384 8, /* number of parallel prefetches */
1385 1, /* Branch cost */
1386 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1387 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1388 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1389 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1390 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1391 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1392 {{libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1393 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1394 {100000, unrolled_loop, false}, {-1, libcall, false}}}},
1395 {{libcall, {{6, loop_1_byte, false}, {48, loop, false},
1396 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1397 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1398 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
1399 1, /* scalar_stmt_cost. */
1400 1, /* scalar load_cost. */
1401 1, /* scalar_store_cost. */
1402 1, /* vec_stmt_cost. */
1403 1, /* vec_to_scalar_cost. */
1404 1, /* scalar_to_vec_cost. */
1405 1, /* vec_align_load_cost. */
1406 2, /* vec_unalign_load_cost. */
1407 1, /* vec_store_cost. */
1408 3, /* cond_taken_branch_cost. */
1409 1, /* cond_not_taken_branch_cost. */
1413 struct processor_costs atom_cost = {
1414 COSTS_N_INSNS (1), /* cost of an add instruction */
1415 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1416 COSTS_N_INSNS (1), /* variable shift costs */
1417 COSTS_N_INSNS (1), /* constant shift costs */
1418 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1419 COSTS_N_INSNS (4), /* HI */
1420 COSTS_N_INSNS (3), /* SI */
1421 COSTS_N_INSNS (4), /* DI */
1422 COSTS_N_INSNS (2)}, /* other */
1423 0, /* cost of multiply per each bit set */
1424 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1425 COSTS_N_INSNS (26), /* HI */
1426 COSTS_N_INSNS (42), /* SI */
1427 COSTS_N_INSNS (74), /* DI */
1428 COSTS_N_INSNS (74)}, /* other */
1429 COSTS_N_INSNS (1), /* cost of movsx */
1430 COSTS_N_INSNS (1), /* cost of movzx */
1431 8, /* "large" insn */
1432 17, /* MOVE_RATIO */
1433 4, /* cost for loading QImode using movzbl */
1434 {4, 4, 4}, /* cost of loading integer registers
1435 in QImode, HImode and SImode.
1436 Relative to reg-reg move (2). */
1437 {4, 4, 4}, /* cost of storing integer registers */
1438 4, /* cost of reg,reg fld/fst */
1439 {12, 12, 12}, /* cost of loading fp registers
1440 in SFmode, DFmode and XFmode */
1441 {6, 6, 8}, /* cost of storing fp registers
1442 in SFmode, DFmode and XFmode */
1443 2, /* cost of moving MMX register */
1444 {8, 8}, /* cost of loading MMX registers
1445 in SImode and DImode */
1446 {8, 8}, /* cost of storing MMX registers
1447 in SImode and DImode */
1448 2, /* cost of moving SSE register */
1449 {8, 8, 8}, /* cost of loading SSE registers
1450 in SImode, DImode and TImode */
1451 {8, 8, 8}, /* cost of storing SSE registers
1452 in SImode, DImode and TImode */
1453 5, /* MMX or SSE register to integer */
1454 32, /* size of l1 cache. */
1455 256, /* size of l2 cache. */
1456 64, /* size of prefetch block */
1457 6, /* number of parallel prefetches */
1458 3, /* Branch cost */
1459 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1460 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1461 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1462 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1463 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1464 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1465 {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1466 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1467 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
1468 {{libcall, {{8, loop, false}, {15, unrolled_loop, false},
1469 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1470 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1471 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
1472 1, /* scalar_stmt_cost. */
1473 1, /* scalar load_cost. */
1474 1, /* scalar_store_cost. */
1475 1, /* vec_stmt_cost. */
1476 1, /* vec_to_scalar_cost. */
1477 1, /* scalar_to_vec_cost. */
1478 1, /* vec_align_load_cost. */
1479 2, /* vec_unalign_load_cost. */
1480 1, /* vec_store_cost. */
1481 3, /* cond_taken_branch_cost. */
1482 1, /* cond_not_taken_branch_cost. */
1486 struct processor_costs slm_cost = {
1487 COSTS_N_INSNS (1), /* cost of an add instruction */
1488 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1489 COSTS_N_INSNS (1), /* variable shift costs */
1490 COSTS_N_INSNS (1), /* constant shift costs */
1491 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1492 COSTS_N_INSNS (4), /* HI */
1493 COSTS_N_INSNS (3), /* SI */
1494 COSTS_N_INSNS (4), /* DI */
1495 COSTS_N_INSNS (2)}, /* other */
1496 0, /* cost of multiply per each bit set */
1497 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1498 COSTS_N_INSNS (26), /* HI */
1499 COSTS_N_INSNS (42), /* SI */
1500 COSTS_N_INSNS (74), /* DI */
1501 COSTS_N_INSNS (74)}, /* other */
1502 COSTS_N_INSNS (1), /* cost of movsx */
1503 COSTS_N_INSNS (1), /* cost of movzx */
1504 8, /* "large" insn */
1505 17, /* MOVE_RATIO */
1506 4, /* cost for loading QImode using movzbl */
1507 {4, 4, 4}, /* cost of loading integer registers
1508 in QImode, HImode and SImode.
1509 Relative to reg-reg move (2). */
1510 {4, 4, 4}, /* cost of storing integer registers */
1511 4, /* cost of reg,reg fld/fst */
1512 {12, 12, 12}, /* cost of loading fp registers
1513 in SFmode, DFmode and XFmode */
1514 {6, 6, 8}, /* cost of storing fp registers
1515 in SFmode, DFmode and XFmode */
1516 2, /* cost of moving MMX register */
1517 {8, 8}, /* cost of loading MMX registers
1518 in SImode and DImode */
1519 {8, 8}, /* cost of storing MMX registers
1520 in SImode and DImode */
1521 2, /* cost of moving SSE register */
1522 {8, 8, 8}, /* cost of loading SSE registers
1523 in SImode, DImode and TImode */
1524 {8, 8, 8}, /* cost of storing SSE registers
1525 in SImode, DImode and TImode */
1526 5, /* MMX or SSE register to integer */
1527 32, /* size of l1 cache. */
1528 256, /* size of l2 cache. */
1529 64, /* size of prefetch block */
1530 6, /* number of parallel prefetches */
1531 3, /* Branch cost */
1532 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1533 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1534 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1535 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1536 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1537 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1538 {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1539 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1540 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
1541 {{libcall, {{8, loop, false}, {15, unrolled_loop, false},
1542 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1543 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1544 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
1545 1, /* scalar_stmt_cost. */
1546 1, /* scalar load_cost. */
1547 1, /* scalar_store_cost. */
1548 1, /* vec_stmt_cost. */
1549 1, /* vec_to_scalar_cost. */
1550 1, /* scalar_to_vec_cost. */
1551 1, /* vec_align_load_cost. */
1552 2, /* vec_unalign_load_cost. */
1553 1, /* vec_store_cost. */
1554 3, /* cond_taken_branch_cost. */
1555 1, /* cond_not_taken_branch_cost. */
1558 /* Generic64 should produce code tuned for Nocona and K8. */
1560 struct processor_costs generic64_cost = {
1561 COSTS_N_INSNS (1), /* cost of an add instruction */
1562 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1563 this cost, however, our current implementation of synth_mult results in
1564 the use of unnecessary temporary registers, causing regressions on several
1565 SPECfp benchmarks. */
1566 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1567 COSTS_N_INSNS (1), /* variable shift costs */
1568 COSTS_N_INSNS (1), /* constant shift costs */
1569 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1570 COSTS_N_INSNS (4), /* HI */
1571 COSTS_N_INSNS (3), /* SI */
1572 COSTS_N_INSNS (4), /* DI */
1573 COSTS_N_INSNS (2)}, /* other */
1574 0, /* cost of multiply per each bit set */
1575 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1576 COSTS_N_INSNS (26), /* HI */
1577 COSTS_N_INSNS (42), /* SI */
1578 COSTS_N_INSNS (74), /* DI */
1579 COSTS_N_INSNS (74)}, /* other */
1580 COSTS_N_INSNS (1), /* cost of movsx */
1581 COSTS_N_INSNS (1), /* cost of movzx */
1582 8, /* "large" insn */
1583 17, /* MOVE_RATIO */
1584 4, /* cost for loading QImode using movzbl */
1585 {4, 4, 4}, /* cost of loading integer registers
1586 in QImode, HImode and SImode.
1587 Relative to reg-reg move (2). */
1588 {4, 4, 4}, /* cost of storing integer registers */
1589 4, /* cost of reg,reg fld/fst */
1590 {12, 12, 12}, /* cost of loading fp registers
1591 in SFmode, DFmode and XFmode */
1592 {6, 6, 8}, /* cost of storing fp registers
1593 in SFmode, DFmode and XFmode */
1594 2, /* cost of moving MMX register */
1595 {8, 8}, /* cost of loading MMX registers
1596 in SImode and DImode */
1597 {8, 8}, /* cost of storing MMX registers
1598 in SImode and DImode */
1599 2, /* cost of moving SSE register */
1600 {8, 8, 8}, /* cost of loading SSE registers
1601 in SImode, DImode and TImode */
1602 {8, 8, 8}, /* cost of storing SSE registers
1603 in SImode, DImode and TImode */
1604 5, /* MMX or SSE register to integer */
1605 32, /* size of l1 cache. */
1606 512, /* size of l2 cache. */
1607 64, /* size of prefetch block */
1608 6, /* number of parallel prefetches */
1609 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1610 value is increased to the perhaps more appropriate value of 5. */
1611 3, /* Branch cost */
1612 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1613 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1614 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1615 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1616 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1617 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1618 {DUMMY_STRINGOP_ALGS,
1619 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1620 {-1, libcall, false}}}},
1621 {DUMMY_STRINGOP_ALGS,
1622 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1623 {-1, libcall, false}}}},
1624 1, /* scalar_stmt_cost. */
1625 1, /* scalar load_cost. */
1626 1, /* scalar_store_cost. */
1627 1, /* vec_stmt_cost. */
1628 1, /* vec_to_scalar_cost. */
1629 1, /* scalar_to_vec_cost. */
1630 1, /* vec_align_load_cost. */
1631 2, /* vec_unalign_load_cost. */
1632 1, /* vec_store_cost. */
1633 3, /* cond_taken_branch_cost. */
1634 1, /* cond_not_taken_branch_cost. */
1637 /* core_cost should produce code tuned for the Core family of CPUs. */
1639 struct processor_costs core_cost = {
1640 COSTS_N_INSNS (1), /* cost of an add instruction */
1641 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1642 this cost, however, our current implementation of synth_mult results in
1643 the use of unnecessary temporary registers, causing regressions on several
1644 SPECfp benchmarks. */
1645 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1646 COSTS_N_INSNS (1), /* variable shift costs */
1647 COSTS_N_INSNS (1), /* constant shift costs */
1648 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1649 COSTS_N_INSNS (4), /* HI */
1650 COSTS_N_INSNS (3), /* SI */
1651 COSTS_N_INSNS (4), /* DI */
1652 COSTS_N_INSNS (2)}, /* other */
1653 0, /* cost of multiply per each bit set */
1654 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1655 COSTS_N_INSNS (26), /* HI */
1656 COSTS_N_INSNS (42), /* SI */
1657 COSTS_N_INSNS (74), /* DI */
1658 COSTS_N_INSNS (74)}, /* other */
1659 COSTS_N_INSNS (1), /* cost of movsx */
1660 COSTS_N_INSNS (1), /* cost of movzx */
1661 8, /* "large" insn */
1662 17, /* MOVE_RATIO */
1663 4, /* cost for loading QImode using movzbl */
1664 {4, 4, 4}, /* cost of loading integer registers
1665 in QImode, HImode and SImode.
1666 Relative to reg-reg move (2). */
1667 {4, 4, 4}, /* cost of storing integer registers */
1668 4, /* cost of reg,reg fld/fst */
1669 {12, 12, 12}, /* cost of loading fp registers
1670 in SFmode, DFmode and XFmode */
1671 {6, 6, 8}, /* cost of storing fp registers
1672 in SFmode, DFmode and XFmode */
1673 2, /* cost of moving MMX register */
1674 {8, 8}, /* cost of loading MMX registers
1675 in SImode and DImode */
1676 {8, 8}, /* cost of storing MMX registers
1677 in SImode and DImode */
1678 2, /* cost of moving SSE register */
1679 {8, 8, 8}, /* cost of loading SSE registers
1680 in SImode, DImode and TImode */
1681 {8, 8, 8}, /* cost of storing SSE registers
1682 in SImode, DImode and TImode */
1683 5, /* MMX or SSE register to integer */
1684 64, /* size of l1 cache. */
1685 512, /* size of l2 cache. */
1686 64, /* size of prefetch block */
1687 6, /* number of parallel prefetches */
1688 /* FIXME perhaps more appropriate value is 5. */
1689 3, /* Branch cost */
1690 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1691 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1692 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1693 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1694 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1695 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1696 {{libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1697 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1698 {-1, libcall, false}}}},
1699 {{libcall, {{6, loop_1_byte, true},
1701 {8192, rep_prefix_4_byte, true},
1702 {-1, libcall, false}}},
1703 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1704 {-1, libcall, false}}}},
1705 1, /* scalar_stmt_cost. */
1706 1, /* scalar load_cost. */
1707 1, /* scalar_store_cost. */
1708 1, /* vec_stmt_cost. */
1709 1, /* vec_to_scalar_cost. */
1710 1, /* scalar_to_vec_cost. */
1711 1, /* vec_align_load_cost. */
1712 2, /* vec_unalign_load_cost. */
1713 1, /* vec_store_cost. */
1714 3, /* cond_taken_branch_cost. */
1715 1, /* cond_not_taken_branch_cost. */
1718 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1721 struct processor_costs generic32_cost = {
1722 COSTS_N_INSNS (1), /* cost of an add instruction */
1723 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1724 COSTS_N_INSNS (1), /* variable shift costs */
1725 COSTS_N_INSNS (1), /* constant shift costs */
1726 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1727 COSTS_N_INSNS (4), /* HI */
1728 COSTS_N_INSNS (3), /* SI */
1729 COSTS_N_INSNS (4), /* DI */
1730 COSTS_N_INSNS (2)}, /* other */
1731 0, /* cost of multiply per each bit set */
1732 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1733 COSTS_N_INSNS (26), /* HI */
1734 COSTS_N_INSNS (42), /* SI */
1735 COSTS_N_INSNS (74), /* DI */
1736 COSTS_N_INSNS (74)}, /* other */
1737 COSTS_N_INSNS (1), /* cost of movsx */
1738 COSTS_N_INSNS (1), /* cost of movzx */
1739 8, /* "large" insn */
1740 17, /* MOVE_RATIO */
1741 4, /* cost for loading QImode using movzbl */
1742 {4, 4, 4}, /* cost of loading integer registers
1743 in QImode, HImode and SImode.
1744 Relative to reg-reg move (2). */
1745 {4, 4, 4}, /* cost of storing integer registers */
1746 4, /* cost of reg,reg fld/fst */
1747 {12, 12, 12}, /* cost of loading fp registers
1748 in SFmode, DFmode and XFmode */
1749 {6, 6, 8}, /* cost of storing fp registers
1750 in SFmode, DFmode and XFmode */
1751 2, /* cost of moving MMX register */
1752 {8, 8}, /* cost of loading MMX registers
1753 in SImode and DImode */
1754 {8, 8}, /* cost of storing MMX registers
1755 in SImode and DImode */
1756 2, /* cost of moving SSE register */
1757 {8, 8, 8}, /* cost of loading SSE registers
1758 in SImode, DImode and TImode */
1759 {8, 8, 8}, /* cost of storing SSE registers
1760 in SImode, DImode and TImode */
1761 5, /* MMX or SSE register to integer */
1762 32, /* size of l1 cache. */
1763 256, /* size of l2 cache. */
1764 64, /* size of prefetch block */
1765 6, /* number of parallel prefetches */
1766 3, /* Branch cost */
1767 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1768 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1769 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1770 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1771 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1772 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1773 {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1774 {-1, libcall, false}}},
1775 DUMMY_STRINGOP_ALGS},
1776 {{libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1777 {-1, libcall, false}}},
1778 DUMMY_STRINGOP_ALGS},
1779 1, /* scalar_stmt_cost. */
1780 1, /* scalar load_cost. */
1781 1, /* scalar_store_cost. */
1782 1, /* vec_stmt_cost. */
1783 1, /* vec_to_scalar_cost. */
1784 1, /* scalar_to_vec_cost. */
1785 1, /* vec_align_load_cost. */
1786 2, /* vec_unalign_load_cost. */
1787 1, /* vec_store_cost. */
1788 3, /* cond_taken_branch_cost. */
1789 1, /* cond_not_taken_branch_cost. */
1792 /* Set by -mtune. */
1793 const struct processor_costs *ix86_tune_cost = &pentium_cost;
1795 /* Set by -mtune or -Os. */
1796 const struct processor_costs *ix86_cost = &pentium_cost;
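/* Illustrative sketch (not part of the original file): this is roughly how
   the two cost pointers above end up being set during option handling.  The
   -mtune selection drives ix86_tune_cost via processor_target_table defined
   later in this file, while -Os redirects ix86_cost to the size-oriented
   table.  Kept under #if 0 so it only documents the intent.  */
#if 0
  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  ix86_cost = optimize_size ? &ix86_size_cost : ix86_tune_cost;
#endif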
1798 /* Processor feature/optimization bitmasks. */
1799 #define m_386 (1<<PROCESSOR_I386)
1800 #define m_486 (1<<PROCESSOR_I486)
1801 #define m_PENT (1<<PROCESSOR_PENTIUM)
1802 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1803 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1804 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1805 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
1806 #define m_CORE2 (1<<PROCESSOR_CORE2)
1807 #define m_COREI7 (1<<PROCESSOR_COREI7)
1808 #define m_HASWELL (1<<PROCESSOR_HASWELL)
1809 #define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
1810 #define m_ATOM (1<<PROCESSOR_ATOM)
1811 #define m_SLM (1<<PROCESSOR_SLM)
1813 #define m_GEODE (1<<PROCESSOR_GEODE)
1814 #define m_K6 (1<<PROCESSOR_K6)
1815 #define m_K6_GEODE (m_K6 | m_GEODE)
1816 #define m_K8 (1<<PROCESSOR_K8)
1817 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1818 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1819 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1820 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
1821 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
1822 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
1823 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
1824 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
1825 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3)
1826 #define m_BTVER (m_BTVER1 | m_BTVER2)
1827 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
1829 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1830 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1832 /* Generic instruction choice should be a common subset of the supported CPUs
1833 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1834 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
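/* Illustrative sketch (not code from this file): the m_* masks above are
   consumed when ix86_tune_features is filled in during option handling.
   Each entry of initial_ix86_tune_features below is tested against the bit
   of the processor selected by -mtune, roughly as follows.  */
#if 0
  {
    unsigned int ix86_tune_mask = 1u << ix86_tune;
    int i;
    for (i = 0; i < X86_TUNE_LAST; i++)
      ix86_tune_features[i]
        = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
  }
#endif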
1836 /* Feature tests against the various tunings. */
1837 unsigned char ix86_tune_features[X86_TUNE_LAST];
1839 /* Feature tests against the various tunings used to create ix86_tune_features
1840 based on the processor mask. */
1841 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1842 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1843 negatively, so enabling it for Generic64 seems like a good code-size
1844 tradeoff. We can't enable it for 32bit generic because it does not
1845 work well with PPro based chips. */
1846 m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,
1848 /* X86_TUNE_PUSH_MEMORY */
1849 m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1851 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1854 /* X86_TUNE_UNROLL_STRLEN */
1855 m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
1857 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1858 on simulation result. But after P4 was made, no performance benefit
1859 was observed with branch hints. It also increases the code size.
1860 As a result, icc never generates branch hints. */
1863 /* X86_TUNE_DOUBLE_WITH_ADD */
1866 /* X86_TUNE_USE_SAHF */
1867 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC,
1869 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1870 partial dependencies. */
1871 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1873 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1874 register stalls on the Generic32 compilation setting as well. However,
1875 in the current implementation partial register stalls are not eliminated
1876 very well - they can be introduced via subregs synthesized by combine
1877 and can appear in caller/callee saving sequences. Because this option
1878 pays back little on PPro based chips and conflicts with the partial reg
1879 dependencies used by Athlon/P4 based chips, it is better to leave it off
1880 for generic32 for now. */
1883 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1884 m_CORE_ALL | m_GENERIC,
1886 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
1887 on 16-bit immediate moves into memory on Core2 and Corei7. */
1888 m_CORE_ALL | m_GENERIC,
1890 /* X86_TUNE_USE_HIMODE_FIOP */
1891 m_386 | m_486 | m_K6_GEODE,
1893 /* X86_TUNE_USE_SIMODE_FIOP */
1894 ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC),
1896 /* X86_TUNE_USE_MOV0 */
1899 /* X86_TUNE_USE_CLTD */
1900 ~(m_PENT | m_ATOM | m_SLM | m_K6),
1902 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1905 /* X86_TUNE_SPLIT_LONG_MOVES */
1908 /* X86_TUNE_READ_MODIFY_WRITE */
1911 /* X86_TUNE_READ_MODIFY */
1914 /* X86_TUNE_PROMOTE_QIMODE */
1915 m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
1917 /* X86_TUNE_FAST_PREFIX */
1918 ~(m_386 | m_486 | m_PENT),
1920 /* X86_TUNE_SINGLE_STRINGOP */
1921 m_386 | m_P4_NOCONA,
1923 /* X86_TUNE_QIMODE_MATH */
1926 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1927 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1928 might be considered for Generic32 if our scheme for avoiding partial
1929 stalls was more effective. */
1932 /* X86_TUNE_PROMOTE_QI_REGS */
1935 /* X86_TUNE_PROMOTE_HI_REGS */
1938 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1939 over esp addition. */
1940 m_386 | m_486 | m_PENT | m_PPRO,
1942 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1943 over esp addition. */
1946 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1947 over esp subtraction. */
1948 m_386 | m_486 | m_PENT | m_K6_GEODE,
1950 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
1951 over esp subtraction. */
1952 m_PENT | m_K6_GEODE,
1954 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1955 for DFmode copies */
1956 ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
1958 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1959 m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
1961 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1962 conflict here between PPro/Pentium4 based chips, which treat 128bit
1963 SSE registers as single units, and K8 based chips, which divide SSE
1964 registers into two 64bit halves. This knob promotes all store destinations
1965 to be 128bit to allow register renaming on 128bit SSE units, but usually
1966 results in one extra microop on 64bit SSE units. Experimental results
1967 show that disabling this option on P4 brings over a 20% SPECfp regression,
1968 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1969 masked by careful scheduling of moves. */
1970 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 | m_BDVER | m_GENERIC,
1972 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1973 m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM,
1975 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1976 m_COREI7 | m_BDVER | m_SLM,
1978 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1981 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1982 are resolved on SSE register parts instead of whole registers, so we may
1983 maintain just the lower part of scalar values in the proper format, leaving
1984 the upper part undefined. */
1987 /* X86_TUNE_SSE_TYPELESS_STORES */
1990 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1991 m_PPRO | m_P4_NOCONA,
1993 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1994 m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
1996 /* X86_TUNE_PROLOGUE_USING_MOVE */
1997 m_PPRO | m_ATHLON_K8,
1999 /* X86_TUNE_EPILOGUE_USING_MOVE */
2000 m_PPRO | m_ATHLON_K8,
2002 /* X86_TUNE_SHIFT1 */
2005 /* X86_TUNE_USE_FFREEP */
2008 /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC */
2009 ~(m_AMD_MULTIPLE | m_GENERIC),
2011 /* X86_TUNE_INTER_UNIT_MOVES_FROM_VEC */
2014 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2015 ~(m_AMDFAM10 | m_BDVER ),
2017 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2018 than 4 branch instructions in the 16 byte window. */
2019 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
2021 /* X86_TUNE_SCHEDULE */
2022 m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
2024 /* X86_TUNE_USE_BT */
2025 m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
2027 /* X86_TUNE_USE_INCDEC */
2028 ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC),
2030 /* X86_TUNE_PAD_RETURNS */
2031 m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC,
2033 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
2036 /* X86_TUNE_EXT_80387_CONSTANTS */
2037 m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
2039 /* X86_TUNE_AVOID_VECTOR_DECODE */
2040 m_CORE_ALL | m_K8 | m_GENERIC64,
2042 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
2043 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
2046 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2047 vector path on AMD machines. */
2048 m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,
2050 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2052 m_CORE_ALL | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,
2054 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2058 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2059 but one byte longer. */
2062 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2063 operand that cannot be represented using a modRM byte. The XOR
2064 replacement is long decoded, so this split helps here as well. */
2067 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2069 m_CORE_ALL | m_AMDFAM10 | m_GENERIC,
2071 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2072 from integer to FP. */
2075 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2076 with a subsequent conditional jump instruction into a single
2077 compare-and-branch uop. */
2080 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2081 will impact LEA instruction selection. */
2084 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2088 /* X86_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2089 at -O3. For the moment, the prefetching seems badly tuned for Intel
2091 m_K6_GEODE | m_AMD_MULTIPLE,
2093 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2094 the auto-vectorizer. */
2097 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2098 during reassociation of integer computation. */
2101 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2102 during reassociation of fp computation. */
2103 m_ATOM | m_SLM | m_HASWELL | m_BDVER1 | m_BDVER2,
2105 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2106 regs instead of memory. */
2109 /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
2110 a conditional move. */
2114 /* Feature tests against the various architecture variations. */
2115 unsigned char ix86_arch_features[X86_ARCH_LAST];
2117 /* Feature tests against the various architecture variations, used to create
2118 ix86_arch_features based on the processor mask. */
2119 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2120 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2121 ~(m_386 | m_486 | m_PENT | m_K6),
2123 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2126 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2129 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2132 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2136 static const unsigned int x86_accumulate_outgoing_args
2137 = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC;
2139 static const unsigned int x86_arch_always_fancy_math_387
2140 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC;
2142 static const unsigned int x86_avx256_split_unaligned_load
2143 = m_COREI7 | m_GENERIC;
2145 static const unsigned int x86_avx256_split_unaligned_store
2146 = m_COREI7 | m_BDVER | m_GENERIC;
2148 /* In case the average insn count for single function invocation is
2149 lower than this constant, emit fast (but longer) prologue and
2151 #define FAST_PROLOGUE_INSN_COUNT 20
2153 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2154 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2155 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2156 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2158 /* Array of the smallest class containing reg number REGNO, indexed by
2159 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2161 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2163 /* ax, dx, cx, bx */
2164 AREG, DREG, CREG, BREG,
2165 /* si, di, bp, sp */
2166 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2168 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2169 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2172 /* flags, fpsr, fpcr, frame */
2173 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2175 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2178 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2181 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2182 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2183 /* SSE REX registers */
2184 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
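/* Hypothetical usage sketch (not part of this file): i386.h consumes the
   map above through REGNO_REG_CLASS, which is assumed here to be a plain
   array lookup along these lines.  */
#if 0
#define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])
#endif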
2188 /* The "default" register map used in 32bit mode. */
2190 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2192 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2193 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2194 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2195 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2196 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2197 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2198 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2201 /* The "default" register map used in 64bit mode. */
2203 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2205 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2206 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2207 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2208 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2209 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2210 8,9,10,11,12,13,14,15, /* extended integer registers */
2211 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2214 /* Define the register numbers to be used in Dwarf debugging information.
2215 The SVR4 reference port C compiler uses the following register numbers
2216 in its Dwarf output code:
2217 0 for %eax (gcc regno = 0)
2218 1 for %ecx (gcc regno = 2)
2219 2 for %edx (gcc regno = 1)
2220 3 for %ebx (gcc regno = 3)
2221 4 for %esp (gcc regno = 7)
2222 5 for %ebp (gcc regno = 6)
2223 6 for %esi (gcc regno = 4)
2224 7 for %edi (gcc regno = 5)
2225 The following three DWARF register numbers are never generated by
2226 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2227 believes these numbers have these meanings.
2228 8 for %eip (no gcc equivalent)
2229 9 for %eflags (gcc regno = 17)
2230 10 for %trapno (no gcc equivalent)
2231 It is not at all clear how we should number the FP stack registers
2232 for the x86 architecture. If the version of SDB on x86/svr4 were
2233 a bit less brain dead with respect to floating-point then we would
2234 have a precedent to follow with respect to DWARF register numbers
2235 for x86 FP registers, but the SDB on x86/svr4 is so completely
2236 broken with respect to FP registers that it is hardly worth thinking
2237 of it as something to strive for compatibility with.
2238 The version of x86/svr4 SDB I have at the moment does (partially)
2239 seem to believe that DWARF register number 11 is associated with
2240 the x86 register %st(0), but that's about all. Higher DWARF
2241 register numbers don't seem to be associated with anything in
2242 particular, and even for DWARF regno 11, SDB only seems to under-
2243 stand that it should say that a variable lives in %st(0) (when
2244 asked via an `=' command) if we said it was in DWARF regno 11,
2245 but SDB still prints garbage when asked for the value of the
2246 variable in question (via a `/' command).
2247 (Also note that the labels SDB prints for various FP stack regs
2248 when doing an `x' command are all wrong.)
2249 Note that these problems generally don't affect the native SVR4
2250 C compiler because it doesn't allow the use of -O with -g and
2251 because when it is *not* optimizing, it allocates a memory
2252 location for each floating-point variable, and the memory
2253 location is what gets described in the DWARF AT_location
2254 attribute for the variable in question.
2255 Regardless of the severe mental illness of the x86/svr4 SDB, we
2256 do something sensible here and we use the following DWARF
2257 register numbers. Note that these are all stack-top-relative
2259 11 for %st(0) (gcc regno = 8)
2260 12 for %st(1) (gcc regno = 9)
2261 13 for %st(2) (gcc regno = 10)
2262 14 for %st(3) (gcc regno = 11)
2263 15 for %st(4) (gcc regno = 12)
2264 16 for %st(5) (gcc regno = 13)
2265 17 for %st(6) (gcc regno = 14)
2266 18 for %st(7) (gcc regno = 15)
2268 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2270 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2271 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2272 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2273 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2274 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2275 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2276 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
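/* Worked example (illustrative only, not part of the original file): the
   table above realizes the numbering spelled out in the comment before it.
   For instance GCC regno 0 (%eax) stays 0, GCC regno 1 (%edx) becomes
   DWARF 2, and GCC regno 8 (%st(0)) becomes DWARF 11.  */
#if 0
  gcc_assert (svr4_dbx_register_map[0] == 0);
  gcc_assert (svr4_dbx_register_map[1] == 2);
  gcc_assert (svr4_dbx_register_map[8] == 11);
#endif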
2279 /* Define parameter passing and return registers. */
2281 static int const x86_64_int_parameter_registers[6] =
2283 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2286 static int const x86_64_ms_abi_int_parameter_registers[4] =
2288 CX_REG, DX_REG, R8_REG, R9_REG
2291 static int const x86_64_int_return_registers[4] =
2293 AX_REG, DX_REG, DI_REG, SI_REG
2296 /* Additional registers that are clobbered by SYSV calls. */
2298 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2302 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2303 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2306 /* Define the structure for the machine field in struct function. */
2308 struct GTY(()) stack_local_entry {
2309 unsigned short mode;
2312 struct stack_local_entry *next;
2315 /* Structure describing stack frame layout.
2316 Stack grows downward:
2322 saved static chain if ix86_static_chain_on_stack
2324 saved frame pointer if frame_pointer_needed
2325 <- HARD_FRAME_POINTER
2331 <- sse_regs_save_offset
2334 [va_arg registers] |
2338 [padding2] | = to_allocate
2347 int outgoing_arguments_size;
2349 /* The offsets relative to ARG_POINTER. */
2350 HOST_WIDE_INT frame_pointer_offset;
2351 HOST_WIDE_INT hard_frame_pointer_offset;
2352 HOST_WIDE_INT stack_pointer_offset;
2353 HOST_WIDE_INT hfp_save_offset;
2354 HOST_WIDE_INT reg_save_offset;
2355 HOST_WIDE_INT sse_reg_save_offset;
2357 /* When save_regs_using_mov is set, emit prologue using
2358 move instead of push instructions. */
2359 bool save_regs_using_mov;
2362 /* Which cpu are we scheduling for. */
2363 enum attr_cpu ix86_schedule;
2365 /* Which cpu are we optimizing for. */
2366 enum processor_type ix86_tune;
2368 /* Which instruction set architecture to use. */
2369 enum processor_type ix86_arch;
2371 /* True if processor has SSE prefetch instruction. */
2372 unsigned char x86_prefetch_sse;
2374 /* -mstackrealign option */
2375 static const char ix86_force_align_arg_pointer_string[]
2376 = "force_align_arg_pointer";
2378 static rtx (*ix86_gen_leave) (void);
2379 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2380 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2381 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2382 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2383 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2384 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2385 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2386 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2387 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2388 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2389 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2391 /* Preferred alignment for stack boundary in bits. */
2392 unsigned int ix86_preferred_stack_boundary;
2394 /* Alignment for incoming stack boundary in bits specified at
2396 static unsigned int ix86_user_incoming_stack_boundary;
2398 /* Default alignment for incoming stack boundary in bits. */
2399 static unsigned int ix86_default_incoming_stack_boundary;
2401 /* Alignment for incoming stack boundary in bits. */
2402 unsigned int ix86_incoming_stack_boundary;
2404 /* Calling abi specific va_list type nodes. */
2405 static GTY(()) tree sysv_va_list_type_node;
2406 static GTY(()) tree ms_va_list_type_node;
2408 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2409 char internal_label_prefix[16];
2410 int internal_label_prefix_len;
2412 /* Fence to use after loop using movnt. */
2415 /* Register class used for passing a given 64bit part of the argument.
2416 These represent classes as documented by the psABI, with the exception
2417 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2418 uses SFmode or DFmode moves instead of DImode ones to avoid reformatting penalties.
2420 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2421 whenever possible (the upper half contains only padding). */
2422 enum x86_64_reg_class
2425 X86_64_INTEGER_CLASS,
2426 X86_64_INTEGERSI_CLASS,
2433 X86_64_COMPLEX_X87_CLASS,
2437 #define MAX_CLASSES 4
2439 /* Table of constants used by fldpi, fldln2, etc.... */
2440 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2441 static bool ext_80387_constants_init = 0;
2444 static struct machine_function * ix86_init_machine_status (void);
2445 static rtx ix86_function_value (const_tree, const_tree, bool);
2446 static bool ix86_function_value_regno_p (const unsigned int);
2447 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2449 static rtx ix86_static_chain (const_tree, bool);
2450 static int ix86_function_regparm (const_tree, const_tree);
2451 static void ix86_compute_frame_layout (struct ix86_frame *);
2452 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2454 static void ix86_add_new_builtins (HOST_WIDE_INT);
2455 static tree ix86_canonical_va_list_type (tree);
2456 static void predict_jump (int);
2457 static unsigned int split_stack_prologue_scratch_regno (void);
2458 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2460 enum ix86_function_specific_strings
2462 IX86_FUNCTION_SPECIFIC_ARCH,
2463 IX86_FUNCTION_SPECIFIC_TUNE,
2464 IX86_FUNCTION_SPECIFIC_MAX
2467 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2468 const char *, enum fpmath_unit, bool);
2469 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2470 static void ix86_function_specific_save (struct cl_target_option *);
2471 static void ix86_function_specific_restore (struct cl_target_option *);
2472 static void ix86_function_specific_print (FILE *, int,
2473 struct cl_target_option *);
2474 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2475 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2476 struct gcc_options *);
2477 static bool ix86_can_inline_p (tree, tree);
2478 static void ix86_set_current_function (tree);
2479 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2481 static enum calling_abi ix86_function_abi (const_tree);
2484 #ifndef SUBTARGET32_DEFAULT_CPU
2485 #define SUBTARGET32_DEFAULT_CPU "i386"
2488 /* Whether -mtune= or -march= were specified */
2489 static int ix86_tune_defaulted;
2490 static int ix86_arch_specified;
2492 /* Vectorization library interface and handlers. */
2493 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2495 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2496 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2498 /* Processor target table, indexed by processor number */
2501 const struct processor_costs *cost; /* Processor costs */
2502 const int align_loop; /* Default alignments. */
2503 const int align_loop_max_skip;
2504 const int align_jump;
2505 const int align_jump_max_skip;
2506 const int align_func;
2509 static const struct ptt processor_target_table[PROCESSOR_max] =
2511 {&i386_cost, 4, 3, 4, 3, 4},
2512 {&i486_cost, 16, 15, 16, 15, 16},
2513 {&pentium_cost, 16, 7, 16, 7, 16},
2514 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2515 {&geode_cost, 0, 0, 0, 0, 0},
2516 {&k6_cost, 32, 7, 32, 7, 32},
2517 {&athlon_cost, 16, 7, 16, 7, 16},
2518 {&pentium4_cost, 0, 0, 0, 0, 0},
2519 {&k8_cost, 16, 7, 16, 7, 16},
2520 {&nocona_cost, 0, 0, 0, 0, 0},
2522 {&core_cost, 16, 10, 16, 10, 16},
2524 {&core_cost, 16, 10, 16, 10, 16},
2526 {&core_cost, 16, 10, 16, 10, 16},
2527 {&generic32_cost, 16, 7, 16, 7, 16},
2528 {&generic64_cost, 16, 10, 16, 10, 16},
2529 {&amdfam10_cost, 32, 24, 32, 7, 32},
2530 {&bdver1_cost, 16, 10, 16, 7, 11},
2531 {&bdver2_cost, 16, 10, 16, 7, 11},
2532 {&bdver3_cost, 16, 10, 16, 7, 11},
2533 {&btver1_cost, 16, 10, 16, 7, 11},
2534 {&btver2_cost, 16, 10, 16, 7, 11},
2535 {&atom_cost, 16, 15, 16, 7, 16},
2536 {&slm_cost, 16, 15, 16, 7, 16}
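/* Illustrative sketch (not code from this file): the per-processor alignment
   columns above feed the generic align_* parameters when the user did not
   set them explicitly, roughly as done later during option overriding.
   Exact guard conditions in the real override code may differ slightly.  */
#if 0
  if (align_loops == 0)
    align_loops = processor_target_table[ix86_tune].align_loop;
  if (align_jumps == 0)
    align_jumps = processor_target_table[ix86_tune].align_jump;
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
#endif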
2539 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2574 gate_insert_vzeroupper (void)
2576 return TARGET_VZEROUPPER;
2580 rest_of_handle_insert_vzeroupper (void)
2584 /* vzeroupper instructions are inserted immediately after reload to
2585 account for possible spills from 256bit registers. The pass
2586 reuses the mode switching infrastructure by re-running the mode
2587 insertion pass, so disable entities that have already been processed. */
2588 for (i = 0; i < MAX_386_ENTITIES; i++)
2589 ix86_optimize_mode_switching[i] = 0;
2591 ix86_optimize_mode_switching[AVX_U128] = 1;
2593 /* Call optimize_mode_switching. */
2594 pass_mode_switching.pass.execute ();
2598 struct rtl_opt_pass pass_insert_vzeroupper =
2602 "vzeroupper", /* name */
2603 OPTGROUP_NONE, /* optinfo_flags */
2604 gate_insert_vzeroupper, /* gate */
2605 rest_of_handle_insert_vzeroupper, /* execute */
2608 0, /* static_pass_number */
2609 TV_NONE, /* tv_id */
2610 0, /* properties_required */
2611 0, /* properties_provided */
2612 0, /* properties_destroyed */
2613 0, /* todo_flags_start */
2614 TODO_df_finish | TODO_verify_rtl_sharing |
2615 0, /* todo_flags_finish */
2619 /* Return true if a red-zone is in use. */
2622 ix86_using_red_zone (void)
2624 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2627 /* Return a string that documents the current -m options. The caller is
2628 responsible for freeing the string. */
2631 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2632 const char *tune, enum fpmath_unit fpmath,
2635 struct ix86_target_opts
2637 const char *option; /* option string */
2638 HOST_WIDE_INT mask; /* isa mask options */
2641 /* This table is ordered so that options like -msse4.2, which imply
2642 preceding options, are matched first. */
2643 static struct ix86_target_opts isa_opts[] =
2645 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2646 { "-mfma", OPTION_MASK_ISA_FMA },
2647 { "-mxop", OPTION_MASK_ISA_XOP },
2648 { "-mlwp", OPTION_MASK_ISA_LWP },
2649 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2650 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2651 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2652 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2653 { "-msse3", OPTION_MASK_ISA_SSE3 },
2654 { "-msse2", OPTION_MASK_ISA_SSE2 },
2655 { "-msse", OPTION_MASK_ISA_SSE },
2656 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2657 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2658 { "-mmmx", OPTION_MASK_ISA_MMX },
2659 { "-mabm", OPTION_MASK_ISA_ABM },
2660 { "-mbmi", OPTION_MASK_ISA_BMI },
2661 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2662 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2663 { "-mhle", OPTION_MASK_ISA_HLE },
2664 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2665 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2666 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2667 { "-madx", OPTION_MASK_ISA_ADX },
2668 { "-mtbm", OPTION_MASK_ISA_TBM },
2669 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2670 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2671 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2672 { "-maes", OPTION_MASK_ISA_AES },
2673 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2674 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2675 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2676 { "-mf16c", OPTION_MASK_ISA_F16C },
2677 { "-mrtm", OPTION_MASK_ISA_RTM },
2678 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2679 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2683 static struct ix86_target_opts flag_opts[] =
2685 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2686 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2687 { "-m80387", MASK_80387 },
2688 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2689 { "-malign-double", MASK_ALIGN_DOUBLE },
2690 { "-mcld", MASK_CLD },
2691 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2692 { "-mieee-fp", MASK_IEEE_FP },
2693 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2694 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2695 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2696 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2697 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2698 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2699 { "-mno-red-zone", MASK_NO_RED_ZONE },
2700 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2701 { "-mrecip", MASK_RECIP },
2702 { "-mrtd", MASK_RTD },
2703 { "-msseregparm", MASK_SSEREGPARM },
2704 { "-mstack-arg-probe", MASK_STACK_PROBE },
2705 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2706 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2707 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2708 { "-mvzeroupper", MASK_VZEROUPPER },
2709 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2710 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2711 { "-mprefer-avx128", MASK_PREFER_AVX128},
2714 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2717 char target_other[40];
2727 memset (opts, '\0', sizeof (opts));
2729 /* Add -march= option. */
2732 opts[num][0] = "-march=";
2733 opts[num++][1] = arch;
2736 /* Add -mtune= option. */
2739 opts[num][0] = "-mtune=";
2740 opts[num++][1] = tune;
2743 /* Add -m32/-m64/-mx32. */
2744 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2746 if ((isa & OPTION_MASK_ABI_64) != 0)
2750 isa &= ~ (OPTION_MASK_ISA_64BIT
2751 | OPTION_MASK_ABI_64
2752 | OPTION_MASK_ABI_X32);
2756 opts[num++][0] = abi;
2758 /* Pick out the options in isa options. */
2759 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2761 if ((isa & isa_opts[i].mask) != 0)
2763 opts[num++][0] = isa_opts[i].option;
2764 isa &= ~ isa_opts[i].mask;
2768 if (isa && add_nl_p)
2770 opts[num++][0] = isa_other;
2771 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2775 /* Add flag options. */
2776 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2778 if ((flags & flag_opts[i].mask) != 0)
2780 opts[num++][0] = flag_opts[i].option;
2781 flags &= ~ flag_opts[i].mask;
2785 if (flags && add_nl_p)
2787 opts[num++][0] = target_other;
2788 sprintf (target_other, "(other flags: %#x)", flags);
2791 /* Add -fpmath= option. */
2794 opts[num][0] = "-mfpmath=";
2795 switch ((int) fpmath)
2798 opts[num++][1] = "387";
2802 opts[num++][1] = "sse";
2805 case FPMATH_387 | FPMATH_SSE:
2806 opts[num++][1] = "sse+387";
2818 gcc_assert (num < ARRAY_SIZE (opts));
2820 /* Size the string. */
2822 sep_len = (add_nl_p) ? 3 : 1;
2823 for (i = 0; i < num; i++)
2826 for (j = 0; j < 2; j++)
2828 len += strlen (opts[i][j]);
2831 /* Build the string. */
2832 ret = ptr = (char *) xmalloc (len);
2835 for (i = 0; i < num; i++)
2839 for (j = 0; j < 2; j++)
2840 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2847 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2855 for (j = 0; j < 2; j++)
2858 memcpy (ptr, opts[i][j], len2[j]);
2860 line_len += len2[j];
2865 gcc_assert (ret + len >= ptr);
2870 /* Return true if profiling code should be emitted before the
2871 prologue, and false otherwise.
2872 Note: on x86 this is the case when -mfentry ("hotfix" support) is used. */
2874 ix86_profile_before_prologue (void)
2876 return flag_fentry != 0;
2879 /* Function that is callable from the debugger to print the current
2882 ix86_debug_options (void)
2884 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2885 ix86_arch_string, ix86_tune_string,
2890 fprintf (stderr, "%s\n\n", opts);
2894 fputs ("<no options>\n\n", stderr);
2899 /* Override various settings based on options. If MAIN_ARGS_P, the
2900 options are from the command line, otherwise they are from
2904 ix86_option_override_internal (bool main_args_p)
2907 unsigned int ix86_arch_mask, ix86_tune_mask;
2908 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2913 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
2914 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
2915 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
2916 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
2917 #define PTA_AES (HOST_WIDE_INT_1 << 4)
2918 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
2919 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
2920 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
2921 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
2922 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
2923 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
2924 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
2925 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
2926 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
2927 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
2928 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
2929 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
2930 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
2931 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
2932 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
2933 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
2934 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
2935 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
2936 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
2937 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
2938 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
2939 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
2940 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
2941 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
2942 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
2943 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
2944 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
2945 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
2946 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
2947 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
2948 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
2949 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
2950 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
2951 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
2952 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
2954 /* if this reaches 64, need to widen struct pta flags below */
2958 const char *const name; /* processor name or nickname. */
2959 const enum processor_type processor;
2960 const enum attr_cpu schedule;
2961 const unsigned HOST_WIDE_INT flags;
2963 const processor_alias_table[] =
2965 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2966 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2967 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2968 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2969 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2970 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2971 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
2972 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
2973 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2974 PTA_MMX | PTA_SSE | PTA_FXSR},
2975 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2976 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2977 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
2978 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2979 PTA_MMX | PTA_SSE | PTA_FXSR},
2980 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2981 PTA_MMX | PTA_SSE | PTA_FXSR},
2982 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2983 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
2984 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2985 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
2986 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2987 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
2988 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2989 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
2990 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2991 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2992 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
2993 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2994 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2995 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR},
2996 {"corei7", PROCESSOR_COREI7, CPU_COREI7,
2997 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
2998 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT | PTA_FXSR},
2999 {"corei7-avx", PROCESSOR_COREI7, CPU_COREI7,
3000 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3001 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3002 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL
3003 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3004 {"core-avx-i", PROCESSOR_COREI7, CPU_COREI7,
3005 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3006 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
3007 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
3008 | PTA_RDRND | PTA_F16C | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3009 {"core-avx2", PROCESSOR_HASWELL, CPU_COREI7,
3010 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3011 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
3012 | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
3013 | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
3014 | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE | PTA_FXSR | PTA_XSAVE
3016 {"atom", PROCESSOR_ATOM, CPU_ATOM,
3017 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3018 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR},
3019 {"slm", PROCESSOR_SLM, CPU_SLM,
3020 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3021 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_MOVBE
3023 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3024 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3025 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3026 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3027 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3028 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3029 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3030 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3031 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3032 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3033 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3034 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3035 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3036 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3037 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3038 {"x86-64", PROCESSOR_K8, CPU_K8,
3039 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3040 {"k8", PROCESSOR_K8, CPU_K8,
3041 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3042 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3043 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3044 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3045 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3046 {"opteron", PROCESSOR_K8, CPU_K8,
3047 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3048 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3049 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3050 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3051 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3052 {"athlon64", PROCESSOR_K8, CPU_K8,
3053 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3054 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3055 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3056 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3057 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3058 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3059 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3060 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3061 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3062 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3063 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3064 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3065 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3066 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3067 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3068 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3069 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3070 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3071 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3072 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3073 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3074 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3075 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3076 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3077 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3078 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3079 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3080 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3081 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3082 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3083 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3084 | PTA_XSAVEOPT | PTA_FSGSBASE},
3085 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
3086 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3087 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3088 | PTA_FXSR | PTA_XSAVE},
3089 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3090 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3091 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3092 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3093 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3094 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3096 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
3097 PTA_HLE /* flags are only used for -march switch. */ },
3098 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
3100 | PTA_HLE /* flags are only used for -march switch. */ },
3103 /* -mrecip options. */
3106 const char *string; /* option name */
3107 unsigned int mask; /* mask bits to set */
3109 const recip_options[] =
3111 { "all", RECIP_MASK_ALL },
3112 { "none", RECIP_MASK_NONE },
3113 { "div", RECIP_MASK_DIV },
3114 { "sqrt", RECIP_MASK_SQRT },
3115 { "vec-div", RECIP_MASK_VEC_DIV },
3116 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
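/* Illustrative note: the table above maps the sub-option names accepted by
   -mrecip= onto their RECIP_MASK_* bits; for instance -mrecip=vec-div,vec-sqrt
   requests the reciprocal approximations only for vectorized division and
   square root.  */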
3119 int const pta_size = ARRAY_SIZE (processor_alias_table);
3121 /* Set up prefix/suffix so the error messages refer to either the command
3122 line argument, or the attribute(target). */
3131 prefix = "option(\"";
3136 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3137 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3138 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT)
3139 ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3140 #ifdef TARGET_BI_ARCH
3143 #if TARGET_BI_ARCH == 1
3144 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3145 is on and OPTION_MASK_ABI_X32 is off. We turn off
3146 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3149 ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3151 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3152 on and OPTION_MASK_ABI_64 is off. We turn off
3153 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3156 ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3163 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3164 OPTION_MASK_ABI_64 for TARGET_X32. */
3165 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3166 ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3168 else if (TARGET_LP64)
3170 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3171 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3172 ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3173 ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3176 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3177 SUBTARGET_OVERRIDE_OPTIONS;
3180 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3181 SUBSUBTARGET_OVERRIDE_OPTIONS;
3184 /* -fPIC is the default for x86_64. */
3185 if (TARGET_MACHO && TARGET_64BIT)
3188 /* Need to check -mtune=generic first. */
3189 if (ix86_tune_string)
3191 if (!strcmp (ix86_tune_string, "generic")
3192 || !strcmp (ix86_tune_string, "i686")
3193 /* As special support for cross compilers we read -mtune=native
3194 as -mtune=generic. With native compilers we won't see the
3195 -mtune=native, as it was changed by the driver. */
3196 || !strcmp (ix86_tune_string, "native"))
3199 ix86_tune_string = "generic64";
3201 ix86_tune_string = "generic32";
3203 /* If this call is for setting the option attribute, allow the
3204 generic32/generic64 that was previously set. */
3205 else if (!main_args_p
3206 && (!strcmp (ix86_tune_string, "generic32")
3207 || !strcmp (ix86_tune_string, "generic64")))
3209 else if (!strncmp (ix86_tune_string, "generic", 7))
3210 error ("bad value (%s) for %stune=%s %s",
3211 ix86_tune_string, prefix, suffix, sw);
3212 else if (!strcmp (ix86_tune_string, "x86-64"))
3213 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3214 "%stune=k8%s or %stune=generic%s instead as appropriate",
3215 prefix, suffix, prefix, suffix, prefix, suffix);
3219 if (ix86_arch_string)
3220 ix86_tune_string = ix86_arch_string;
3221 if (!ix86_tune_string)
3223 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3224 ix86_tune_defaulted = 1;
3227 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3228 need to use a sensible tune option. */
3229 if (!strcmp (ix86_tune_string, "generic")
3230 || !strcmp (ix86_tune_string, "x86-64")
3231 || !strcmp (ix86_tune_string, "i686"))
3234 ix86_tune_string = "generic64";
3236 ix86_tune_string = "generic32";
3240 if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
3242 /* rep; movq isn't available in 32-bit code. */
3243 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3244 ix86_stringop_alg = no_stringop;
3247 if (!ix86_arch_string)
3248 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3250 ix86_arch_specified = 1;
3252 if (global_options_set.x_ix86_pmode)
3254 if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
3255 || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
3256 error ("address mode %qs not supported in the %s bit mode",
3257 TARGET_64BIT ? "short" : "long",
3258 TARGET_64BIT ? "64" : "32");
3261 ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;
3263 if (!global_options_set.x_ix86_abi)
3264 ix86_abi = DEFAULT_ABI;
3266 if (global_options_set.x_ix86_cmodel)
3268 switch (ix86_cmodel)
3273 ix86_cmodel = CM_SMALL_PIC;
3275 error ("code model %qs not supported in the %s bit mode",
3282 ix86_cmodel = CM_MEDIUM_PIC;
3284 error ("code model %qs not supported in the %s bit mode",
3286 else if (TARGET_X32)
3287 error ("code model %qs not supported in x32 mode",
3294 ix86_cmodel = CM_LARGE_PIC;
3296 error ("code model %qs not supported in the %s bit mode",
3298 else if (TARGET_X32)
3299 error ("code model %qs not supported in x32 mode",
3305 error ("code model %s does not support PIC mode", "32");
3307 error ("code model %qs not supported in the %s bit mode",
3314 error ("code model %s does not support PIC mode", "kernel");
3315 ix86_cmodel = CM_32;
3318 error ("code model %qs not supported in the %s bit mode",
3328 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3329 use of rip-relative addressing. This eliminates fixups that
3330 would otherwise be needed if this object is to be placed in a
3331 DLL, and is essentially just as efficient as direct addressing. */
3332 if (TARGET_64BIT && (TARGET_RDOS || TARGET_PECOFF))
3333 ix86_cmodel = CM_MEDIUM_PIC, flag_pic = 1;
3334 else if (TARGET_64BIT)
3335 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3337 ix86_cmodel = CM_32;
3339 if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
3341 error ("-masm=intel not supported in this configuration");
3342 ix86_asm_dialect = ASM_ATT;
3344 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3345 sorry ("%i-bit mode not compiled in",
3346 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3348 for (i = 0; i < pta_size; i++)
3349 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3351 ix86_schedule = processor_alias_table[i].schedule;
3352 ix86_arch = processor_alias_table[i].processor;
3353 /* Default cpu tuning to the architecture. */
3354 ix86_tune = ix86_arch;
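/* In the cascade below, each PTA_* capability flag of the matched -march
   entry turns on the corresponding OPTION_MASK_ISA_* bit, but only when the
   user did not configure that ISA explicitly; ix86_isa_flags_explicit records
   explicit -m<isa>/-mno-<isa> choices, which always take precedence.  */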
3356 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3357 error ("CPU you selected does not support x86-64 "
3360 if (processor_alias_table[i].flags & PTA_MMX
3361 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3362 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3363 if (processor_alias_table[i].flags & PTA_3DNOW
3364 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3365 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3366 if (processor_alias_table[i].flags & PTA_3DNOW_A
3367 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3368 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3369 if (processor_alias_table[i].flags & PTA_SSE
3370 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3371 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3372 if (processor_alias_table[i].flags & PTA_SSE2
3373 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3374 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3375 if (processor_alias_table[i].flags & PTA_SSE3
3376 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3377 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3378 if (processor_alias_table[i].flags & PTA_SSSE3
3379 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3380 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3381 if (processor_alias_table[i].flags & PTA_SSE4_1
3382 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3383 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3384 if (processor_alias_table[i].flags & PTA_SSE4_2
3385 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3386 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3387 if (processor_alias_table[i].flags & PTA_AVX
3388 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3389 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3390 if (processor_alias_table[i].flags & PTA_AVX2
3391 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3392 ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3393 if (processor_alias_table[i].flags & PTA_FMA
3394 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3395 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3396 if (processor_alias_table[i].flags & PTA_SSE4A
3397 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3398 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3399 if (processor_alias_table[i].flags & PTA_FMA4
3400 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3401 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3402 if (processor_alias_table[i].flags & PTA_XOP
3403 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3404 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3405 if (processor_alias_table[i].flags & PTA_LWP
3406 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3407 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3408 if (processor_alias_table[i].flags & PTA_ABM
3409 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3410 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3411 if (processor_alias_table[i].flags & PTA_BMI
3412 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3413 ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3414 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3415 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3416 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3417 if (processor_alias_table[i].flags & PTA_TBM
3418 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3419 ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3420 if (processor_alias_table[i].flags & PTA_BMI2
3421 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3422 ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3423 if (processor_alias_table[i].flags & PTA_CX16
3424 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3425 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3426 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3427 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3428 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3429 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3430 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3431 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3432 if (processor_alias_table[i].flags & PTA_MOVBE
3433 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3434 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3435 if (processor_alias_table[i].flags & PTA_AES
3436 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3437 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3438 if (processor_alias_table[i].flags & PTA_PCLMUL
3439 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3440 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3441 if (processor_alias_table[i].flags & PTA_FSGSBASE
3442 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3443 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3444 if (processor_alias_table[i].flags & PTA_RDRND
3445 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3446 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3447 if (processor_alias_table[i].flags & PTA_F16C
3448 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3449 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3450 if (processor_alias_table[i].flags & PTA_RTM
3451 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3452 ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3453 if (processor_alias_table[i].flags & PTA_HLE
3454 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3455 ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3456 if (processor_alias_table[i].flags & PTA_PRFCHW
3457 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3458 ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3459 if (processor_alias_table[i].flags & PTA_RDSEED
3460 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3461 ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3462 if (processor_alias_table[i].flags & PTA_ADX
3463 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3464 ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3465 if (processor_alias_table[i].flags & PTA_FXSR
3466 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3467 ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3468 if (processor_alias_table[i].flags & PTA_XSAVE
3469 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3470 ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3471 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3472 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3473 ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3474 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3475 x86_prefetch_sse = true;
3480 if (!strcmp (ix86_arch_string, "generic"))
3481 error ("generic CPU can be used only for %stune=%s %s",
3482 prefix, suffix, sw);
3483 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3484 error ("bad value (%s) for %sarch=%s %s",
3485 ix86_arch_string, prefix, suffix, sw);
3487 ix86_arch_mask = 1u << ix86_arch;
3488 for (i = 0; i < X86_ARCH_LAST; ++i)
3489 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
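/* ix86_arch_mask is a one-hot mask for the selected -march processor; an
   X86_ARCH_* feature ends up enabled exactly when its entry in
   initial_ix86_arch_features includes that processor's bit.  */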
3491 for (i = 0; i < pta_size; i++)
3492 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3494 ix86_schedule = processor_alias_table[i].schedule;
3495 ix86_tune = processor_alias_table[i].processor;
3498 if (!(processor_alias_table[i].flags & PTA_64BIT))
3500 if (ix86_tune_defaulted)
3502 ix86_tune_string = "x86-64";
3503 for (i = 0; i < pta_size; i++)
3504 if (! strcmp (ix86_tune_string,
3505 processor_alias_table[i].name))
3507 ix86_schedule = processor_alias_table[i].schedule;
3508 ix86_tune = processor_alias_table[i].processor;
3511 error ("CPU you selected does not support x86-64 "
3517 /* Adjust tuning when compiling for 32-bit ABI. */
3520 case PROCESSOR_GENERIC64:
3521 ix86_tune = PROCESSOR_GENERIC32;
3522 ix86_schedule = CPU_PENTIUMPRO;
3529 /* Intel CPUs have always interpreted SSE prefetch instructions as
3530 NOPs; so, we can enable SSE prefetch instructions even when
3531 -mtune (rather than -march) points us to a processor that has them.
3532 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3533 higher processors. */
3535 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3536 x86_prefetch_sse = true;
3540 if (ix86_tune_specified && i == pta_size)
3541 error ("bad value (%s) for %stune=%s %s",
3542 ix86_tune_string, prefix, suffix, sw);
3544 ix86_tune_mask = 1u << ix86_tune;
3545 for (i = 0; i < X86_TUNE_LAST; ++i)
3546 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3548 #ifndef USE_IX86_FRAME_POINTER
3549 #define USE_IX86_FRAME_POINTER 0
3552 #ifndef USE_X86_64_FRAME_POINTER
3553 #define USE_X86_64_FRAME_POINTER 0
3556 /* Set the default values for switches whose default depends on TARGET_64BIT
3557 in case they weren't overwritten by command line options. */
3560 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3561 flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3562 if (flag_asynchronous_unwind_tables == 2)
3563 flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
3564 if (flag_pcc_struct_return == 2)
3565 flag_pcc_struct_return = 0;
3569 if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
3570 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3571 if (flag_asynchronous_unwind_tables == 2)
3572 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3573 if (flag_pcc_struct_return == 2)
3574 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3577 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3579 ix86_cost = &ix86_size_cost;
3581 ix86_cost = ix86_tune_cost;
3583 /* Arrange to set up i386_stack_locals for all functions. */
3584 init_machine_status = ix86_init_machine_status;
3586 /* Validate -mregparm= value. */
3587 if (global_options_set.x_ix86_regparm)
3590 warning (0, "-mregparm is ignored in 64-bit mode");
3591 if (ix86_regparm > REGPARM_MAX)
3593 error ("-mregparm=%d is not between 0 and %d",
3594 ix86_regparm, REGPARM_MAX);
3599 ix86_regparm = REGPARM_MAX;
3601 /* Default align_* from the processor table. */
3602 if (align_loops == 0)
3604 align_loops = processor_target_table[ix86_tune].align_loop;
3605 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3607 if (align_jumps == 0)
3609 align_jumps = processor_target_table[ix86_tune].align_jump;
3610 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3612 if (align_functions == 0)
3614 align_functions = processor_target_table[ix86_tune].align_func;
3617 /* Provide default for -mbranch-cost= value. */
3618 if (!global_options_set.x_ix86_branch_cost)
3619 ix86_branch_cost = ix86_cost->branch_cost;
3623 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3625 /* Enable by default the SSE and MMX builtins. Do allow the user to
3626 explicitly disable any of these. In particular, disabling SSE and
3627 MMX for kernel code is extremely useful. */
3628 if (!ix86_arch_specified)
3630 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3631 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3634 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3638 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3640 if (!ix86_arch_specified)
3642 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3644 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3645 when the programmer takes care to keep the stack from being destroyed. */
3646 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3647 target_flags |= MASK_NO_RED_ZONE;
3650 /* Keep nonleaf frame pointers. */
3651 if (flag_omit_frame_pointer)
3652 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3653 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3654 flag_omit_frame_pointer = 1;
3656 /* If we're doing fast math, we don't care about comparison order
3657 wrt NaNs. This lets us use a shorter comparison sequence. */
3658 if (flag_finite_math_only)
3659 target_flags &= ~MASK_IEEE_FP;
3661 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3662 since the insns won't need emulation. */
3663 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3664 target_flags &= ~MASK_NO_FANCY_MATH_387;
3666 /* Likewise, if the target doesn't have a 387, or we've specified
3667 software floating point, don't use 387 inline intrinsics. */
3669 target_flags |= MASK_NO_FANCY_MATH_387;
3671 /* Turn on MMX builtins for -msse. */
3673 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3675 /* Enable SSE prefetch. */
3676 if (TARGET_SSE || (TARGET_PRFCHW && !TARGET_3DNOW))
3677 x86_prefetch_sse = true;
3679 /* Enable prefetch{,w} instructions for -m3dnow. */
3681 ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW & ~ix86_isa_flags_explicit;
3683 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3684 if (TARGET_SSE4_2 || TARGET_ABM)
3685 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3687 /* Enable lzcnt instruction for -mabm. */
3689 ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
3691 /* Validate -mpreferred-stack-boundary= value or default it to
3692 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3693 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3694 if (global_options_set.x_ix86_preferred_stack_boundary_arg)
3696 int min = (TARGET_64BIT ? (TARGET_SSE ? 4 : 3) : 2);
3697 int max = (TARGET_SEH ? 4 : 12);
3699 if (ix86_preferred_stack_boundary_arg < min
3700 || ix86_preferred_stack_boundary_arg > max)
3703 error ("-mpreferred-stack-boundary is not supported "
3706 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3707 ix86_preferred_stack_boundary_arg, min, max);
3710 ix86_preferred_stack_boundary
3711 = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
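/* Worked example: the option argument is the log2 of the alignment in bytes,
   so -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. a 16-byte aligned stack (BITS_PER_UNIT is 8 on this target).  */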
3714 /* Set the default value for -mstackrealign. */
3715 if (ix86_force_align_arg_pointer == -1)
3716 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3718 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3720 /* Validate -mincoming-stack-boundary= value or default it to
3721 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3722 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3723 if (global_options_set.x_ix86_incoming_stack_boundary_arg)
3725 if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
3726 || ix86_incoming_stack_boundary_arg > 12)
3727 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3728 ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
3731 ix86_user_incoming_stack_boundary
3732 = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3733 ix86_incoming_stack_boundary
3734 = ix86_user_incoming_stack_boundary;
3738 /* Accept -msseregparm only if at least SSE support is enabled. */
3739 if (TARGET_SSEREGPARM
3741 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3743 if (global_options_set.x_ix86_fpmath)
3745 if (ix86_fpmath & FPMATH_SSE)
3749 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3750 ix86_fpmath = FPMATH_387;
3752 else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
3754 warning (0, "387 instruction set disabled, using SSE arithmetics");
3755 ix86_fpmath = FPMATH_SSE;
3760 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3762 /* If the i387 is disabled, then do not return values in it. */
3764 target_flags &= ~MASK_FLOAT_RETURNS;
3766 /* Use external vectorized library in vectorizing intrinsics. */
3767 if (global_options_set.x_ix86_veclibabi_type)
3768 switch (ix86_veclibabi_type)
3770 case ix86_veclibabi_type_svml:
3771 ix86_veclib_handler = ix86_veclibabi_svml;
3774 case ix86_veclibabi_type_acml:
3775 ix86_veclib_handler = ix86_veclibabi_acml;
3782 if ((!USE_IX86_FRAME_POINTER
3783 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3784 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3786 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3788 /* ??? Unwind info is not correct around the CFG unless either a frame
3789 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3790 unwind info generation to be aware of the CFG and propagating states
3792 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3793 || flag_exceptions || flag_non_call_exceptions)
3794 && flag_omit_frame_pointer
3795 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3797 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3798 warning (0, "unwind tables currently require either a frame pointer "
3799 "or %saccumulate-outgoing-args%s for correctness",
3801 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3804 /* If stack probes are required, the space used for large function
3805 arguments on the stack must also be probed, so enable
3806 -maccumulate-outgoing-args so this happens in the prologue. */
3807 if (TARGET_STACK_PROBE
3808 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3810 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3811 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3812 "for correctness", prefix, suffix);
3813 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3816 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3819 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3820 p = strchr (internal_label_prefix, 'X');
3821 internal_label_prefix_len = p - internal_label_prefix;
3825 /* When a scheduling description is not available, disable the scheduler pass
3826 so it won't slow down the compilation and make x87 code slower. */
3827 if (!TARGET_SCHEDULE)
3828 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3830 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3831 ix86_tune_cost->simultaneous_prefetches,
3832 global_options.x_param_values,
3833 global_options_set.x_param_values);
3834 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3835 ix86_tune_cost->prefetch_block,
3836 global_options.x_param_values,
3837 global_options_set.x_param_values);
3838 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3839 ix86_tune_cost->l1_cache_size,
3840 global_options.x_param_values,
3841 global_options_set.x_param_values);
3842 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
3843 ix86_tune_cost->l2_cache_size,
3844 global_options.x_param_values,
3845 global_options_set.x_param_values);
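/* The calls above seed the prefetch and cache --param defaults from the cost
   table of the CPU selected for tuning; maybe_set_param_value leaves any value
   the user already supplied via --param untouched.  */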
3847 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3848 if (flag_prefetch_loop_arrays < 0
3850 && (optimize >= 3 || flag_profile_use)
3851 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
3852 flag_prefetch_loop_arrays = 1;
3854 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3855 can be optimized to ap = __builtin_next_arg (0). */
3856 if (!TARGET_64BIT && !flag_split_stack)
3857 targetm.expand_builtin_va_start = NULL;
3861 ix86_gen_leave = gen_leave_rex64;
3862 if (Pmode == DImode)
3864 ix86_gen_monitor = gen_sse3_monitor64_di;
3865 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
3866 ix86_gen_tls_local_dynamic_base_64
3867 = gen_tls_local_dynamic_base_64_di;
3871 ix86_gen_monitor = gen_sse3_monitor64_si;
3872 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
3873 ix86_gen_tls_local_dynamic_base_64
3874 = gen_tls_local_dynamic_base_64_si;
3879 ix86_gen_leave = gen_leave;
3880 ix86_gen_monitor = gen_sse3_monitor;
3883 if (Pmode == DImode)
3885 ix86_gen_add3 = gen_adddi3;
3886 ix86_gen_sub3 = gen_subdi3;
3887 ix86_gen_sub3_carry = gen_subdi3_carry;
3888 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3889 ix86_gen_andsp = gen_anddi3;
3890 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3891 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3892 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3896 ix86_gen_add3 = gen_addsi3;
3897 ix86_gen_sub3 = gen_subsi3;
3898 ix86_gen_sub3_carry = gen_subsi3_carry;
3899 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3900 ix86_gen_andsp = gen_andsi3;
3901 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3902 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3903 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
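/* These ix86_gen_* hooks are resolved here so that later code can emit the
   pointer-width-appropriate pattern without re-testing Pmode: the DImode
   variants serve 64-bit LP64 code, the SImode variants serve 32-bit code and
   x32.  */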
3907 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3909 target_flags |= MASK_CLD & ~target_flags_explicit;
3912 if (!TARGET_64BIT && flag_pic)
3914 if (flag_fentry > 0)
3915 sorry ("-mfentry isn%'t supported for 32-bit in combination "
3919 else if (TARGET_SEH)
3921 if (flag_fentry == 0)
3922 sorry ("-mno-fentry isn%'t compatible with SEH");
3925 else if (flag_fentry < 0)
3927 #if defined(PROFILE_BEFORE_PROLOGUE)
3936 /* When not optimizing for size, enable the vzeroupper optimization for
3937 TARGET_AVX with -fexpensive-optimizations and split 32-byte
3938 AVX unaligned loads/stores. */
3941 if (flag_expensive_optimizations
3942 && !(target_flags_explicit & MASK_VZEROUPPER))
3943 target_flags |= MASK_VZEROUPPER;
3944 if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
3945 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
3946 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
3947 if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
3948 && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
3949 target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
3950 /* Enable 128-bit AVX instruction generation
3951 for the auto-vectorizer. */
3952 if (TARGET_AVX128_OPTIMAL
3953 && !(target_flags_explicit & MASK_PREFER_AVX128))
3954 target_flags |= MASK_PREFER_AVX128;
3959 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
3960 target_flags &= ~MASK_VZEROUPPER;
3963 if (ix86_recip_name)
3965 char *p = ASTRDUP (ix86_recip_name);
3967 unsigned int mask, i;
3970 while ((q = strtok (p, ",")) != NULL)
3981 if (!strcmp (q, "default"))
3982 mask = RECIP_MASK_ALL;
3985 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
3986 if (!strcmp (q, recip_options[i].string))
3988 mask = recip_options[i].mask;
3992 if (i == ARRAY_SIZE (recip_options))
3994 error ("unknown option for -mrecip=%s", q);
3996 mask = RECIP_MASK_NONE;
4000 recip_mask_explicit |= mask;
4002 recip_mask &= ~mask;
4009 recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
4010 else if (target_flags_explicit & MASK_RECIP)
4011 recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
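/* Sub-options given through -mrecip= take precedence: the bare -mrecip /
   -mno-recip handling above only switches the RECIP_MASK_* bits that were not
   configured explicitly, which recip_mask_explicit keeps track of.  */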
4013 /* Default long double to 64-bit for Bionic. */
4014 if (TARGET_HAS_BIONIC
4015 && !(target_flags_explicit & MASK_LONG_DOUBLE_64))
4016 target_flags |= MASK_LONG_DOUBLE_64;
4018 /* Save the initial options in case the user does function specific
4021 target_option_default_node = target_option_current_node
4022 = build_target_option_node ();
4024 /* Handle stack protector */
4025 if (!global_options_set.x_ix86_stack_protector_guard)
4026 ix86_stack_protector_guard = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
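/* Rough background, not taken from this file: SSP_TLS reads the stack-protector
   canary from the thread pointer (typically %gs:0x14 for 32-bit and %fs:0x28
   for 64-bit glibc targets), while SSP_GLOBAL falls back to the global
   __stack_chk_guard symbol, which is what Bionic provides.  */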
4029 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4032 ix86_option_override (void)
4034 static struct register_pass_info insert_vzeroupper_info
4035 = { &pass_insert_vzeroupper.pass, "reload",
4036 1, PASS_POS_INSERT_AFTER
4039 ix86_option_override_internal (true);
4042 /* This needs to be done at start up. It's convenient to do it here. */
4043 register_pass (&insert_vzeroupper_info);
4046 /* Update register usage after having seen the compiler flags. */
4049 ix86_conditional_register_usage (void)
4054 /* The PIC register, if it exists, is fixed. */
4055 j = PIC_OFFSET_TABLE_REGNUM;
4056 if (j != INVALID_REGNUM)
4057 fixed_regs[j] = call_used_regs[j] = 1;
4059 /* For 32-bit targets, squash the REX registers. */
4062 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4063 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4064 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4065 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4068 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4069 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4070 : TARGET_64BIT ? (1 << 2)
4073 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4075 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4077 /* Set/reset conditionally defined registers from
4078 CALL_USED_REGISTERS initializer. */
4079 if (call_used_regs[i] > 1)
4080 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4082 /* Build the CLOBBERED_REGS register set as the call-used registers
4083 of the GENERAL_REGS register set. */
4084 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4085 && call_used_regs[i])
4086 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4089 /* If MMX is disabled, squash the registers. */
4091 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4092 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4093 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4095 /* If SSE is disabled, squash the registers. */
4097 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4098 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4099 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4101 /* If the FPU is disabled, squash the registers. */
4102 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4103 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4104 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4105 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4109 /* Save the current options */
4112 ix86_function_specific_save (struct cl_target_option *ptr)
4114 ptr->arch = ix86_arch;
4115 ptr->schedule = ix86_schedule;
4116 ptr->tune = ix86_tune;
4117 ptr->branch_cost = ix86_branch_cost;
4118 ptr->tune_defaulted = ix86_tune_defaulted;
4119 ptr->arch_specified = ix86_arch_specified;
4120 ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
4121 ptr->ix86_target_flags_explicit = target_flags_explicit;
4122 ptr->x_recip_mask_explicit = recip_mask_explicit;
4124 /* The fields are char but the variables are not; make sure the
4125 values fit in the fields. */
4126 gcc_assert (ptr->arch == ix86_arch);
4127 gcc_assert (ptr->schedule == ix86_schedule);
4128 gcc_assert (ptr->tune == ix86_tune);
4129 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4132 /* Restore the current options */
4135 ix86_function_specific_restore (struct cl_target_option *ptr)
4137 enum processor_type old_tune = ix86_tune;
4138 enum processor_type old_arch = ix86_arch;
4139 unsigned int ix86_arch_mask, ix86_tune_mask;
4142 ix86_arch = (enum processor_type) ptr->arch;
4143 ix86_schedule = (enum attr_cpu) ptr->schedule;
4144 ix86_tune = (enum processor_type) ptr->tune;
4145 ix86_branch_cost = ptr->branch_cost;
4146 ix86_tune_defaulted = ptr->tune_defaulted;
4147 ix86_arch_specified = ptr->arch_specified;
4148 ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4149 target_flags_explicit = ptr->ix86_target_flags_explicit;
4150 recip_mask_explicit = ptr->x_recip_mask_explicit;
4152 /* Recreate the arch feature tests if the arch changed */
4153 if (old_arch != ix86_arch)
4155 ix86_arch_mask = 1u << ix86_arch;
4156 for (i = 0; i < X86_ARCH_LAST; ++i)
4157 ix86_arch_features[i]
4158 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4161 /* Recreate the tune optimization tests */
4162 if (old_tune != ix86_tune)
4164 ix86_tune_mask = 1u << ix86_tune;
4165 for (i = 0; i < X86_TUNE_LAST; ++i)
4166 ix86_tune_features[i]
4167 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
4171 /* Print the current options */
4174 ix86_function_specific_print (FILE *file, int indent,
4175 struct cl_target_option *ptr)
4178 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4179 NULL, NULL, ptr->x_ix86_fpmath, false);
4181 fprintf (file, "%*sarch = %d (%s)\n",
4184 ((ptr->arch < TARGET_CPU_DEFAULT_max)
4185 ? cpu_names[ptr->arch]
4188 fprintf (file, "%*stune = %d (%s)\n",
4191 ((ptr->tune < TARGET_CPU_DEFAULT_max)
4192 ? cpu_names[ptr->tune]
4195 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4199 fprintf (file, "%*s%s\n", indent, "", target_string);
4200 free (target_string);
4205 /* Inner function to process the attribute((target(...))), take an argument and
4206 set the current options from the argument. If we have a list, recursively go
4210 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4211 struct gcc_options *enum_opts_set)
4216 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4217 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4218 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4219 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4220 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4236 enum ix86_opt_type type;
4241 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4242 IX86_ATTR_ISA ("abm", OPT_mabm),
4243 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4244 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4245 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4246 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4247 IX86_ATTR_ISA ("aes", OPT_maes),
4248 IX86_ATTR_ISA ("avx", OPT_mavx),
4249 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4250 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4251 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4252 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4253 IX86_ATTR_ISA ("sse", OPT_msse),
4254 IX86_ATTR_ISA ("sse2", OPT_msse2),
4255 IX86_ATTR_ISA ("sse3", OPT_msse3),
4256 IX86_ATTR_ISA ("sse4", OPT_msse4),
4257 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4258 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4259 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4260 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4261 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4262 IX86_ATTR_ISA ("fma", OPT_mfma),
4263 IX86_ATTR_ISA ("xop", OPT_mxop),
4264 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4265 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4266 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4267 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4268 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4269 IX86_ATTR_ISA ("hle", OPT_mhle),
4270 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4271 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4272 IX86_ATTR_ISA ("adx", OPT_madx),
4273 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4274 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4275 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4278 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4280 /* string options */
4281 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4282 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4285 IX86_ATTR_YES ("cld",
4289 IX86_ATTR_NO ("fancy-math-387",
4290 OPT_mfancy_math_387,
4291 MASK_NO_FANCY_MATH_387),
4293 IX86_ATTR_YES ("ieee-fp",
4297 IX86_ATTR_YES ("inline-all-stringops",
4298 OPT_minline_all_stringops,
4299 MASK_INLINE_ALL_STRINGOPS),
4301 IX86_ATTR_YES ("inline-stringops-dynamically",
4302 OPT_minline_stringops_dynamically,
4303 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4305 IX86_ATTR_NO ("align-stringops",
4306 OPT_mno_align_stringops,
4307 MASK_NO_ALIGN_STRINGOPS),
4309 IX86_ATTR_YES ("recip",
4315 /* If this is a list, recurse to get the options. */
4316 if (TREE_CODE (args) == TREE_LIST)
4320 for (; args; args = TREE_CHAIN (args))
4321 if (TREE_VALUE (args)
4322 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4323 p_strings, enum_opts_set))
4329 else if (TREE_CODE (args) != STRING_CST)
4331 error ("attribute %<target%> argument not a string");
4335 /* Handle multiple arguments separated by commas. */
4336 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4338 while (next_optstr && *next_optstr != '\0')
4340 char *p = next_optstr;
4342 char *comma = strchr (next_optstr, ',');
4343 const char *opt_string;
4344 size_t len, opt_len;
4349 enum ix86_opt_type type = ix86_opt_unknown;
4355 len = comma - next_optstr;
4356 next_optstr = comma + 1;
4364 /* Recognize no-xxx. */
4365 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4374 /* Find the option. */
4377 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4379 type = attrs[i].type;
4380 opt_len = attrs[i].len;
4381 if (ch == attrs[i].string[0]
4382 && ((type != ix86_opt_str && type != ix86_opt_enum)
4385 && memcmp (p, attrs[i].string, opt_len) == 0)
4388 mask = attrs[i].mask;
4389 opt_string = attrs[i].string;
4394 /* Process the option. */
4397 error ("attribute(target(\"%s\")) is unknown", orig_p);
4401 else if (type == ix86_opt_isa)
4403 struct cl_decoded_option decoded;
4405 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4406 ix86_handle_option (&global_options, &global_options_set,
4407 &decoded, input_location);
4410 else if (type == ix86_opt_yes || type == ix86_opt_no)
4412 if (type == ix86_opt_no)
4413 opt_set_p = !opt_set_p;
4416 target_flags |= mask;
4418 target_flags &= ~mask;
4421 else if (type == ix86_opt_str)
4425 error ("option(\"%s\") was already specified", opt_string);
4429 p_strings[opt] = xstrdup (p + opt_len);
4432 else if (type == ix86_opt_enum)
4437 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4439 set_option (&global_options, enum_opts_set, opt, value,
4440 p + opt_len, DK_UNSPECIFIED, input_location,
4444 error ("attribute(target(\"%s\")) is unknown", orig_p);
4456 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4459 ix86_valid_target_attribute_tree (tree args)
4461 const char *orig_arch_string = ix86_arch_string;
4462 const char *orig_tune_string = ix86_tune_string;
4463 enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
4464 int orig_tune_defaulted = ix86_tune_defaulted;
4465 int orig_arch_specified = ix86_arch_specified;
4466 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4469 struct cl_target_option *def
4470 = TREE_TARGET_OPTION (target_option_default_node);
4471 struct gcc_options enum_opts_set;
4473 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4475 /* Process each of the options on the chain. */
4476 if (! ix86_valid_target_attribute_inner_p (args, option_strings,
4478 return error_mark_node;
4480 /* If the changed options are different from the default, rerun
4481 ix86_option_override_internal, and then save the options away.
4482 The string options are attribute options, and will be undone
4483 when we copy the save structure. */
4484 if (ix86_isa_flags != def->x_ix86_isa_flags
4485 || target_flags != def->x_target_flags
4486 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4487 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4488 || enum_opts_set.x_ix86_fpmath)
4490 /* If we are using the default tune= or arch=, undo the string assigned,
4491 and use the default. */
4492 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4493 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4494 else if (!orig_arch_specified)
4495 ix86_arch_string = NULL;
4497 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4498 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4499 else if (orig_tune_defaulted)
4500 ix86_tune_string = NULL;
4502 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4503 if (enum_opts_set.x_ix86_fpmath)
4504 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4505 else if (!TARGET_64BIT && TARGET_SSE)
4507 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4508 global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
4511 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4512 ix86_option_override_internal (false);
4514 /* Add any builtin functions with the new isa if any. */
4515 ix86_add_new_builtins (ix86_isa_flags);
4517 /* Save the current options unless we are validating options for
4519 t = build_target_option_node ();
4521 ix86_arch_string = orig_arch_string;
4522 ix86_tune_string = orig_tune_string;
4523 global_options_set.x_ix86_fpmath = orig_fpmath_set;
4525 /* Free up memory allocated to hold the strings */
4526 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4527 free (option_strings[i]);
4533 /* Hook to validate attribute((target("string"))). */
4536 ix86_valid_target_attribute_p (tree fndecl,
4537 tree ARG_UNUSED (name),
4539 int ARG_UNUSED (flags))
4541 struct cl_target_option cur_target;
4544 /* attribute((target("default"))) does nothing, beyond
4545 affecting multi-versioning. */
4546 if (TREE_VALUE (args)
4547 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4548 && TREE_CHAIN (args) == NULL_TREE
4549 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4552 tree old_optimize = build_optimization_node ();
4553 tree new_target, new_optimize;
4554 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4556 /* If the function changed the optimization levels as well as setting target
4557 options, start with the optimizations specified. */
4558 if (func_optimize && func_optimize != old_optimize)
4559 cl_optimization_restore (&global_options,
4560 TREE_OPTIMIZATION (func_optimize));
4562 /* The target attributes may also change some optimization flags, so update
4563 the optimization options if necessary. */
4564 cl_target_option_save (&cur_target, &global_options);
4565 new_target = ix86_valid_target_attribute_tree (args);
4566 new_optimize = build_optimization_node ();
4568 if (new_target == error_mark_node)
4571 else if (fndecl && new_target)
4573 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4575 if (old_optimize != new_optimize)
4576 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4579 cl_target_option_restore (&global_options, &cur_target);
4581 if (old_optimize != new_optimize)
4582 cl_optimization_restore (&global_options,
4583 TREE_OPTIMIZATION (old_optimize));
4589 /* Hook to determine if one function can safely inline another. */
4592 ix86_can_inline_p (tree caller, tree callee)
4595 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4596 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4598 /* If callee has no option attributes, then it is ok to inline. */
4602 /* If the caller has no option attributes but the callee does, then it is not ok to
4604 else if (!caller_tree)
4609 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4610 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4612 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
4613 function can inline an SSE2 function but an SSE2 function can't inline an SSE4
4615 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4616 != callee_opts->x_ix86_isa_flags)
4619 /* See if we have the same non-isa options. */
4620 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4623 /* See if arch, tune, etc. are the same. */
4624 else if (caller_opts->arch != callee_opts->arch)
4627 else if (caller_opts->tune != callee_opts->tune)
4630 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4633 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4644 /* Remember the last target of ix86_set_current_function. */
4645 static GTY(()) tree ix86_previous_fndecl;
4647 /* Establish appropriate back-end context for processing the function
4648 FNDECL. The argument might be NULL to indicate processing at top
4649 level, outside of any function scope. */
4651 ix86_set_current_function (tree fndecl)
4653 /* Only change the context if the function changes. This hook is called
4654 several times in the course of compiling a function, and we don't want to
4655 slow things down too much or call target_reinit when it isn't safe. */
4656 if (fndecl && fndecl != ix86_previous_fndecl)
4658 tree old_tree = (ix86_previous_fndecl
4659 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4662 tree new_tree = (fndecl
4663 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4666 ix86_previous_fndecl = fndecl;
4667 if (old_tree == new_tree)
4672 cl_target_option_restore (&global_options,
4673 TREE_TARGET_OPTION (new_tree));
4679 struct cl_target_option *def
4680 = TREE_TARGET_OPTION (target_option_current_node);
4682 cl_target_option_restore (&global_options, def);
4689 /* Return true if this goes in large data/bss. */
4692 ix86_in_large_data_p (tree exp)
4694 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4697 /* Functions are never large data. */
4698 if (TREE_CODE (exp) == FUNCTION_DECL)
4701 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4703 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4704 if (strcmp (section, ".ldata") == 0
4705 || strcmp (section, ".lbss") == 0)
4711 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4713 /* If this is an incomplete type with size 0, then we can't put it
4714 in data because it might be too big when completed. */
4715 if (!size || size > ix86_section_threshold)
4722 /* Switch to the appropriate section for output of DECL.
4723 DECL is either a `VAR_DECL' node or a constant of some sort.
4724 RELOC indicates whether forming the initial value of DECL requires
4725 link-time relocations. */
4727 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4731 x86_64_elf_select_section (tree decl, int reloc,
4732 unsigned HOST_WIDE_INT align)
4734 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4735 && ix86_in_large_data_p (decl))
4737 const char *sname = NULL;
4738 unsigned int flags = SECTION_WRITE;
4739 switch (categorize_decl_for_section (decl, reloc))
4744 case SECCAT_DATA_REL:
4745 sname = ".ldata.rel";
4747 case SECCAT_DATA_REL_LOCAL:
4748 sname = ".ldata.rel.local";
4750 case SECCAT_DATA_REL_RO:
4751 sname = ".ldata.rel.ro";
4753 case SECCAT_DATA_REL_RO_LOCAL:
4754 sname = ".ldata.rel.ro.local";
4758 flags |= SECTION_BSS;
4761 case SECCAT_RODATA_MERGE_STR:
4762 case SECCAT_RODATA_MERGE_STR_INIT:
4763 case SECCAT_RODATA_MERGE_CONST:
4767 case SECCAT_SRODATA:
4774 /* We don't split these for the medium model. Place them into
4775 default sections and hope for the best. */
4780 /* We might get called with string constants, but get_named_section
4781 doesn't like them as they are not DECLs. Also, we need to set
4782 flags in that case. */
4784 return get_section (sname, flags, NULL);
4785 return get_named_section (decl, sname, reloc);
4788 return default_elf_select_section (decl, reloc, align);
4791 /* Build up a unique section name, expressed as a
4792 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4793 RELOC indicates whether the initial value of EXP requires
4794 link-time relocations. */
4796 static void ATTRIBUTE_UNUSED
4797 x86_64_elf_unique_section (tree decl, int reloc)
4799 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4800 && ix86_in_large_data_p (decl))
4802 const char *prefix = NULL;
4803 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4804 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4806 switch (categorize_decl_for_section (decl, reloc))
4809 case SECCAT_DATA_REL:
4810 case SECCAT_DATA_REL_LOCAL:
4811 case SECCAT_DATA_REL_RO:
4812 case SECCAT_DATA_REL_RO_LOCAL:
4813 prefix = one_only ? ".ld" : ".ldata";
4816 prefix = one_only ? ".lb" : ".lbss";
4819 case SECCAT_RODATA_MERGE_STR:
4820 case SECCAT_RODATA_MERGE_STR_INIT:
4821 case SECCAT_RODATA_MERGE_CONST:
4822 prefix = one_only ? ".lr" : ".lrodata";
4824 case SECCAT_SRODATA:
4831 /* We don't split these for the medium model. Place them into
4832 default sections and hope for the best. */
4837 const char *name, *linkonce;
4840 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4841 name = targetm.strip_name_encoding (name);
4843 /* If we're using one_only, then there needs to be a .gnu.linkonce
4844 prefix to the section name. */
4845 linkonce = one_only ? ".gnu.linkonce" : "";
4847 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4849 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
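/* For example, a writable large-data variable "foo" ends up in the section
   ".ldata.foo", or ".gnu.linkonce.ld.foo" when one_only is in effect,
   following the prefix table above.  */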
4853 default_unique_section (decl, reloc);
4856 #ifdef COMMON_ASM_OP
4857 /* This says how to output assembler code to declare an
4858 uninitialized external linkage data object.
4860 For medium model x86-64 we need to use the .largecomm directive for
4863 x86_elf_aligned_common (FILE *file,
4864 const char *name, unsigned HOST_WIDE_INT size,
4867 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4868 && size > (unsigned int)ix86_section_threshold)
4869 fputs (".largecomm\t", file);
4871 fputs (COMMON_ASM_OP, file);
4872 assemble_name (file, name);
4873 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4874 size, align / BITS_PER_UNIT);
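/* This emits e.g. ".largecomm foo,4096,32" (name, size in bytes, alignment in
   bytes) for objects above the -mlarge-data-threshold (ix86_section_threshold)
   limit in the medium code model, and an ordinary ".comm" line otherwise.  */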
4878 /* Utility function for targets to use in implementing
4879 ASM_OUTPUT_ALIGNED_BSS. */
4882 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4883 const char *name, unsigned HOST_WIDE_INT size,
4886 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4887 && size > (unsigned int)ix86_section_threshold)
4888 switch_to_section (get_named_section (decl, ".lbss", 0));
4890 switch_to_section (bss_section);
4891 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4892 #ifdef ASM_DECLARE_OBJECT_NAME
4893 last_assemble_variable_decl = decl;
4894 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4896 /* The standard thing is to just output a label for the object. */
4897 ASM_OUTPUT_LABEL (file, name);
4898 #endif /* ASM_DECLARE_OBJECT_NAME */
4899 ASM_OUTPUT_SKIP (file, size ? size : 1);
4902 /* Decide whether we must probe the stack before any space allocation
4903 on this target. It's essentially TARGET_STACK_PROBE except when
4904 -fstack-check causes the stack to be already probed differently. */
4907 ix86_target_stack_probe (void)
4909 /* Do not probe the stack twice if static stack checking is enabled. */
4910 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4913 return TARGET_STACK_PROBE;
4916 /* Decide whether we can make a sibling call to a function. DECL is the
4917 declaration of the function being targeted by the call and EXP is the
4918 CALL_EXPR representing the call. */
4921 ix86_function_ok_for_sibcall (tree decl, tree exp)
4923 tree type, decl_or_type;
4926 /* If we are generating position-independent code, we cannot sibcall
4927 optimize any indirect call, or a direct call to a global function,
4928 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
4932 && (!decl || !targetm.binds_local_p (decl)))
4935 /* If we need to align the outgoing stack, then sibcalling would
4936 unalign the stack, which may break the called function. */
4937 if (ix86_minimum_incoming_stack_boundary (true)
4938 < PREFERRED_STACK_BOUNDARY)
4943 decl_or_type = decl;
4944 type = TREE_TYPE (decl);
4948 /* We're looking at the CALL_EXPR, we need the type of the function. */
4949 type = CALL_EXPR_FN (exp); /* pointer expression */
4950 type = TREE_TYPE (type); /* pointer type */
4951 type = TREE_TYPE (type); /* function type */
4952 decl_or_type = type;
4955 /* Check that the return value locations are the same. Like
4956 if we are returning floats on the 80387 register stack, we cannot
4957 make a sibcall from a function that doesn't return a float to a
4958 function that does or, conversely, from a function that does return
4959 a float to a function that doesn't; the necessary stack adjustment
4960 would not be executed. This is also the place we notice
4961 differences in the return value ABI. Note that it is ok for one
4962 of the functions to have void return type as long as the return
4963 value of the other is passed in a register. */
4964 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4965 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4967 if (STACK_REG_P (a) || STACK_REG_P (b))
4969 if (!rtx_equal_p (a, b))
4972 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4974 else if (!rtx_equal_p (a, b))
4979 /* The SYSV ABI has more call-clobbered registers;
4980 disallow sibcalls from MS to SYSV. */
4981 if (cfun->machine->call_abi == MS_ABI
4982 && ix86_function_type_abi (type) == SYSV_ABI)
4987 /* If this call is indirect, we'll need to be able to use a
4988 call-clobbered register for the address of the target function.
4989 Make sure that all such registers are not used for passing
4990 parameters. Note that DLLIMPORT functions are indirect. */
4992 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4994 if (ix86_function_regparm (type, NULL) >= 3)
4996 /* ??? Need to count the actual number of registers to be used,
4997 not the possible number of registers. Fix later. */
5003 /* Otherwise okay. That also includes certain types of indirect calls. */
5007 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5008 and "sseregparm" calling convention attributes;
5009 arguments as in struct attribute_spec.handler. */
5012 ix86_handle_cconv_attribute (tree *node, tree name,
5014 int flags ATTRIBUTE_UNUSED,
5017 if (TREE_CODE (*node) != FUNCTION_TYPE
5018 && TREE_CODE (*node) != METHOD_TYPE
5019 && TREE_CODE (*node) != FIELD_DECL
5020 && TREE_CODE (*node) != TYPE_DECL)
5022 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5024 *no_add_attrs = true;
5028 /* Can combine regparm with all attributes but fastcall and thiscall. */
5029 if (is_attribute_p ("regparm", name))
5033 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5035 error ("fastcall and regparm attributes are not compatible");
5038 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5040 error ("regparm and thiscall attributes are not compatible");
5043 cst = TREE_VALUE (args);
5044 if (TREE_CODE (cst) != INTEGER_CST)
5046 warning (OPT_Wattributes,
5047 "%qE attribute requires an integer constant argument",
5049 *no_add_attrs = true;
5051 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5053 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5055 *no_add_attrs = true;
5063 /* Do not warn when emulating the MS ABI. */
5064 if ((TREE_CODE (*node) != FUNCTION_TYPE
5065 && TREE_CODE (*node) != METHOD_TYPE)
5066 || ix86_function_type_abi (*node) != MS_ABI)
5067 warning (OPT_Wattributes, "%qE attribute ignored",
5069 *no_add_attrs = true;
5073 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5074 if (is_attribute_p ("fastcall", name))
5076 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5078 error ("fastcall and cdecl attributes are not compatible");
5080 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5082 error ("fastcall and stdcall attributes are not compatible");
5084 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5086 error ("fastcall and regparm attributes are not compatible");
5088 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5090 error ("fastcall and thiscall attributes are not compatible");
5094 /* Can combine stdcall with fastcall (redundant), regparm and
5096 else if (is_attribute_p ("stdcall", name))
5098 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5100 error ("stdcall and cdecl attributes are not compatible");
5102 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5104 error ("stdcall and fastcall attributes are not compatible");
5106 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5108 error ("stdcall and thiscall attributes are not compatible");
5112 /* Can combine cdecl with regparm and sseregparm. */
5113 else if (is_attribute_p ("cdecl", name))
5115 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5117 error ("stdcall and cdecl attributes are not compatible");
5119 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5121 error ("fastcall and cdecl attributes are not compatible");
5123 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5125 error ("cdecl and thiscall attributes are not compatible");
5128 else if (is_attribute_p ("thiscall", name))
5130 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5131 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5133 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5135 error ("stdcall and thiscall attributes are not compatible");
5137 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5139 error ("fastcall and thiscall attributes are not compatible");
5141 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5143 error ("cdecl and thiscall attributes are not compatible");
5147 /* Can combine sseregparm with all attributes. */
5152 /* The transactional memory builtins are implicitly regparm or fastcall
5153 depending on the ABI. Override the generic do-nothing attribute that
5154 these builtins were declared with, and replace it with one of the two
5155 attributes that we expect elsewhere. */
5158 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5159 tree args ATTRIBUTE_UNUSED,
5160 int flags ATTRIBUTE_UNUSED,
5165 /* In no case do we want to add the placeholder attribute. */
5166 *no_add_attrs = true;
5168 /* The 64-bit ABI is unchanged for transactional memory. */
5172 /* ??? Is there a better way to validate 32-bit windows? We have
5173 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5174 if (CHECK_STACK_LIMIT > 0)
5175 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5178 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5179 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5181 decl_attributes (node, alt, flags);
5186 /* This function determines from TYPE the calling-convention. */
5189 ix86_get_callcvt (const_tree type)
5191 unsigned int ret = 0;
5196 return IX86_CALLCVT_CDECL;
5198 attrs = TYPE_ATTRIBUTES (type);
5199 if (attrs != NULL_TREE)
5201 if (lookup_attribute ("cdecl", attrs))
5202 ret |= IX86_CALLCVT_CDECL;
5203 else if (lookup_attribute ("stdcall", attrs))
5204 ret |= IX86_CALLCVT_STDCALL;
5205 else if (lookup_attribute ("fastcall", attrs))
5206 ret |= IX86_CALLCVT_FASTCALL;
5207 else if (lookup_attribute ("thiscall", attrs))
5208 ret |= IX86_CALLCVT_THISCALL;
5210 /* Regparm isn't allowed for thiscall and fastcall. */
5211 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5213 if (lookup_attribute ("regparm", attrs))
5214 ret |= IX86_CALLCVT_REGPARM;
5215 if (lookup_attribute ("sseregparm", attrs))
5216 ret |= IX86_CALLCVT_SSEREGPARM;
5219 if (IX86_BASE_CALLCVT(ret) != 0)
5223 is_stdarg = stdarg_p (type);
5224 if (TARGET_RTD && !is_stdarg)
5225 return IX86_CALLCVT_STDCALL | ret;
5229 || TREE_CODE (type) != METHOD_TYPE
5230 || ix86_function_type_abi (type) != MS_ABI)
5231 return IX86_CALLCVT_CDECL | ret;
5233 return IX86_CALLCVT_THISCALL;
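/* For instance (illustrative only): with -mrtd, a prototyped non-variadic
   function that carries no explicit convention attribute is treated as if
   it had been declared

     void __attribute__ ((stdcall)) f (int);

   whereas a variadic declaration keeps the caller-pop cdecl convention,
   which is why stdarg_p is checked above.  */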
5236 /* Return 0 if the attributes for two types are incompatible, 1 if they
5237 are compatible, and 2 if they are nearly compatible (which causes a
5238 warning to be generated). */
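/* A sketch of the effect (my illustration, not from the sources): given

     typedef void (__attribute__ ((stdcall)) *pstd) (int);
     typedef void (*pc) (int);    (default, cdecl)

   the two pointed-to function types receive different IX86_CALLCVT_* bits
   from ix86_get_callcvt, so this hook reports them as incompatible and the
   front end diagnoses the pointer conversion.  */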
5241 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5243 unsigned int ccvt1, ccvt2;
5245 if (TREE_CODE (type1) != FUNCTION_TYPE
5246 && TREE_CODE (type1) != METHOD_TYPE)
5249 ccvt1 = ix86_get_callcvt (type1);
5250 ccvt2 = ix86_get_callcvt (type2);
5253 if (ix86_function_regparm (type1, NULL)
5254 != ix86_function_regparm (type2, NULL))
5260 /* Return the regparm value for a function with the indicated TYPE and DECL.
5261 DECL may be NULL when calling function indirectly
5262 or considering a libcall. */
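/* Rough example (my illustration, not authoritative): for

     void __attribute__ ((regparm (3))) f (int a, int b, int c);

   this returns 3, and the 32-bit integer arguments are expected in
   %eax, %edx and %ecx instead of on the stack.  For a local static
   function the code below may additionally raise the regparm value on
   its own when optimizing.  */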
5265 ix86_function_regparm (const_tree type, const_tree decl)
5272 return (ix86_function_type_abi (type) == SYSV_ABI
5273 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5274 ccvt = ix86_get_callcvt (type);
5275 regparm = ix86_regparm;
5277 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5279 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5282 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5286 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5288 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5291 /* Use register calling convention for local functions when possible. */
5293 && TREE_CODE (decl) == FUNCTION_DECL
5295 && !(profile_flag && !flag_fentry))
5297 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5298 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5299 if (i && i->local && i->can_change_signature)
5301 int local_regparm, globals = 0, regno;
5303 /* Make sure no regparm register is taken by a
5304 fixed register variable. */
5305 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5306 if (fixed_regs[local_regparm])
5309 /* We don't want to use regparm(3) for nested functions as
5310 these use a static chain pointer in the third argument. */
5311 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5314 /* In 32-bit mode save a register for the split stack. */
5315 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5318 /* Each fixed register usage increases register pressure,
5319 so fewer registers should be used for argument passing.
5320 This functionality can be overridden by an explicit regparm value. */
5322 for (regno = AX_REG; regno <= DI_REG; regno++)
5323 if (fixed_regs[regno])
5327 = globals < local_regparm ? local_regparm - globals : 0;
5329 if (local_regparm > regparm)
5330 regparm = local_regparm;
5337 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5338 DFmode (2) arguments in SSE registers for a function with the
5339 indicated TYPE and DECL. DECL may be NULL when calling function
5340 indirectly or considering a libcall. Otherwise return 0. */
5343 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5345 gcc_assert (!TARGET_64BIT);
5347 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5348 by the sseregparm attribute. */
5349 if (TARGET_SSEREGPARM
5350 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5357 error ("calling %qD with attribute sseregparm without "
5358 "SSE/SSE2 enabled", decl);
5360 error ("calling %qT with attribute sseregparm without "
5361 "SSE/SSE2 enabled", type);
5369 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5370 (and DFmode for SSE2) arguments in SSE registers. */
5371 if (decl && TARGET_SSE_MATH && optimize
5372 && !(profile_flag && !flag_fentry))
5374 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5375 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5376 if (i && i->local && i->can_change_signature)
5377 return TARGET_SSE2 ? 2 : 1;
5383 /* Return true if EAX is live at the start of the function. Used by
5384 ix86_expand_prologue to determine if we need special help before
5385 calling allocate_stack_worker. */
5388 ix86_eax_live_at_start_p (void)
5390 /* Cheat. Don't bother working forward from ix86_function_regparm
5391 to the function type to whether an actual argument is located in
5392 eax. Instead just look at cfg info, which is still close enough
5393 to correct at this point. This gives false positives for broken
5394 functions that might use uninitialized data that happens to be
5395 allocated in eax, but who cares? */
5396 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
5400 ix86_keep_aggregate_return_pointer (tree fntype)
5406 attr = lookup_attribute ("callee_pop_aggregate_return",
5407 TYPE_ATTRIBUTES (fntype));
5409 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5411 /* For 32-bit MS-ABI the default is to keep the aggregate return pointer. */
5413 if (ix86_function_type_abi (fntype) == MS_ABI)
5416 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5419 /* Value is the number of bytes of arguments automatically
5420 popped when returning from a subroutine call.
5421 FUNDECL is the declaration node of the function (as a tree),
5422 FUNTYPE is the data type of the function (as a tree),
5423 or for a library call it is an identifier node for the subroutine name.
5424 SIZE is the number of bytes of arguments passed on the stack.
5426 On the 80386, the RTD insn may be used to pop them if the number
5427 of args is fixed, but if the number is variable then the caller
5428 must pop them all. RTD can't be used for library calls now
5429 because the library is compiled with the Unix compiler.
5430 Use of RTD is a selectable option, since it is incompatible with
5431 standard Unix calling sequences. If the option is not selected,
5432 the caller must always pop the args.
5434 The attribute stdcall is equivalent to RTD on a per module basis. */
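/* Worked example (illustrative): for a 32-bit function declared as

     void __attribute__ ((stdcall)) f (int a, int b);

   SIZE is 8, the convention is callee-pop, and this hook returns 8, which
   eventually materializes as a "ret $8" in the callee.  A variadic
   function always yields 0 here, since only the caller knows how many
   bytes were pushed.  */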
5437 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5441 /* None of the 64-bit ABIs pop arguments. */
5445 ccvt = ix86_get_callcvt (funtype);
5447 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5448 | IX86_CALLCVT_THISCALL)) != 0
5449 && ! stdarg_p (funtype))
5452 /* Lose any fake structure return argument if it is passed on the stack. */
5453 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5454 && !ix86_keep_aggregate_return_pointer (funtype))
5456 int nregs = ix86_function_regparm (funtype, fundecl);
5458 return GET_MODE_SIZE (Pmode);
5464 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5467 ix86_legitimate_combined_insn (rtx insn)
5469 /* Check operand constraints in case hard registers were propagated
5470 into insn pattern. This check prevents combine pass from
5471 generating insn patterns with invalid hard register operands.
5472 These invalid insns can eventually confuse reload to error out
5473 with a spill failure. See also PRs 46829 and 46843. */
5474 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5478 extract_insn (insn);
5479 preprocess_constraints ();
5481 for (i = 0; i < recog_data.n_operands; i++)
5483 rtx op = recog_data.operand[i];
5484 enum machine_mode mode = GET_MODE (op);
5485 struct operand_alternative *op_alt;
5490 /* A unary operator may be accepted by the predicate, but it
5491 is irrelevant for matching constraints. */
5495 if (GET_CODE (op) == SUBREG)
5497 if (REG_P (SUBREG_REG (op))
5498 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5499 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5500 GET_MODE (SUBREG_REG (op)),
5503 op = SUBREG_REG (op);
5506 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5509 op_alt = recog_op_alt[i];
5511 /* Operand has no constraints, anything is OK. */
5512 win = !recog_data.n_alternatives;
5514 for (j = 0; j < recog_data.n_alternatives; j++)
5516 if (op_alt[j].anything_ok
5517 || (op_alt[j].matches != -1
5519 (recog_data.operand[i],
5520 recog_data.operand[op_alt[j].matches]))
5521 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5536 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5538 static unsigned HOST_WIDE_INT
5539 ix86_asan_shadow_offset (void)
5541 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5542 : HOST_WIDE_INT_C (0x7fff8000))
5543 : (HOST_WIDE_INT_1 << 29);
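/* For reference (a sketch of the AddressSanitizer mapping, not part of
   this file's logic): the instrumentation computes

     shadow_byte_addr = (addr >> 3) + ix86_asan_shadow_offset ()

   so each shadow byte describes an aligned 8-byte application granule;
   the three constants above select the region reserved for shadow memory
   on LP64 Mach-O, other LP64 targets, and 32-bit targets.  */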
5546 /* Argument support functions. */
5548 /* Return true when register may be used to pass function parameters. */
5550 ix86_function_arg_regno_p (int regno)
5553 const int *parm_regs;
5558 return (regno < REGPARM_MAX
5559 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5561 return (regno < REGPARM_MAX
5562 || (TARGET_MMX && MMX_REGNO_P (regno)
5563 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5564 || (TARGET_SSE && SSE_REGNO_P (regno)
5565 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5570 if (SSE_REGNO_P (regno) && TARGET_SSE)
5575 if (TARGET_SSE && SSE_REGNO_P (regno)
5576 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5580 /* TODO: The function should depend on current function ABI but
5581 builtins.c would need updating then. Therefore we use the default ABI. */
5584 /* RAX is used as hidden argument to va_arg functions. */
5585 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5588 if (ix86_abi == MS_ABI)
5589 parm_regs = x86_64_ms_abi_int_parameter_registers;
5591 parm_regs = x86_64_int_parameter_registers;
5592 for (i = 0; i < (ix86_abi == MS_ABI
5593 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5594 if (regno == parm_regs[i])
5599 /* Return true if we do not know how to pass TYPE solely in registers. */
5602 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5604 if (must_pass_in_stack_var_size_or_pad (mode, type))
5607 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5608 The layout_type routine is crafty and tries to trick us into passing
5609 currently unsupported vector types on the stack by using TImode. */
5610 return (!TARGET_64BIT && mode == TImode
5611 && type && TREE_CODE (type) != VECTOR_TYPE);
5614 /* Return the size, in bytes, of the area reserved for arguments passed
5615 in registers for the function represented by FNDECL, depending on the ABI used. */
5618 ix86_reg_parm_stack_space (const_tree fndecl)
5620 enum calling_abi call_abi = SYSV_ABI;
5621 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5622 call_abi = ix86_function_abi (fndecl);
5624 call_abi = ix86_function_type_abi (fndecl);
5625 if (TARGET_64BIT && call_abi == MS_ABI)
5630 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the call ABI used. */
5633 ix86_function_type_abi (const_tree fntype)
5635 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5637 enum calling_abi abi = ix86_abi;
5638 if (abi == SYSV_ABI)
5640 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5643 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5651 ix86_function_ms_hook_prologue (const_tree fn)
5653 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5655 if (decl_function_context (fn) != NULL_TREE)
5656 error_at (DECL_SOURCE_LOCATION (fn),
5657 "ms_hook_prologue is not compatible with nested function");
5664 static enum calling_abi
5665 ix86_function_abi (const_tree fndecl)
5669 return ix86_function_type_abi (TREE_TYPE (fndecl));
5672 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the call ABI used. */
5675 ix86_cfun_abi (void)
5679 return cfun->machine->call_abi;
5682 /* Write the extra assembler code needed to declare a function properly. */
5685 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5688 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5692 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5693 unsigned int filler_cc = 0xcccccccc;
5695 for (i = 0; i < filler_count; i += 4)
5696 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5699 #ifdef SUBTARGET_ASM_UNWIND_INIT
5700 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
5703 ASM_OUTPUT_LABEL (asm_out_file, fname);
5705 /* Output magic byte marker, if hot-patch attribute is set. */
5710 /* leaq [%rsp + 0], %rsp */
5711 asm_fprintf (asm_out_file, ASM_BYTE
5712 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5716 /* movl.s %edi, %edi
5718 movl.s %esp, %ebp */
5719 asm_fprintf (asm_out_file, ASM_BYTE
5720 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5726 extern void init_regs (void);
5728 /* Implementation of the call ABI switching target hook. The call
5729 register sets specific to FNDECL are set up here. See also
5730 ix86_conditional_register_usage for more details. */
5732 ix86_call_abi_override (const_tree fndecl)
5734 if (fndecl == NULL_TREE)
5735 cfun->machine->call_abi = ix86_abi;
5737 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5740 /* 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
5741 expensive re-initialization of init_regs each time we switch function context
5742 since this is needed only during RTL expansion. */
5744 ix86_maybe_switch_abi (void)
5747 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5751 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5752 for a call to a function whose data type is FNTYPE.
5753 For a library call, FNTYPE is 0. */
5756 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5757 tree fntype, /* tree ptr for function decl */
5758 rtx libname, /* SYMBOL_REF of library name or 0 */
5762 struct cgraph_local_info *i;
5764 memset (cum, 0, sizeof (*cum));
5768 i = cgraph_local_info (fndecl);
5769 cum->call_abi = ix86_function_abi (fndecl);
5774 cum->call_abi = ix86_function_type_abi (fntype);
5777 cum->caller = caller;
5779 /* Set up the number of registers to use for passing arguments. */
5781 if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5782 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5783 "or subtarget optimization implying it");
5784 cum->nregs = ix86_regparm;
5787 cum->nregs = (cum->call_abi == SYSV_ABI
5788 ? X86_64_REGPARM_MAX
5789 : X86_64_MS_REGPARM_MAX);
5793 cum->sse_nregs = SSE_REGPARM_MAX;
5796 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5797 ? X86_64_SSE_REGPARM_MAX
5798 : X86_64_MS_SSE_REGPARM_MAX);
5802 cum->mmx_nregs = MMX_REGPARM_MAX;
5803 cum->warn_avx = true;
5804 cum->warn_sse = true;
5805 cum->warn_mmx = true;
5807 /* Because the type might mismatch between caller and callee, we need to
5808 use the actual function type for local calls.
5809 FIXME: cgraph_analyze can be told to actually record if a function uses
5810 va_start, so maybe_vaarg can be made more aggressive for local functions.
5812 FIXME: once the type system is fixed, we won't need this code anymore. */
5813 if (i && i->local && i->can_change_signature)
5814 fntype = TREE_TYPE (fndecl);
5815 cum->maybe_vaarg = (fntype
5816 ? (!prototype_p (fntype) || stdarg_p (fntype))
5821 /* If there are variable arguments, then we won't pass anything
5822 in registers in 32-bit mode. */
5823 if (stdarg_p (fntype))
5834 /* Use ecx and edx registers if function has fastcall attribute,
5835 else look for regparm information. */
5838 unsigned int ccvt = ix86_get_callcvt (fntype);
5839 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5842 cum->fastcall = 1; /* Same first register as in fastcall. */
5844 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5850 cum->nregs = ix86_function_regparm (fntype, fndecl);
5853 /* Set up the number of SSE registers used for passing SFmode
5854 and DFmode arguments. Warn for mismatching ABI. */
5855 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5859 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5860 But in the case of vector types, it is some vector mode.
5862 When we have only some of our vector isa extensions enabled, then there
5863 are some modes for which vector_mode_supported_p is false. For these
5864 modes, the generic vector support in gcc will choose some non-vector mode
5865 in order to implement the type. By computing the natural mode, we'll
5866 select the proper ABI location for the operand and not depend on whatever
5867 the middle-end decides to do with these vector types.
5869 The middle-end can't deal with vector types > 16 bytes. In this
5870 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
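/* Illustrative example (an assumption about typical input, not taken from
   the sources): for a GNU vector type such as

     typedef float v4sf __attribute__ ((vector_size (16)));

   this function walks the machine modes to find the one with four SFmode
   units, i.e. V4SFmode, so the argument gets its proper ABI slot even when
   the generic middle-end picked a different mode for the type.  A 32-byte
   vector without AVX instead keeps TYPE_MODE and triggers the one-time
   "changes the ABI" warning below.  */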
5873 static enum machine_mode
5874 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5876 enum machine_mode mode = TYPE_MODE (type);
5878 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5880 HOST_WIDE_INT size = int_size_in_bytes (type);
5881 if ((size == 8 || size == 16 || size == 32)
5882 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5883 && TYPE_VECTOR_SUBPARTS (type) > 1)
5885 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5887 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5888 mode = MIN_MODE_VECTOR_FLOAT;
5890 mode = MIN_MODE_VECTOR_INT;
5892 /* Get the mode which has this inner mode and number of units. */
5893 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5894 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5895 && GET_MODE_INNER (mode) == innermode)
5897 if (size == 32 && !TARGET_AVX)
5899 static bool warnedavx;
5906 warning (0, "AVX vector argument without AVX "
5907 "enabled changes the ABI");
5909 return TYPE_MODE (type);
5911 else if ((size == 8 || size == 16) && !TARGET_SSE)
5913 static bool warnedsse;
5920 warning (0, "SSE vector argument without SSE "
5921 "enabled changes the ABI");
5936 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5937 this may not agree with the mode that the type system has chosen for the
5938 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5939 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5942 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5947 if (orig_mode != BLKmode)
5948 tmp = gen_rtx_REG (orig_mode, regno);
5951 tmp = gen_rtx_REG (mode, regno);
5952 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5953 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5959 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
5960 of this code is to classify each eightbyte of an incoming argument by register
5961 class and assign registers accordingly. */
5963 /* Return the union class of CLASS1 and CLASS2.
5964 See the x86-64 PS ABI for details. */
5966 static enum x86_64_reg_class
5967 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5969 /* Rule #1: If both classes are equal, this is the resulting class. */
5970 if (class1 == class2)
5973 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
5975 if (class1 == X86_64_NO_CLASS)
5977 if (class2 == X86_64_NO_CLASS)
5980 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5981 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5982 return X86_64_MEMORY_CLASS;
5984 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5985 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5986 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5987 return X86_64_INTEGERSI_CLASS;
5988 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5989 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5990 return X86_64_INTEGER_CLASS;
5992 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
5994 if (class1 == X86_64_X87_CLASS
5995 || class1 == X86_64_X87UP_CLASS
5996 || class1 == X86_64_COMPLEX_X87_CLASS
5997 || class2 == X86_64_X87_CLASS
5998 || class2 == X86_64_X87UP_CLASS
5999 || class2 == X86_64_COMPLEX_X87_CLASS)
6000 return X86_64_MEMORY_CLASS;
6002 /* Rule #6: Otherwise class SSE is used. */
6003 return X86_64_SSE_CLASS;
6006 /* Classify the argument of type TYPE and mode MODE.
6007 CLASSES will be filled by the register class used to pass each word
6008 of the operand. The number of words is returned. In case the parameter
6009 should be passed in memory, 0 is returned. As a special case for zero
6010 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6012 BIT_OFFSET is used internally for handling records and specifies the
6013 offset in bits, modulo 256, to avoid overflow cases.
6015 See the x86-64 PS ABI for details.
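/* Worked example (my illustration of the psABI rules, not from the
   sources): for

     struct s { double d; int i; };    (16 bytes)

   the first eightbyte is classified SSE (SSEDF, from the double) and the
   second INTEGER (INTEGERSI, from the int), so examine_argument reports
   one SSE and one integer register, e.g. %xmm0 plus %rdi for the first
   argument of a SysV call.  */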
6019 classify_argument (enum machine_mode mode, const_tree type,
6020 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6022 HOST_WIDE_INT bytes =
6023 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6025 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6027 /* Variable sized entities are always passed/returned in memory. */
6031 if (mode != VOIDmode
6032 && targetm.calls.must_pass_in_stack (mode, type))
6035 if (type && AGGREGATE_TYPE_P (type))
6039 enum x86_64_reg_class subclasses[MAX_CLASSES];
6041 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6045 for (i = 0; i < words; i++)
6046 classes[i] = X86_64_NO_CLASS;
6048 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6049 signal the memory class, so handle it as a special case. */
6052 classes[0] = X86_64_NO_CLASS;
6056 /* Classify each field of record and merge classes. */
6057 switch (TREE_CODE (type))
6060 /* And now merge the fields of structure. */
6061 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6063 if (TREE_CODE (field) == FIELD_DECL)
6067 if (TREE_TYPE (field) == error_mark_node)
6070 /* Bitfields are always classified as integer. Handle them
6071 early, since later code would consider them to be
6072 misaligned integers. */
6073 if (DECL_BIT_FIELD (field))
6075 for (i = (int_bit_position (field)
6076 + (bit_offset % 64)) / 8 / 8;
6077 i < ((int_bit_position (field) + (bit_offset % 64))
6078 + tree_low_cst (DECL_SIZE (field), 0)
6081 merge_classes (X86_64_INTEGER_CLASS,
6088 type = TREE_TYPE (field);
6090 /* Flexible array member is ignored. */
6091 if (TYPE_MODE (type) == BLKmode
6092 && TREE_CODE (type) == ARRAY_TYPE
6093 && TYPE_SIZE (type) == NULL_TREE
6094 && TYPE_DOMAIN (type) != NULL_TREE
6095 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6100 if (!warned && warn_psabi)
6103 inform (input_location,
6104 "the ABI of passing struct with"
6105 " a flexible array member has"
6106 " changed in GCC 4.4");
6110 num = classify_argument (TYPE_MODE (type), type,
6112 (int_bit_position (field)
6113 + bit_offset) % 256);
6116 pos = (int_bit_position (field)
6117 + (bit_offset % 64)) / 8 / 8;
6118 for (i = 0; i < num && (i + pos) < words; i++)
6120 merge_classes (subclasses[i], classes[i + pos]);
6127 /* Arrays are handled as small records. */
6130 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6131 TREE_TYPE (type), subclasses, bit_offset);
6135 /* The partial classes are now full classes. */
6136 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6137 subclasses[0] = X86_64_SSE_CLASS;
6138 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6139 && !((bit_offset % 64) == 0 && bytes == 4))
6140 subclasses[0] = X86_64_INTEGER_CLASS;
6142 for (i = 0; i < words; i++)
6143 classes[i] = subclasses[i % num];
6148 case QUAL_UNION_TYPE:
6149 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
6151 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6153 if (TREE_CODE (field) == FIELD_DECL)
6157 if (TREE_TYPE (field) == error_mark_node)
6160 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6161 TREE_TYPE (field), subclasses,
6165 for (i = 0; i < num; i++)
6166 classes[i] = merge_classes (subclasses[i], classes[i]);
6177 /* When size > 16 bytes, if the first one isn't
6178 X86_64_SSE_CLASS or any other ones aren't
6179 X86_64_SSEUP_CLASS, everything should be passed in memory. */
6181 if (classes[0] != X86_64_SSE_CLASS)
6184 for (i = 1; i < words; i++)
6185 if (classes[i] != X86_64_SSEUP_CLASS)
6189 /* Final merger cleanup. */
6190 for (i = 0; i < words; i++)
6192 /* If one class is MEMORY, everything should be passed in memory. */
6194 if (classes[i] == X86_64_MEMORY_CLASS)
6197 /* The X86_64_SSEUP_CLASS should always be preceded by
6198 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6199 if (classes[i] == X86_64_SSEUP_CLASS
6200 && classes[i - 1] != X86_64_SSE_CLASS
6201 && classes[i - 1] != X86_64_SSEUP_CLASS)
6203 /* The first one should never be X86_64_SSEUP_CLASS. */
6204 gcc_assert (i != 0);
6205 classes[i] = X86_64_SSE_CLASS;
6208 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6209 everything should be passed in memory. */
6210 if (classes[i] == X86_64_X87UP_CLASS
6211 && (classes[i - 1] != X86_64_X87_CLASS))
6215 /* The first one should never be X86_64_X87UP_CLASS. */
6216 gcc_assert (i != 0);
6217 if (!warned && warn_psabi)
6220 inform (input_location,
6221 "the ABI of passing union with long double"
6222 " has changed in GCC 4.4");
6230 /* Compute the alignment needed. We align all types to natural boundaries with
6231 the exception of XFmode, which is aligned to 64 bits. */
6232 if (mode != VOIDmode && mode != BLKmode)
6234 int mode_alignment = GET_MODE_BITSIZE (mode);
6237 mode_alignment = 128;
6238 else if (mode == XCmode)
6239 mode_alignment = 256;
6240 if (COMPLEX_MODE_P (mode))
6241 mode_alignment /= 2;
6242 /* Misaligned fields are always returned in memory. */
6243 if (bit_offset % mode_alignment)
6247 /* For V1xx modes, just use the base mode. */
6248 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6249 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6250 mode = GET_MODE_INNER (mode);
6252 /* Classification of atomic types. */
6257 classes[0] = X86_64_SSE_CLASS;
6260 classes[0] = X86_64_SSE_CLASS;
6261 classes[1] = X86_64_SSEUP_CLASS;
6271 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
6275 classes[0] = X86_64_INTEGERSI_CLASS;
6278 else if (size <= 64)
6280 classes[0] = X86_64_INTEGER_CLASS;
6283 else if (size <= 64+32)
6285 classes[0] = X86_64_INTEGER_CLASS;
6286 classes[1] = X86_64_INTEGERSI_CLASS;
6289 else if (size <= 64+64)
6291 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6299 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6303 /* OImode shouldn't be used directly. */
6308 if (!(bit_offset % 64))
6309 classes[0] = X86_64_SSESF_CLASS;
6311 classes[0] = X86_64_SSE_CLASS;
6314 classes[0] = X86_64_SSEDF_CLASS;
6317 classes[0] = X86_64_X87_CLASS;
6318 classes[1] = X86_64_X87UP_CLASS;
6321 classes[0] = X86_64_SSE_CLASS;
6322 classes[1] = X86_64_SSEUP_CLASS;
6325 classes[0] = X86_64_SSE_CLASS;
6326 if (!(bit_offset % 64))
6332 if (!warned && warn_psabi)
6335 inform (input_location,
6336 "the ABI of passing structure with complex float"
6337 " member has changed in GCC 4.4");
6339 classes[1] = X86_64_SSESF_CLASS;
6343 classes[0] = X86_64_SSEDF_CLASS;
6344 classes[1] = X86_64_SSEDF_CLASS;
6347 classes[0] = X86_64_COMPLEX_X87_CLASS;
6350 /* This mode is larger than 16 bytes. */
6358 classes[0] = X86_64_SSE_CLASS;
6359 classes[1] = X86_64_SSEUP_CLASS;
6360 classes[2] = X86_64_SSEUP_CLASS;
6361 classes[3] = X86_64_SSEUP_CLASS;
6369 classes[0] = X86_64_SSE_CLASS;
6370 classes[1] = X86_64_SSEUP_CLASS;
6378 classes[0] = X86_64_SSE_CLASS;
6384 gcc_assert (VECTOR_MODE_P (mode));
6389 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6391 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6392 classes[0] = X86_64_INTEGERSI_CLASS;
6394 classes[0] = X86_64_INTEGER_CLASS;
6395 classes[1] = X86_64_INTEGER_CLASS;
6396 return 1 + (bytes > 8);
6400 /* Examine the argument and set the number of registers required in each
6401 class. Return 0 iff the parameter should be passed in memory. */
6403 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6404 int *int_nregs, int *sse_nregs)
6406 enum x86_64_reg_class regclass[MAX_CLASSES];
6407 int n = classify_argument (mode, type, regclass, 0);
6413 for (n--; n >= 0; n--)
6414 switch (regclass[n])
6416 case X86_64_INTEGER_CLASS:
6417 case X86_64_INTEGERSI_CLASS:
6420 case X86_64_SSE_CLASS:
6421 case X86_64_SSESF_CLASS:
6422 case X86_64_SSEDF_CLASS:
6425 case X86_64_NO_CLASS:
6426 case X86_64_SSEUP_CLASS:
6428 case X86_64_X87_CLASS:
6429 case X86_64_X87UP_CLASS:
6433 case X86_64_COMPLEX_X87_CLASS:
6434 return in_return ? 2 : 0;
6435 case X86_64_MEMORY_CLASS:
6441 /* Construct container for the argument used by GCC interface. See
6442 FUNCTION_ARG for the detailed description. */
6445 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6446 const_tree type, int in_return, int nintregs, int nsseregs,
6447 const int *intreg, int sse_regno)
6449 /* The following variables hold the static issued_error state. */
6450 static bool issued_sse_arg_error;
6451 static bool issued_sse_ret_error;
6452 static bool issued_x87_ret_error;
6454 enum machine_mode tmpmode;
6456 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6457 enum x86_64_reg_class regclass[MAX_CLASSES];
6461 int needed_sseregs, needed_intregs;
6462 rtx exp[MAX_CLASSES];
6465 n = classify_argument (mode, type, regclass, 0);
6468 if (!examine_argument (mode, type, in_return, &needed_intregs,
6471 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6474 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6475 some less clueful developer tries to use floating-point anyway. */
6476 if (needed_sseregs && !TARGET_SSE)
6480 if (!issued_sse_ret_error)
6482 error ("SSE register return with SSE disabled");
6483 issued_sse_ret_error = true;
6486 else if (!issued_sse_arg_error)
6488 error ("SSE register argument with SSE disabled");
6489 issued_sse_arg_error = true;
6494 /* Likewise, error if the ABI requires us to return values in the
6495 x87 registers and the user specified -mno-80387. */
6496 if (!TARGET_80387 && in_return)
6497 for (i = 0; i < n; i++)
6498 if (regclass[i] == X86_64_X87_CLASS
6499 || regclass[i] == X86_64_X87UP_CLASS
6500 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6502 if (!issued_x87_ret_error)
6504 error ("x87 register return with x87 disabled");
6505 issued_x87_ret_error = true;
6510 /* First construct the simple cases. Avoid SCmode, since we want to use
6511 a single register to pass this type. */
6512 if (n == 1 && mode != SCmode)
6513 switch (regclass[0])
6515 case X86_64_INTEGER_CLASS:
6516 case X86_64_INTEGERSI_CLASS:
6517 return gen_rtx_REG (mode, intreg[0]);
6518 case X86_64_SSE_CLASS:
6519 case X86_64_SSESF_CLASS:
6520 case X86_64_SSEDF_CLASS:
6521 if (mode != BLKmode)
6522 return gen_reg_or_parallel (mode, orig_mode,
6523 SSE_REGNO (sse_regno));
6525 case X86_64_X87_CLASS:
6526 case X86_64_COMPLEX_X87_CLASS:
6527 return gen_rtx_REG (mode, FIRST_STACK_REG);
6528 case X86_64_NO_CLASS:
6529 /* Zero sized array, struct or class. */
6535 && regclass[0] == X86_64_SSE_CLASS
6536 && regclass[1] == X86_64_SSEUP_CLASS
6538 return gen_reg_or_parallel (mode, orig_mode,
6539 SSE_REGNO (sse_regno));
6541 && regclass[0] == X86_64_SSE_CLASS
6542 && regclass[1] == X86_64_SSEUP_CLASS
6543 && regclass[2] == X86_64_SSEUP_CLASS
6544 && regclass[3] == X86_64_SSEUP_CLASS
6546 return gen_reg_or_parallel (mode, orig_mode,
6547 SSE_REGNO (sse_regno));
6549 && regclass[0] == X86_64_X87_CLASS
6550 && regclass[1] == X86_64_X87UP_CLASS)
6551 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6554 && regclass[0] == X86_64_INTEGER_CLASS
6555 && regclass[1] == X86_64_INTEGER_CLASS
6556 && (mode == CDImode || mode == TImode || mode == TFmode)
6557 && intreg[0] + 1 == intreg[1])
6558 return gen_rtx_REG (mode, intreg[0]);
6560 /* Otherwise figure out the entries of the PARALLEL. */
6561 for (i = 0; i < n; i++)
6565 switch (regclass[i])
6567 case X86_64_NO_CLASS:
6569 case X86_64_INTEGER_CLASS:
6570 case X86_64_INTEGERSI_CLASS:
6571 /* Merge TImodes on aligned occasions here too. */
6572 if (i * 8 + 8 > bytes)
6574 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6575 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6579 /* We've requested 24 bytes for which we
6580 don't have a mode. Use DImode. */
6581 if (tmpmode == BLKmode)
6584 = gen_rtx_EXPR_LIST (VOIDmode,
6585 gen_rtx_REG (tmpmode, *intreg),
6589 case X86_64_SSESF_CLASS:
6591 = gen_rtx_EXPR_LIST (VOIDmode,
6592 gen_rtx_REG (SFmode,
6593 SSE_REGNO (sse_regno)),
6597 case X86_64_SSEDF_CLASS:
6599 = gen_rtx_EXPR_LIST (VOIDmode,
6600 gen_rtx_REG (DFmode,
6601 SSE_REGNO (sse_regno)),
6605 case X86_64_SSE_CLASS:
6613 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6623 && regclass[1] == X86_64_SSEUP_CLASS
6624 && regclass[2] == X86_64_SSEUP_CLASS
6625 && regclass[3] == X86_64_SSEUP_CLASS);
6633 = gen_rtx_EXPR_LIST (VOIDmode,
6634 gen_rtx_REG (tmpmode,
6635 SSE_REGNO (sse_regno)),
6644 /* Empty aligned struct, union or class. */
6648 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6649 for (i = 0; i < nexps; i++)
6650 XVECEXP (ret, 0, i) = exp [i];
6654 /* Update the data in CUM to advance over an argument of mode MODE
6655 and data type TYPE. (TYPE is null for libcalls where that information
6656 may not be available.) */
6659 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6660 const_tree type, HOST_WIDE_INT bytes,
6661 HOST_WIDE_INT words)
6677 cum->words += words;
6678 cum->nregs -= words;
6679 cum->regno += words;
6681 if (cum->nregs <= 0)
6689 /* OImode shouldn't be used directly. */
6693 if (cum->float_in_sse < 2)
6696 if (cum->float_in_sse < 1)
6713 if (!type || !AGGREGATE_TYPE_P (type))
6715 cum->sse_words += words;
6716 cum->sse_nregs -= 1;
6717 cum->sse_regno += 1;
6718 if (cum->sse_nregs <= 0)
6732 if (!type || !AGGREGATE_TYPE_P (type))
6734 cum->mmx_words += words;
6735 cum->mmx_nregs -= 1;
6736 cum->mmx_regno += 1;
6737 if (cum->mmx_nregs <= 0)
6748 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6749 const_tree type, HOST_WIDE_INT words, bool named)
6751 int int_nregs, sse_nregs;
6753 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6754 if (!named && VALID_AVX256_REG_MODE (mode))
6757 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6758 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6760 cum->nregs -= int_nregs;
6761 cum->sse_nregs -= sse_nregs;
6762 cum->regno += int_nregs;
6763 cum->sse_regno += sse_nregs;
6767 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6768 cum->words = (cum->words + align - 1) & ~(align - 1);
6769 cum->words += words;
6774 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6775 HOST_WIDE_INT words)
6777 /* Otherwise, this should be passed indirectly. */
6778 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6780 cum->words += words;
6788 /* Update the data in CUM to advance over an argument of mode MODE and
6789 data type TYPE. (TYPE is null for libcalls where that information
6790 may not be available.) */
6793 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
6794 const_tree type, bool named)
6796 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6797 HOST_WIDE_INT bytes, words;
6799 if (mode == BLKmode)
6800 bytes = int_size_in_bytes (type);
6802 bytes = GET_MODE_SIZE (mode);
6803 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6806 mode = type_natural_mode (type, NULL);
6808 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6809 function_arg_advance_ms_64 (cum, bytes, words);
6810 else if (TARGET_64BIT)
6811 function_arg_advance_64 (cum, mode, type, words, named);
6813 function_arg_advance_32 (cum, mode, type, bytes, words);
6816 /* Define where to put the arguments to a function.
6817 Value is zero to push the argument on the stack,
6818 or a hard register in which to store the argument.
6820 MODE is the argument's machine mode.
6821 TYPE is the data type of the argument (as a tree).
6822 This is null for libcalls where that information may not be available.
6824 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6825 the preceding args and about the function being called.
6826 NAMED is nonzero if this argument is a named parameter
6827 (otherwise it is an extra parameter matching an ellipsis). */
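/* Illustrative 32-bit examples (assumptions about typical declarations,
   not part of GCC):

     void __attribute__ ((fastcall)) f (int a, int b, int c);
     void __attribute__ ((regparm (2))) g (int a, int b);

   f receives a in %ecx and b in %edx with c on the stack, while g
   receives a in %eax and b in %edx; anything that does not fit the
   available registers falls through to the stack as usual.  */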
6830 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6831 enum machine_mode orig_mode, const_tree type,
6832 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6834 static bool warnedsse, warnedmmx;
6836 /* Avoid the AL settings for the Unix64 ABI. */
6837 if (mode == VOIDmode)
6853 if (words <= cum->nregs)
6855 int regno = cum->regno;
6857 /* Fastcall allocates the first two DWORD (SImode) or
6858 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6864 || (type && AGGREGATE_TYPE_P (type)))
6867 /* ECX not EAX is the first allocated register. */
6868 if (regno == AX_REG)
6871 return gen_rtx_REG (mode, regno);
6876 if (cum->float_in_sse < 2)
6879 if (cum->float_in_sse < 1)
6883 /* In 32bit, we pass TImode in xmm registers. */
6890 if (!type || !AGGREGATE_TYPE_P (type))
6892 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6895 warning (0, "SSE vector argument without SSE enabled "
6899 return gen_reg_or_parallel (mode, orig_mode,
6900 cum->sse_regno + FIRST_SSE_REG);
6905 /* OImode shouldn't be used directly. */
6914 if (!type || !AGGREGATE_TYPE_P (type))
6917 return gen_reg_or_parallel (mode, orig_mode,
6918 cum->sse_regno + FIRST_SSE_REG);
6928 if (!type || !AGGREGATE_TYPE_P (type))
6930 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6933 warning (0, "MMX vector argument without MMX enabled "
6937 return gen_reg_or_parallel (mode, orig_mode,
6938 cum->mmx_regno + FIRST_MMX_REG);
6947 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6948 enum machine_mode orig_mode, const_tree type, bool named)
6950 /* Handle a hidden AL argument containing number of registers
6951 for varargs x86-64 functions. */
6952 if (mode == VOIDmode)
6953 return GEN_INT (cum->maybe_vaarg
6954 ? (cum->sse_nregs < 0
6955 ? X86_64_SSE_REGPARM_MAX
6970 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6976 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6978 &x86_64_int_parameter_registers [cum->regno],
6983 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6984 enum machine_mode orig_mode, bool named,
6985 HOST_WIDE_INT bytes)
6989 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6990 We use the value -2 to specify that the current function call is MS ABI. */
6991 if (mode == VOIDmode)
6992 return GEN_INT (-2);
6994 /* If we've run out of registers, it goes on the stack. */
6995 if (cum->nregs == 0)
6998 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7000 /* Only floating point modes are passed in anything but integer regs. */
7001 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7004 regno = cum->regno + FIRST_SSE_REG;
7009 /* Unnamed floating parameters are passed in both the
7010 SSE and integer registers. */
7011 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7012 t2 = gen_rtx_REG (mode, regno);
7013 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7014 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7015 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7018 /* Handle aggregate types passed in registers. */
7019 if (orig_mode == BLKmode)
7021 if (bytes > 0 && bytes <= 8)
7022 mode = (bytes > 4 ? DImode : SImode);
7023 if (mode == BLKmode)
7027 return gen_reg_or_parallel (mode, orig_mode, regno);
7030 /* Return where to put the arguments to a function.
7031 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7033 MODE is the argument's machine mode. TYPE is the data type of the
7034 argument. It is null for libcalls where that information may not be
7035 available. CUM gives information about the preceding args and about
7036 the function being called. NAMED is nonzero if this argument is a
7037 named parameter (otherwise it is an extra parameter matching an ellipsis). */
7041 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7042 const_tree type, bool named)
7044 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7045 enum machine_mode mode = omode;
7046 HOST_WIDE_INT bytes, words;
7049 if (mode == BLKmode)
7050 bytes = int_size_in_bytes (type);
7052 bytes = GET_MODE_SIZE (mode);
7053 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7055 /* To simplify the code below, represent vector types with a vector mode
7056 even if MMX/SSE are not active. */
7057 if (type && TREE_CODE (type) == VECTOR_TYPE)
7058 mode = type_natural_mode (type, cum);
7060 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7061 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7062 else if (TARGET_64BIT)
7063 arg = function_arg_64 (cum, mode, omode, type, named);
7065 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7070 /* A C expression that indicates when an argument must be passed by
7071 reference. If nonzero for an argument, a copy of that argument is
7072 made in memory and a pointer to the argument is passed instead of
7073 the argument itself. The pointer is passed in whatever way is
7074 appropriate for passing a pointer to that type. */
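/* Example of the Win64 rule below (a sketch, not normative): under the
   MS ABI a type such as

     struct s { char c[12]; };    (12 bytes)

   is not 1, 2, 4 or 8 bytes long, so it is passed by reference, as is
   __m128; a plain 8-byte struct is still passed by value in a register
   or stack slot.  */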
7077 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
7078 enum machine_mode mode ATTRIBUTE_UNUSED,
7079 const_tree type, bool named ATTRIBUTE_UNUSED)
7081 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7083 /* See Windows x64 Software Convention. */
7084 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7086 int msize = (int) GET_MODE_SIZE (mode);
7089 /* Arrays are passed by reference. */
7090 if (TREE_CODE (type) == ARRAY_TYPE)
7093 if (AGGREGATE_TYPE_P (type))
7095 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7096 are passed by reference. */
7097 msize = int_size_in_bytes (type);
7101 /* __m128 is passed by reference. */
7103 case 1: case 2: case 4: case 8:
7109 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7115 /* Return true when TYPE should be 128bit aligned for 32bit argument
7116 passing ABI. XXX: This function is obsolete and is only used for
7117 checking psABI compatibility with previous versions of GCC. */
7120 ix86_compat_aligned_value_p (const_tree type)
7122 enum machine_mode mode = TYPE_MODE (type);
7123 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7127 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7129 if (TYPE_ALIGN (type) < 128)
7132 if (AGGREGATE_TYPE_P (type))
7134 /* Walk the aggregates recursively. */
7135 switch (TREE_CODE (type))
7139 case QUAL_UNION_TYPE:
7143 /* Walk all the structure fields. */
7144 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7146 if (TREE_CODE (field) == FIELD_DECL
7147 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7154 /* Just for use if some languages pass arrays by value. */
7155 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7166 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7167 XXX: This function is obsolete and is only used for checking psABI
7168 compatibility with previous versions of GCC. */
7171 ix86_compat_function_arg_boundary (enum machine_mode mode,
7172 const_tree type, unsigned int align)
7174 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7175 natural boundaries. */
7176 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7178 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7179 make an exception for SSE modes since these require 128bit alignment.
7182 The handling here differs from field_alignment. ICC aligns MMX
7183 arguments to 4 byte boundaries, while structure fields are aligned
7184 to 8 byte boundaries. */
7187 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7188 align = PARM_BOUNDARY;
7192 if (!ix86_compat_aligned_value_p (type))
7193 align = PARM_BOUNDARY;
7196 if (align > BIGGEST_ALIGNMENT)
7197 align = BIGGEST_ALIGNMENT;
7201 /* Return true when TYPE should be 128bit aligned for 32bit argument passing. */
7205 ix86_contains_aligned_value_p (const_tree type)
7207 enum machine_mode mode = TYPE_MODE (type);
7209 if (mode == XFmode || mode == XCmode)
7212 if (TYPE_ALIGN (type) < 128)
7215 if (AGGREGATE_TYPE_P (type))
7217 /* Walk the aggregates recursively. */
7218 switch (TREE_CODE (type))
7222 case QUAL_UNION_TYPE:
7226 /* Walk all the structure fields. */
7227 for (field = TYPE_FIELDS (type);
7229 field = DECL_CHAIN (field))
7231 if (TREE_CODE (field) == FIELD_DECL
7232 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7239 /* Just for use if some languages pass arrays by value. */
7240 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7249 return TYPE_ALIGN (type) >= 128;
7254 /* Gives the alignment boundary, in bits, of an argument with the
7255 specified mode and type. */
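/* Rough examples of the result (illustrative, assuming a plain 32-bit
   target): a double argument is aligned to PARM_BOUNDARY (32 bits), a
   long double (XFmode) likewise stays at 32 bits per the i386 ABI, and a
   16-byte SSE type such as __m128 is aligned to 128 bits, which is what
   the psABI-compatibility warning further down checks against older GCC
   behaviour.  */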
7258 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7263 /* Since the main variant type is used for the call, we convert the type
7264 to its main variant. */
7265 type = TYPE_MAIN_VARIANT (type);
7266 align = TYPE_ALIGN (type);
7269 align = GET_MODE_ALIGNMENT (mode);
7270 if (align < PARM_BOUNDARY)
7271 align = PARM_BOUNDARY;
7275 unsigned int saved_align = align;
7279 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7282 if (mode == XFmode || mode == XCmode)
7283 align = PARM_BOUNDARY;
7285 else if (!ix86_contains_aligned_value_p (type))
7286 align = PARM_BOUNDARY;
7289 align = PARM_BOUNDARY;
7294 && align != ix86_compat_function_arg_boundary (mode, type,
7298 inform (input_location,
7299 "The ABI for passing parameters with %d-byte"
7300 " alignment has changed in GCC 4.6",
7301 align / BITS_PER_UNIT);
7308 /* Return true if N is a possible register number of function value. */
7311 ix86_function_value_regno_p (const unsigned int regno)
7318 case FIRST_FLOAT_REG:
7319 /* TODO: The function should depend on current function ABI but
7320 builtins.c would need updating then. Therefore we use the default ABI. */
7322 if (TARGET_64BIT && ix86_abi == MS_ABI)
7324 return TARGET_FLOAT_RETURNS_IN_80387;
7330 if (TARGET_MACHO || TARGET_64BIT)
7338 /* Define how to find the value returned by a function.
7339 VALTYPE is the data type of the value (as a tree).
7340 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7341 otherwise, FUNC is 0. */
7344 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7345 const_tree fntype, const_tree fn)
7349 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7350 we normally prevent this case when mmx is not available. However
7351 some ABIs may require the result to be returned like DImode. */
7352 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7353 regno = FIRST_MMX_REG;
7355 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7356 we prevent this case when sse is not available. However some ABIs
7357 may require the result to be returned like integer TImode. */
7358 else if (mode == TImode
7359 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7360 regno = FIRST_SSE_REG;
7362 /* 32-byte vector modes in %ymm0. */
7363 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7364 regno = FIRST_SSE_REG;
7366 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7367 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7368 regno = FIRST_FLOAT_REG;
7370 /* Most things go in %eax. */
7373 /* Override FP return register with %xmm0 for local functions when
7374 SSE math is enabled or for functions with sseregparm attribute. */
7375 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7377 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7378 if ((sse_level >= 1 && mode == SFmode)
7379 || (sse_level == 2 && mode == DFmode))
7380 regno = FIRST_SSE_REG;
7383 /* OImode shouldn't be used directly. */
7384 gcc_assert (mode != OImode);
7386 return gen_rtx_REG (orig_mode, regno);
7390 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7395 /* Handle libcalls, which don't provide a type node. */
7396 if (valtype == NULL)
7410 regno = FIRST_SSE_REG;
7414 regno = FIRST_FLOAT_REG;
7422 return gen_rtx_REG (mode, regno);
7424 else if (POINTER_TYPE_P (valtype))
7426 /* Pointers are always returned in word_mode. */
7430 ret = construct_container (mode, orig_mode, valtype, 1,
7431 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7432 x86_64_int_return_registers, 0);
7434 /* For zero sized structures, construct_container returns NULL, but we
7435 need to keep the rest of the compiler happy by returning a meaningful value. */
7437 ret = gen_rtx_REG (orig_mode, AX_REG);
7443 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7446 unsigned int regno = AX_REG;
7450 switch (GET_MODE_SIZE (mode))
7453 if (valtype != NULL_TREE
7454 && !VECTOR_INTEGER_TYPE_P (valtype)
7456 && !INTEGRAL_TYPE_P (valtype)
7457 && !VECTOR_FLOAT_TYPE_P (valtype))
7459 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7460 && !COMPLEX_MODE_P (mode))
7461 regno = FIRST_SSE_REG;
7465 if (mode == SFmode || mode == DFmode)
7466 regno = FIRST_SSE_REG;
7472 return gen_rtx_REG (orig_mode, regno);
7476 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7477 enum machine_mode orig_mode, enum machine_mode mode)
7479 const_tree fn, fntype;
7482 if (fntype_or_decl && DECL_P (fntype_or_decl))
7483 fn = fntype_or_decl;
7484 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7486 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7487 return function_value_ms_64 (orig_mode, mode, valtype);
7488 else if (TARGET_64BIT)
7489 return function_value_64 (orig_mode, mode, valtype);
7491 return function_value_32 (orig_mode, mode, fntype, fn);
7495 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7496 bool outgoing ATTRIBUTE_UNUSED)
7498 enum machine_mode mode, orig_mode;
7500 orig_mode = TYPE_MODE (valtype);
7501 mode = type_natural_mode (valtype, NULL);
7502 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7505 /* Pointer function arguments and return values are promoted to word_mode. */
7508 static enum machine_mode
7509 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7510 int *punsignedp, const_tree fntype,
7513 if (type != NULL_TREE && POINTER_TYPE_P (type))
7515 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7518 return default_promote_function_mode (type, mode, punsignedp, fntype,
7522 /* Return true if a structure, union or array with MODE containing FIELD
7523 should be accessed using BLKmode. */
7526 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7528 /* Union with XFmode must be in BLKmode. */
7529 return (mode == XFmode
7530 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7531 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7535 ix86_libcall_value (enum machine_mode mode)
7537 return ix86_function_value_1 (NULL, NULL, mode, mode);
7540 /* Return true iff type is returned in memory. */
7542 static bool ATTRIBUTE_UNUSED
7543 return_in_memory_32 (const_tree type, enum machine_mode mode)
7547 if (mode == BLKmode)
7550 size = int_size_in_bytes (type);
7552 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
7555 if (VECTOR_MODE_P (mode) || mode == TImode)
7557 /* User-created vectors small enough to fit in EAX. */
7561 /* MMX/3dNow values are returned in MM0,
7562 except when it doesn't exist or the ABI prescribes otherwise. */
7564 return !TARGET_MMX || TARGET_VECT8_RETURNS;
7566 /* SSE values are returned in XMM0, except when it doesn't exist. */
7570 /* AVX values are returned in YMM0, except when it doesn't exist. */
7581 /* OImode shouldn't be used directly. */
7582 gcc_assert (mode != OImode);
7587 static bool ATTRIBUTE_UNUSED
7588 return_in_memory_64 (const_tree type, enum machine_mode mode)
7590 int needed_intregs, needed_sseregs;
7591 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
7594 static bool ATTRIBUTE_UNUSED
7595 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
7597 HOST_WIDE_INT size = int_size_in_bytes (type);
7599 /* __m128 is returned in xmm0. */
7600 if ((!type || VECTOR_INTEGER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
7601 || VECTOR_FLOAT_TYPE_P (type))
7602 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7603 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
7606 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
7607 return size != 1 && size != 2 && size != 4 && size != 8;
7611 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7613 #ifdef SUBTARGET_RETURN_IN_MEMORY
7614 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
7616 const enum machine_mode mode = type_natural_mode (type, NULL);
7620 if (ix86_function_type_abi (fntype) == MS_ABI)
7621 return return_in_memory_ms_64 (type, mode);
7623 return return_in_memory_64 (type, mode);
7626 return return_in_memory_32 (type, mode);
7630 /* When returning SSE vector types, we have a choice of either
7631 (1) being abi incompatible with a -march switch, or
7632 (2) generating an error.
7633 Given no good solution, I think the safest thing is one warning.
7634 The user won't be able to use -Werror, but....
7636 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7637 called in response to actually generating a caller or callee that
7638 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7639 via aggregate_value_p for general type probing from tree-ssa. */
7642 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
7644 static bool warnedsse, warnedmmx;
7646 if (!TARGET_64BIT && type)
7648 /* Look at the return type of the function, not the function type. */
7649 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
7651 if (!TARGET_SSE && !warnedsse)
7654 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7657 warning (0, "SSE vector return without SSE enabled "
7662 if (!TARGET_MMX && !warnedmmx)
7664 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7667 warning (0, "MMX vector return without MMX enabled "
7677 /* Create the va_list data type. */
7679 /* Returns the calling convention specific va_list data type.
7680 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7683 ix86_build_builtin_va_list_abi (enum calling_abi abi)
7685 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
7687 /* For i386 we use a plain pointer to the argument area. */
7688 if (!TARGET_64BIT || abi == MS_ABI)
7689 return build_pointer_type (char_type_node);
7691 record = lang_hooks.types.make_type (RECORD_TYPE);
7692 type_decl = build_decl (BUILTINS_LOCATION,
7693 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7695 f_gpr = build_decl (BUILTINS_LOCATION,
7696 FIELD_DECL, get_identifier ("gp_offset"),
7697 unsigned_type_node);
7698 f_fpr = build_decl (BUILTINS_LOCATION,
7699 FIELD_DECL, get_identifier ("fp_offset"),
7700 unsigned_type_node);
7701 f_ovf = build_decl (BUILTINS_LOCATION,
7702 FIELD_DECL, get_identifier ("overflow_arg_area"),
7704 f_sav = build_decl (BUILTINS_LOCATION,
7705 FIELD_DECL, get_identifier ("reg_save_area"),
7708 va_list_gpr_counter_field = f_gpr;
7709 va_list_fpr_counter_field = f_fpr;
7711 DECL_FIELD_CONTEXT (f_gpr) = record;
7712 DECL_FIELD_CONTEXT (f_fpr) = record;
7713 DECL_FIELD_CONTEXT (f_ovf) = record;
7714 DECL_FIELD_CONTEXT (f_sav) = record;
7716 TYPE_STUB_DECL (record) = type_decl;
7717 TYPE_NAME (record) = type_decl;
7718 TYPE_FIELDS (record) = f_gpr;
7719 DECL_CHAIN (f_gpr) = f_fpr;
7720 DECL_CHAIN (f_fpr) = f_ovf;
7721 DECL_CHAIN (f_ovf) = f_sav;
7723 layout_type (record);
7725 /* The correct type is an array type of one element. */
7726 return build_array_type (record, build_index_type (size_zero_node));
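/* For reference, the record built above is what the SysV x86-64 psABI
   exposes to user code as (roughly):

       typedef struct __va_list_tag {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } va_list[1];

   gp_offset/fp_offset are byte offsets of the next GP/SSE argument within
   reg_save_area, overflow_arg_area points at the next stack-passed argument,
   and the one-element array makes va_list decay to a pointer.  */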
7729 /* Setup the builtin va_list data type and for 64-bit the additional
7730 calling convention specific va_list data types. */
7733 ix86_build_builtin_va_list (void)
7735 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7737 /* Initialize abi specific va_list builtin types. */
7741 if (ix86_abi == MS_ABI)
7743 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7744 if (TREE_CODE (t) != RECORD_TYPE)
7745 t = build_variant_type_copy (t);
7746 sysv_va_list_type_node = t;
7751 if (TREE_CODE (t) != RECORD_TYPE)
7752 t = build_variant_type_copy (t);
7753 sysv_va_list_type_node = t;
7755 if (ix86_abi != MS_ABI)
7757 t = ix86_build_builtin_va_list_abi (MS_ABI);
7758 if (TREE_CODE (t) != RECORD_TYPE)
7759 t = build_variant_type_copy (t);
7760 ms_va_list_type_node = t;
7765 if (TREE_CODE (t) != RECORD_TYPE)
7766 t = build_variant_type_copy (t);
7767 ms_va_list_type_node = t;
7774 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7777 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7783 /* GPR size of varargs save area. */
7784 if (cfun->va_list_gpr_size)
7785 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7787 ix86_varargs_gpr_size = 0;
7789 /* FPR size of varargs save area. We don't need it if we don't pass
7790 anything in SSE registers. */
7791 if (TARGET_SSE && cfun->va_list_fpr_size)
7792 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7794 ix86_varargs_fpr_size = 0;
7796 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7799 save_area = frame_pointer_rtx;
7800 set = get_varargs_alias_set ();
7802 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7803 if (max > X86_64_REGPARM_MAX)
7804 max = X86_64_REGPARM_MAX;
7806 for (i = cum->regno; i < max; i++)
7808 mem = gen_rtx_MEM (word_mode,
7809 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
7810 MEM_NOTRAP_P (mem) = 1;
7811 set_mem_alias_set (mem, set);
7812 emit_move_insn (mem,
7813 gen_rtx_REG (word_mode,
7814 x86_64_int_parameter_registers[i]));
7817 if (ix86_varargs_fpr_size)
7819 enum machine_mode smode;
7822 /* Now emit code to save SSE registers. The AX parameter contains number
7823 of SSE parameter registers used to call this function, though all we
7824 actually check here is the zero/non-zero status. */
7826 label = gen_label_rtx ();
7827 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7828 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7831 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7832 we used movdqa (i.e. TImode) instead? Perhaps even better would
7833 be if we could determine the real mode of the data, via a hook
7834 into pass_stdarg. Ignore all that for now. */
7836 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7837 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7839 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7840 if (max > X86_64_SSE_REGPARM_MAX)
7841 max = X86_64_SSE_REGPARM_MAX;
7843 for (i = cum->sse_regno; i < max; ++i)
7845 mem = plus_constant (Pmode, save_area,
7846 i * 16 + ix86_varargs_gpr_size);
7847 mem = gen_rtx_MEM (smode, mem);
7848 MEM_NOTRAP_P (mem) = 1;
7849 set_mem_alias_set (mem, set);
7850 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7852 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
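/* A sketch of the resulting register save area (assuming the usual values
   X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8):

       offset   0 ..  47:  rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
       offset  48 .. 175:  xmm0 .. xmm7                 (16 bytes each)

   matching the i * UNITS_PER_WORD and i * 16 + ix86_varargs_gpr_size
   addresses used above.  */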
7860 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7862 alias_set_type set = get_varargs_alias_set ();
7865 /* Reset to zero, as there might be a sysv vaarg used before. */
7867 ix86_varargs_gpr_size = 0;
7868 ix86_varargs_fpr_size = 0;
7870 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7874 mem = gen_rtx_MEM (Pmode,
7875 plus_constant (Pmode, virtual_incoming_args_rtx,
7876 i * UNITS_PER_WORD));
7877 MEM_NOTRAP_P (mem) = 1;
7878 set_mem_alias_set (mem, set);
7880 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7881 emit_move_insn (mem, reg);
7886 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7887 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7890 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7891 CUMULATIVE_ARGS next_cum;
7894 /* This argument doesn't appear to be used anymore. Which is good,
7895 because the old code here didn't suppress rtl generation. */
7896 gcc_assert (!no_rtl);
7901 fntype = TREE_TYPE (current_function_decl);
7903 /* For varargs, we do not want to skip the dummy va_dcl argument.
7904 For stdargs, we do want to skip the last named argument. */
7906 if (stdarg_p (fntype))
7907 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
7910 if (cum->call_abi == MS_ABI)
7911 setup_incoming_varargs_ms_64 (&next_cum);
7913 setup_incoming_varargs_64 (&next_cum);
7916 /* Check whether TYPE is a va_list of kind char *. */
7919 is_va_list_char_pointer (tree type)
7923 /* For 32-bit it is always true. */
7926 canonic = ix86_canonical_va_list_type (type);
7927 return (canonic == ms_va_list_type_node
7928 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7931 /* Implement va_start. */
7934 ix86_va_start (tree valist, rtx nextarg)
7936 HOST_WIDE_INT words, n_gpr, n_fpr;
7937 tree f_gpr, f_fpr, f_ovf, f_sav;
7938 tree gpr, fpr, ovf, sav, t;
7942 if (flag_split_stack
7943 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7945 unsigned int scratch_regno;
7947 /* When we are splitting the stack, we can't refer to the stack
7948 arguments using internal_arg_pointer, because they may be on
7949 the old stack. The split stack prologue will arrange to
7950 leave a pointer to the old stack arguments in a scratch
7951 register, which we here copy to a pseudo-register. The split
7952 stack prologue can't set the pseudo-register directly because
7953 it (the prologue) runs before any registers have been saved. */
7955 scratch_regno = split_stack_prologue_scratch_regno ();
7956 if (scratch_regno != INVALID_REGNUM)
7960 reg = gen_reg_rtx (Pmode);
7961 cfun->machine->split_stack_varargs_pointer = reg;
7964 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7968 push_topmost_sequence ();
7969 emit_insn_after (seq, entry_of_function ());
7970 pop_topmost_sequence ();
7974 /* Only 64bit target needs something special. */
7975 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7977 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7978 std_expand_builtin_va_start (valist, nextarg);
7983 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7984 next = expand_binop (ptr_mode, add_optab,
7985 cfun->machine->split_stack_varargs_pointer,
7986 crtl->args.arg_offset_rtx,
7987 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7988 convert_move (va_r, next, 0);
7993 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7994 f_fpr = DECL_CHAIN (f_gpr);
7995 f_ovf = DECL_CHAIN (f_fpr);
7996 f_sav = DECL_CHAIN (f_ovf);
7998 valist = build_simple_mem_ref (valist);
7999 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8000 /* The following should be folded into the MEM_REF offset. */
8001 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8003 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8005 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8007 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8010 /* Count number of gp and fp argument registers used. */
8011 words = crtl->args.info.words;
8012 n_gpr = crtl->args.info.regno;
8013 n_fpr = crtl->args.info.sse_regno;
8015 if (cfun->va_list_gpr_size)
8017 type = TREE_TYPE (gpr);
8018 t = build2 (MODIFY_EXPR, type,
8019 gpr, build_int_cst (type, n_gpr * 8));
8020 TREE_SIDE_EFFECTS (t) = 1;
8021 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8024 if (TARGET_SSE && cfun->va_list_fpr_size)
8026 type = TREE_TYPE (fpr);
8027 t = build2 (MODIFY_EXPR, type, fpr,
8028 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8029 TREE_SIDE_EFFECTS (t) = 1;
8030 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8033 /* Find the overflow area. */
8034 type = TREE_TYPE (ovf);
8035 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8036 ovf_rtx = crtl->args.internal_arg_pointer;
8038 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8039 t = make_tree (type, ovf_rtx);
8041 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8042 t = build2 (MODIFY_EXPR, type, ovf, t);
8043 TREE_SIDE_EFFECTS (t) = 1;
8044 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8046 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8048 /* Find the register save area.
8049 The function prologue saves it right above the stack frame. */
8050 type = TREE_TYPE (sav);
8051 t = make_tree (type, frame_pointer_rtx);
8052 if (!ix86_varargs_gpr_size)
8053 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8054 t = build2 (MODIFY_EXPR, type, sav, t);
8055 TREE_SIDE_EFFECTS (t) = 1;
8056 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
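/* As a concrete example (a sketch, assuming the usual register counts):
   for

       int f (const char *fmt, ...);

   one named GP argument and no named SSE arguments have been consumed,
   so the assignments above produce

       gp_offset         = 1 * 8 = 8
       fp_offset         = 0 * 16 + 8 * X86_64_REGPARM_MAX = 48
       overflow_arg_area = incoming argument pointer + words * UNITS_PER_WORD
       reg_save_area     = frame pointer (biased by -8 * X86_64_REGPARM_MAX
			   when no GP registers were saved).  */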
8060 /* Implement va_arg. */
8063 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8066 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8067 tree f_gpr, f_fpr, f_ovf, f_sav;
8068 tree gpr, fpr, ovf, sav, t;
8070 tree lab_false, lab_over = NULL_TREE;
8075 enum machine_mode nat_mode;
8076 unsigned int arg_boundary;
8078 /* Only 64bit target needs something special. */
8079 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8080 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8082 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8083 f_fpr = DECL_CHAIN (f_gpr);
8084 f_ovf = DECL_CHAIN (f_fpr);
8085 f_sav = DECL_CHAIN (f_ovf);
8087 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8088 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8089 valist = build_va_arg_indirect_ref (valist);
8090 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8091 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8092 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8094 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8096 type = build_pointer_type (type);
8097 size = int_size_in_bytes (type);
8098 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8100 nat_mode = type_natural_mode (type, NULL);
8109 /* Unnamed 256bit vector mode parameters are passed on stack. */
8110 if (!TARGET_64BIT_MS_ABI)
8117 container = construct_container (nat_mode, TYPE_MODE (type),
8118 type, 0, X86_64_REGPARM_MAX,
8119 X86_64_SSE_REGPARM_MAX, intreg,
8124 /* Pull the value out of the saved registers. */
8126 addr = create_tmp_var (ptr_type_node, "addr");
8130 int needed_intregs, needed_sseregs;
8132 tree int_addr, sse_addr;
8134 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8135 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8137 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8139 need_temp = (!REG_P (container)
8140 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8141 || TYPE_ALIGN (type) > 128));
8143 /* If we are passing a structure, verify that it forms a consecutive block
8144 in the register save area. If not, we need to do moves. */
8145 if (!need_temp && !REG_P (container))
8147 /* Verify that all registers are strictly consecutive */
8148 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8152 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8154 rtx slot = XVECEXP (container, 0, i);
8155 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8156 || INTVAL (XEXP (slot, 1)) != i * 16)
8164 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8166 rtx slot = XVECEXP (container, 0, i);
8167 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8168 || INTVAL (XEXP (slot, 1)) != i * 8)
8180 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8181 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8184 /* First ensure that we fit completely in registers. */
8187 t = build_int_cst (TREE_TYPE (gpr),
8188 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8189 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8190 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8191 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8192 gimplify_and_add (t, pre_p);
8196 t = build_int_cst (TREE_TYPE (fpr),
8197 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8198 + X86_64_REGPARM_MAX * 8);
8199 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8200 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8201 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8202 gimplify_and_add (t, pre_p);
8205 /* Compute index to start of area used for integer regs. */
8208 /* int_addr = gpr + sav; */
8209 t = fold_build_pointer_plus (sav, gpr);
8210 gimplify_assign (int_addr, t, pre_p);
8214 /* sse_addr = fpr + sav; */
8215 t = fold_build_pointer_plus (sav, fpr);
8216 gimplify_assign (sse_addr, t, pre_p);
8220 int i, prev_size = 0;
8221 tree temp = create_tmp_var (type, "va_arg_tmp");
8224 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8225 gimplify_assign (addr, t, pre_p);
8227 for (i = 0; i < XVECLEN (container, 0); i++)
8229 rtx slot = XVECEXP (container, 0, i);
8230 rtx reg = XEXP (slot, 0);
8231 enum machine_mode mode = GET_MODE (reg);
8237 tree dest_addr, dest;
8238 int cur_size = GET_MODE_SIZE (mode);
8240 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8241 prev_size = INTVAL (XEXP (slot, 1));
8242 if (prev_size + cur_size > size)
8244 cur_size = size - prev_size;
8245 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8246 if (mode == BLKmode)
8249 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8250 if (mode == GET_MODE (reg))
8251 addr_type = build_pointer_type (piece_type);
8253 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8255 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8258 if (SSE_REGNO_P (REGNO (reg)))
8260 src_addr = sse_addr;
8261 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8265 src_addr = int_addr;
8266 src_offset = REGNO (reg) * 8;
8268 src_addr = fold_convert (addr_type, src_addr);
8269 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8271 dest_addr = fold_convert (daddr_type, addr);
8272 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8273 if (cur_size == GET_MODE_SIZE (mode))
8275 src = build_va_arg_indirect_ref (src_addr);
8276 dest = build_va_arg_indirect_ref (dest_addr);
8278 gimplify_assign (dest, src, pre_p);
8283 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8284 3, dest_addr, src_addr,
8285 size_int (cur_size));
8286 gimplify_and_add (copy, pre_p);
8288 prev_size += cur_size;
8294 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8295 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8296 gimplify_assign (gpr, t, pre_p);
8301 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8302 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8303 gimplify_assign (fpr, t, pre_p);
8306 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8308 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8311 /* ... otherwise out of the overflow area. */
8313 /* When the caller aligns a parameter on the stack, any alignment beyond
8314 MAX_SUPPORTED_STACK_ALIGNMENT is capped at MAX_SUPPORTED_STACK_ALIGNMENT.
8316 Match the callee with the caller here.
8317 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8318 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8319 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8321 /* Care for on-stack alignment if needed. */
8322 if (arg_boundary <= 64 || size == 0)
8326 HOST_WIDE_INT align = arg_boundary / 8;
8327 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8328 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8329 build_int_cst (TREE_TYPE (t), -align));
8332 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8333 gimplify_assign (addr, t, pre_p);
8335 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8336 gimplify_assign (unshare_expr (ovf), t, pre_p);
8339 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8341 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8342 addr = fold_convert (ptrtype, addr);
8345 addr = build_va_arg_indirect_ref (addr);
8346 return build_va_arg_indirect_ref (addr);
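/* For a plain int argument the GIMPLE built above boils down to roughly
   (a simplified sketch; alignment handling and the SSE and mixed cases
   add more code, as seen above):

       if (ap->gp_offset >= 6 * 8)
	 goto lab_false;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     lab_over:
       result = *(int *) addr;
*/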
8349 /* Return true if OPNUM's MEM should be matched
8350 in movabs* patterns. */
8353 ix86_check_movabs (rtx insn, int opnum)
8357 set = PATTERN (insn);
8358 if (GET_CODE (set) == PARALLEL)
8359 set = XVECEXP (set, 0, 0);
8360 gcc_assert (GET_CODE (set) == SET);
8361 mem = XEXP (set, opnum);
8362 while (GET_CODE (mem) == SUBREG)
8363 mem = SUBREG_REG (mem);
8364 gcc_assert (MEM_P (mem));
8365 return volatile_ok || !MEM_VOLATILE_P (mem);
8368 /* Initialize the table of extra 80387 mathematical constants. */
8371 init_ext_80387_constants (void)
8373 static const char * cst[5] =
8375 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8376 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8377 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8378 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8379 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8383 for (i = 0; i < 5; i++)
8385 real_from_string (&ext_80387_constants_table[i], cst[i]);
8386 /* Ensure each constant is rounded to XFmode precision. */
8387 real_convert (&ext_80387_constants_table[i],
8388 XFmode, &ext_80387_constants_table[i]);
8391 ext_80387_constants_init = 1;
8394 /* Return non-zero if the constant is something that
8395 can be loaded with a special instruction. */
8398 standard_80387_constant_p (rtx x)
8400 enum machine_mode mode = GET_MODE (x);
8404 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8407 if (x == CONST0_RTX (mode))
8409 if (x == CONST1_RTX (mode))
8412 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8414 /* For XFmode constants, try to find a special 80387 instruction when
8415 optimizing for size or on those CPUs that benefit from them. */
8417 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8421 if (! ext_80387_constants_init)
8422 init_ext_80387_constants ();
8424 for (i = 0; i < 5; i++)
8425 if (real_identical (&r, &ext_80387_constants_table[i]))
8429 /* Load of the constant -0.0 or -1.0 will be split as
8430 fldz;fchs or fld1;fchs sequence. */
8431 if (real_isnegzero (&r))
8433 if (real_identical (&r, &dconstm1))
8439 /* Return the opcode of the special instruction to be used to load the constant X. */
8443 standard_80387_constant_opcode (rtx x)
8445 switch (standard_80387_constant_p (x))
8469 /* Return the CONST_DOUBLE representing the 80387 constant that is
8470 loaded by the specified special instruction. The argument IDX
8471 matches the return value from standard_80387_constant_p. */
8474 standard_80387_constant_rtx (int idx)
8478 if (! ext_80387_constants_init)
8479 init_ext_80387_constants ();
8495 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8499 /* Return 1 if X is all 0s and 2 if X is all 1s
8500 in a supported SSE/AVX vector mode. */
8503 standard_sse_constant_p (rtx x)
8505 enum machine_mode mode = GET_MODE (x);
8507 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8509 if (vector_all_ones_operand (x, mode))
8531 /* Return the opcode of the special instruction to be used to load the constant X. */
8535 standard_sse_constant_opcode (rtx insn, rtx x)
8537 switch (standard_sse_constant_p (x))
8540 switch (get_attr_mode (insn))
8543 return "%vpxor\t%0, %d0";
8545 return "%vxorpd\t%0, %d0";
8547 return "%vxorps\t%0, %d0";
8550 return "vpxor\t%x0, %x0, %x0";
8552 return "vxorpd\t%x0, %x0, %x0";
8554 return "vxorps\t%x0, %x0, %x0";
8562 return "vpcmpeqd\t%0, %0, %0";
8564 return "pcmpeqd\t%0, %0";
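/* Both templates rely on well-known idioms: XORing a register with itself
   yields all-zero bits regardless of its prior contents, and a vector
   compare-equal of a register with itself yields all-one bits, e.g.

       xorps   %xmm0, %xmm0         xmm0 = all zeros
       pcmpeqd %xmm0, %xmm0         xmm0 = all ones

   which is why no constant pool load is needed for these values.  */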
8572 /* Returns true if OP contains a symbol reference. */
8575 symbolic_reference_mentioned_p (rtx op)
8580 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
8583 fmt = GET_RTX_FORMAT (GET_CODE (op));
8584 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
8590 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
8591 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
8595 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
8602 /* Return true if it is appropriate to emit `ret' instructions in the
8603 body of a function. Do this only if the epilogue is simple, needing a
8604 couple of insns. Prior to reloading, we can't tell how many registers
8605 must be saved, so return false then. Return false if there is no frame
8606 marker to de-allocate. */
8609 ix86_can_use_return_insn_p (void)
8611 struct ix86_frame frame;
8613 if (! reload_completed || frame_pointer_needed)
8616 /* Don't allow more than 32k pop, since that's all we can do
8617 with one instruction. */
8618 if (crtl->args.pops_args && crtl->args.size >= 32768)
8621 ix86_compute_frame_layout (&frame);
8622 return (frame.stack_pointer_offset == UNITS_PER_WORD
8623 && (frame.nregs + frame.nsseregs) == 0);
8626 /* Value should be nonzero if functions must have frame pointers.
8627 Zero means the frame pointer need not be set up (and parms may
8628 be accessed via the stack pointer) in functions that seem suitable. */
8631 ix86_frame_pointer_required (void)
8633 /* If we accessed previous frames, then the generated code expects
8634 to be able to access the saved ebp value in our frame. */
8635 if (cfun->machine->accesses_prev_frame)
8638 /* Several x86 OSes need a frame pointer for other reasons,
8639 usually pertaining to setjmp. */
8640 if (SUBTARGET_FRAME_POINTER_REQUIRED)
8643 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
8644 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
8647 /* SEH on Win64: very large frames need a frame pointer, as the maximum
8648 stack allocation is 4GB. */
8649 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
8652 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8653 turns off the frame pointer by default. Turn it back on now if
8654 we've not got a leaf function. */
8655 if (TARGET_OMIT_LEAF_FRAME_POINTER
8657 || ix86_current_function_calls_tls_descriptor))
8660 if (crtl->profile && !flag_fentry)
8666 /* Record that the current function accesses previous call frames. */
8669 ix86_setup_frame_addresses (void)
8671 cfun->machine->accesses_prev_frame = 1;
8674 #ifndef USE_HIDDEN_LINKONCE
8675 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8676 # define USE_HIDDEN_LINKONCE 1
8678 # define USE_HIDDEN_LINKONCE 0
8682 static int pic_labels_used;
8684 /* Fills in the label name that should be used for a pc thunk for
8685 the given register. */
8688 get_pc_thunk_name (char name[32], unsigned int regno)
8690 gcc_assert (!TARGET_64BIT);
8692 if (USE_HIDDEN_LINKONCE)
8693 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
8695 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
8699 /* This function generates code for -fpic that loads %ebx with
8700 the return address of the caller and then returns. */
8703 ix86_code_end (void)
8708 for (regno = AX_REG; regno <= SP_REG; regno++)
8713 if (!(pic_labels_used & (1 << regno)))
8716 get_pc_thunk_name (name, regno);
8718 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8719 get_identifier (name),
8720 build_function_type_list (void_type_node, NULL_TREE));
8721 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8722 NULL_TREE, void_type_node);
8723 TREE_PUBLIC (decl) = 1;
8724 TREE_STATIC (decl) = 1;
8725 DECL_IGNORED_P (decl) = 1;
8730 switch_to_section (darwin_sections[text_coal_section]);
8731 fputs ("\t.weak_definition\t", asm_out_file);
8732 assemble_name (asm_out_file, name);
8733 fputs ("\n\t.private_extern\t", asm_out_file);
8734 assemble_name (asm_out_file, name);
8735 putc ('\n', asm_out_file);
8736 ASM_OUTPUT_LABEL (asm_out_file, name);
8737 DECL_WEAK (decl) = 1;
8741 if (USE_HIDDEN_LINKONCE)
8743 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8745 targetm.asm_out.unique_section (decl, 0);
8746 switch_to_section (get_named_section (decl, NULL, 0));
8748 targetm.asm_out.globalize_label (asm_out_file, name);
8749 fputs ("\t.hidden\t", asm_out_file);
8750 assemble_name (asm_out_file, name);
8751 putc ('\n', asm_out_file);
8752 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8756 switch_to_section (text_section);
8757 ASM_OUTPUT_LABEL (asm_out_file, name);
8760 DECL_INITIAL (decl) = make_node (BLOCK);
8761 current_function_decl = decl;
8762 init_function_start (decl);
8763 first_function_block_is_cold = false;
8764 /* Make sure unwind info is emitted for the thunk if needed. */
8765 final_start_function (emit_barrier (), asm_out_file, 1);
8767 /* Pad stack IP move with 4 instructions (two NOPs count
8768 as one instruction). */
8769 if (TARGET_PAD_SHORT_FUNCTION)
8774 fputs ("\tnop\n", asm_out_file);
8777 xops[0] = gen_rtx_REG (Pmode, regno);
8778 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8779 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8780 fputs ("\tret\n", asm_out_file);
8781 final_end_function ();
8782 init_insn_lengths ();
8783 free_after_compilation (cfun);
8785 current_function_decl = NULL;
8788 if (flag_split_stack)
8789 file_end_indicate_split_stack ();
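/* For reference, the thunk emitted above for, say, %ebx looks like:

       __x86.get_pc_thunk.bx:
	       movl    (%esp), %ebx
	       ret

   i.e. it copies its own return address (the address of the instruction
   following the call) into the chosen register.  */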
8792 /* Emit code for the SET_GOT patterns. */
8795 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8801 if (TARGET_VXWORKS_RTP && flag_pic)
8803 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8804 xops[2] = gen_rtx_MEM (Pmode,
8805 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8806 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8808 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8809 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8810 an unadorned address. */
8811 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8812 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8813 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8817 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8821 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8823 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8826 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8827 is what will be referenced by the Mach-O PIC subsystem. */
8829 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8832 targetm.asm_out.internal_label (asm_out_file, "L",
8833 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8838 get_pc_thunk_name (name, REGNO (dest));
8839 pic_labels_used |= 1 << REGNO (dest);
8841 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8842 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8843 output_asm_insn ("call\t%X2", xops);
8844 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8845 is what will be referenced by the Mach-O PIC subsystem. */
8848 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8850 targetm.asm_out.internal_label (asm_out_file, "L",
8851 CODE_LABEL_NUMBER (label));
8856 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
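/* On a typical 32-bit ELF target the code above therefore expands to the
   familiar PIC register setup (a sketch, with %ebx as the PIC register):

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   after which %ebx points at the GOT.  */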
8861 /* Generate a "push" pattern for input ARG. */
8866 struct machine_function *m = cfun->machine;
8868 if (m->fs.cfa_reg == stack_pointer_rtx)
8869 m->fs.cfa_offset += UNITS_PER_WORD;
8870 m->fs.sp_offset += UNITS_PER_WORD;
8872 if (REG_P (arg) && GET_MODE (arg) != word_mode)
8873 arg = gen_rtx_REG (word_mode, REGNO (arg));
8875 return gen_rtx_SET (VOIDmode,
8876 gen_rtx_MEM (word_mode,
8877 gen_rtx_PRE_DEC (Pmode,
8878 stack_pointer_rtx)),
8882 /* Generate a "pop" pattern for input ARG. */
8887 if (REG_P (arg) && GET_MODE (arg) != word_mode)
8888 arg = gen_rtx_REG (word_mode, REGNO (arg));
8890 return gen_rtx_SET (VOIDmode,
8892 gen_rtx_MEM (word_mode,
8893 gen_rtx_POST_INC (Pmode,
8894 stack_pointer_rtx)));
8897 /* Return >= 0 if there is an unused call-clobbered register available
8898 for the entire function. */
8901 ix86_select_alt_pic_regnum (void)
8905 && !ix86_current_function_calls_tls_descriptor)
8908 /* Can't use the same register for both PIC and DRAP. */
8910 drap = REGNO (crtl->drap_reg);
8913 for (i = 2; i >= 0; --i)
8914 if (i != drap && !df_regs_ever_live_p (i))
8918 return INVALID_REGNUM;
8921 /* Return TRUE if we need to save REGNO. */
8924 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
8926 if (pic_offset_table_rtx
8927 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8928 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8930 || crtl->calls_eh_return
8931 || crtl->uses_const_pool))
8932 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
8934 if (crtl->calls_eh_return && maybe_eh_return)
8939 unsigned test = EH_RETURN_DATA_REGNO (i);
8940 if (test == INVALID_REGNUM)
8947 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8950 return (df_regs_ever_live_p (regno)
8951 && !call_used_regs[regno]
8952 && !fixed_regs[regno]
8953 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8956 /* Return the number of saved general purpose registers. */
8959 ix86_nsaved_regs (void)
8964 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8965 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8970 /* Return the number of saved SSE registers. */
8973 ix86_nsaved_sseregs (void)
8978 if (!TARGET_64BIT_MS_ABI)
8980 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8981 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8986 /* Given FROM and TO register numbers, say whether this elimination is
8987 allowed. If stack alignment is needed, we can only replace argument
8988 pointer with hard frame pointer, or replace frame pointer with stack
8989 pointer. Otherwise, frame pointer elimination is automatically
8990 handled and all other eliminations are valid. */
8993 ix86_can_eliminate (const int from, const int to)
8995 if (stack_realign_fp)
8996 return ((from == ARG_POINTER_REGNUM
8997 && to == HARD_FRAME_POINTER_REGNUM)
8998 || (from == FRAME_POINTER_REGNUM
8999 && to == STACK_POINTER_REGNUM));
9001 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9004 /* Return the offset between two registers, one to be eliminated, and the other
9005 its replacement, at the start of a routine. */
9008 ix86_initial_elimination_offset (int from, int to)
9010 struct ix86_frame frame;
9011 ix86_compute_frame_layout (&frame);
9013 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9014 return frame.hard_frame_pointer_offset;
9015 else if (from == FRAME_POINTER_REGNUM
9016 && to == HARD_FRAME_POINTER_REGNUM)
9017 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9020 gcc_assert (to == STACK_POINTER_REGNUM);
9022 if (from == ARG_POINTER_REGNUM)
9023 return frame.stack_pointer_offset;
9025 gcc_assert (from == FRAME_POINTER_REGNUM);
9026 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9030 /* In a dynamically-aligned function, we can't know the offset from
9031 stack pointer to frame pointer, so we must ensure that setjmp
9032 eliminates fp against the hard fp (%ebp) rather than trying to
9033 index from %esp up to the top of the frame across a gap that is
9034 of unknown (at compile-time) size. */
9036 ix86_builtin_setjmp_frame_value (void)
9038 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9041 /* When using -fsplit-stack, the allocation routines set a field in
9042 the TCB to the bottom of the stack plus this much space, measured in bytes. */
9045 #define SPLIT_STACK_AVAILABLE 256
9047 /* Fill the structure ix86_frame describing the frame of the current function. */
9050 ix86_compute_frame_layout (struct ix86_frame *frame)
9052 unsigned HOST_WIDE_INT stack_alignment_needed;
9053 HOST_WIDE_INT offset;
9054 unsigned HOST_WIDE_INT preferred_alignment;
9055 HOST_WIDE_INT size = get_frame_size ();
9056 HOST_WIDE_INT to_allocate;
9058 frame->nregs = ix86_nsaved_regs ();
9059 frame->nsseregs = ix86_nsaved_sseregs ();
9061 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9062 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9064 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9065 except in function prologues and leaf functions. */
9066 if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
9067 && (!crtl->is_leaf || cfun->calls_alloca != 0
9068 || ix86_current_function_calls_tls_descriptor))
9070 preferred_alignment = 16;
9071 stack_alignment_needed = 16;
9072 crtl->preferred_stack_boundary = 128;
9073 crtl->stack_alignment_needed = 128;
9076 gcc_assert (!size || stack_alignment_needed);
9077 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9078 gcc_assert (preferred_alignment <= stack_alignment_needed);
9080 /* For SEH we have to limit the amount of code movement into the prologue.
9081 At present we do this via a BLOCKAGE, at which point there's very little
9082 scheduling that can be done, which means that there's very little point
9083 in doing anything except PUSHs. */
9085 cfun->machine->use_fast_prologue_epilogue = false;
9087 /* During reload iteration the number of registers saved can change.
9088 Recompute the value as needed. Do not recompute when the number of registers
9089 didn't change, as reload does multiple calls to the function and does not
9090 expect the decision to change within a single iteration. */
9091 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR)
9092 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9094 int count = frame->nregs;
9095 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9097 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9099 /* The fast prologue uses move instead of push to save registers. This
9100 is significantly longer, but also executes faster as modern hardware
9101 can execute the moves in parallel, but can't do that for push/pop.
9103 Be careful about choosing what prologue to emit: when the function takes
9104 many instructions to execute we may use the slow version, as well as when
9105 the function is known to be outside a hot spot (this is known only with
9106 profile feedback). Weight the size of the function by the number of registers
9107 to save, as it is cheap to use one or two push instructions but very
9108 slow to use many of them. */
9110 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9111 if (node->frequency < NODE_FREQUENCY_NORMAL
9112 || (flag_branch_probabilities
9113 && node->frequency < NODE_FREQUENCY_HOT))
9114 cfun->machine->use_fast_prologue_epilogue = false;
9116 cfun->machine->use_fast_prologue_epilogue
9117 = !expensive_function_p (count);
9120 frame->save_regs_using_mov
9121 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9122 /* If static stack checking is enabled and done with probes,
9123 the registers need to be saved before allocating the frame. */
9124 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9126 /* Skip return address. */
9127 offset = UNITS_PER_WORD;
9129 /* Skip pushed static chain. */
9130 if (ix86_static_chain_on_stack)
9131 offset += UNITS_PER_WORD;
9133 /* Skip saved base pointer. */
9134 if (frame_pointer_needed)
9135 offset += UNITS_PER_WORD;
9136 frame->hfp_save_offset = offset;
9138 /* The traditional frame pointer location is at the top of the frame. */
9139 frame->hard_frame_pointer_offset = offset;
9141 /* Register save area */
9142 offset += frame->nregs * UNITS_PER_WORD;
9143 frame->reg_save_offset = offset;
9145 /* On SEH target, registers are pushed just before the frame pointer location. */
9148 frame->hard_frame_pointer_offset = offset;
9150 /* Align and set SSE register save area. */
9151 if (frame->nsseregs)
9153 /* The only ABI that has saved SSE registers (Win64) also has a
9154 16-byte aligned default stack, and thus we don't need to be
9155 within the re-aligned local stack frame to save them. */
9156 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9157 offset = (offset + 16 - 1) & -16;
9158 offset += frame->nsseregs * 16;
9160 frame->sse_reg_save_offset = offset;
9162 /* The re-aligned stack starts here. Values before this point are not
9163 directly comparable with values below this point. In order to make
9164 sure that no value happens to be the same before and after, force
9165 the alignment computation below to add a non-zero value. */
9166 if (stack_realign_fp)
9167 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9170 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9171 offset += frame->va_arg_size;
9173 /* Align start of frame for local function. */
9174 if (stack_realign_fp
9175 || offset != frame->sse_reg_save_offset
9178 || cfun->calls_alloca
9179 || ix86_current_function_calls_tls_descriptor)
9180 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9182 /* Frame pointer points here. */
9183 frame->frame_pointer_offset = offset;
9187 /* Add outgoing arguments area. Can be skipped if we eliminated
9188 all the function calls as dead code.
9189 Skipping is however impossible when the function calls alloca: the alloca
9190 expander assumes that the last crtl->outgoing_args_size bytes
9191 of the stack frame are unused. */
9192 if (ACCUMULATE_OUTGOING_ARGS
9193 && (!crtl->is_leaf || cfun->calls_alloca
9194 || ix86_current_function_calls_tls_descriptor))
9196 offset += crtl->outgoing_args_size;
9197 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9200 frame->outgoing_arguments_size = 0;
9202 /* Align stack boundary. Only needed if we're calling another function or using alloca. */
9204 if (!crtl->is_leaf || cfun->calls_alloca
9205 || ix86_current_function_calls_tls_descriptor)
9206 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9208 /* We've reached end of stack frame. */
9209 frame->stack_pointer_offset = offset;
9211 /* Size prologue needs to allocate. */
9212 to_allocate = offset - frame->sse_reg_save_offset;
9214 if ((!to_allocate && frame->nregs <= 1)
9215 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9216 frame->save_regs_using_mov = false;
9218 if (ix86_using_red_zone ()
9219 && crtl->sp_is_unchanging
9221 && !ix86_current_function_calls_tls_descriptor)
9223 frame->red_zone_size = to_allocate;
9224 if (frame->save_regs_using_mov)
9225 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9226 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9227 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9230 frame->red_zone_size = 0;
9231 frame->stack_pointer_offset -= frame->red_zone_size;
9233 /* The SEH frame pointer location is near the bottom of the frame.
9234 This is enforced by the fact that the difference between the
9235 stack pointer and the frame pointer is limited to 240 bytes in
9236 the unwind data structure. */
9241 /* If we can leave the frame pointer where it is, do so. Also, returns
9242 the establisher frame for __builtin_frame_address (0). */
9243 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9244 if (diff <= SEH_MAX_FRAME_SIZE
9245 && (diff > 240 || (diff & 15) != 0)
9246 && !crtl->accesses_prior_frames)
9248 /* Ideally we'd determine what portion of the local stack frame
9249 (within the constraint of the lowest 240) is most heavily used.
9250 But without that complication, simply bias the frame pointer
9251 by 128 bytes so as to maximize the amount of the local stack
9252 frame that is addressable with 8-bit offsets. */
9253 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
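/* A sketch of the frame layout computed above, offsets growing away from
   the incoming stack pointer (i.e. downwards in memory), each marker naming
   the offset reached just past the element above it:

       return address
       [pushed static chain]
       [saved frame pointer]             <- hard_frame_pointer_offset
       saved general registers           <- reg_save_offset
       saved SSE registers               <- sse_reg_save_offset (16-byte aligned)
       va_arg register save area
       (alignment padding)               <- frame_pointer_offset
       local variables
       outgoing arguments                <- stack_pointer_offset

   with the stack-realignment and SEH cases adjusting the hard frame pointer
   location as handled above.  */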
9258 /* This is semi-inlined memory_address_length, but simplified
9259 since we know that we're always dealing with reg+offset, and
9260 to avoid having to create and discard all that rtl. */
9263 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9269 /* EBP and R13 cannot be encoded without an offset. */
9270 len = (regno == BP_REG || regno == R13_REG);
9272 else if (IN_RANGE (offset, -128, 127))
9275 /* ESP and R12 must be encoded with a SIB byte. */
9276 if (regno == SP_REG || regno == R12_REG)
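/* Some concrete examples of the extra bytes counted here, beyond the fixed
   opcode/modrm bytes (a sketch, assuming the usual disp32 fallback for
   offsets outside the disp8 range):

       0(%eax)    ->  0   no displacement, no SIB byte
       0(%ebp)    ->  1   disp8 forced, %ebp cannot be encoded bare
       8(%ecx)    ->  1   disp8
       8(%esp)    ->  2   disp8 plus the mandatory SIB byte
       512(%ecx)  ->  4   disp32  */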
9282 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9283 The valid base registers are taken from CFUN->MACHINE->FS. */
9286 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9288 const struct machine_function *m = cfun->machine;
9289 rtx base_reg = NULL;
9290 HOST_WIDE_INT base_offset = 0;
9292 if (m->use_fast_prologue_epilogue)
9294 /* Choose the base register most likely to allow the most scheduling
9295 opportunities. Generally FP is valid throughout the function,
9296 while DRAP must be reloaded within the epilogue. But choose either
9297 over the SP due to increased encoding size. */
9301 base_reg = hard_frame_pointer_rtx;
9302 base_offset = m->fs.fp_offset - cfa_offset;
9304 else if (m->fs.drap_valid)
9306 base_reg = crtl->drap_reg;
9307 base_offset = 0 - cfa_offset;
9309 else if (m->fs.sp_valid)
9311 base_reg = stack_pointer_rtx;
9312 base_offset = m->fs.sp_offset - cfa_offset;
9317 HOST_WIDE_INT toffset;
9320 /* Choose the base register with the smallest address encoding.
9321 With a tie, choose FP > DRAP > SP. */
9324 base_reg = stack_pointer_rtx;
9325 base_offset = m->fs.sp_offset - cfa_offset;
9326 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9328 if (m->fs.drap_valid)
9330 toffset = 0 - cfa_offset;
9331 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9334 base_reg = crtl->drap_reg;
9335 base_offset = toffset;
9341 toffset = m->fs.fp_offset - cfa_offset;
9342 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9345 base_reg = hard_frame_pointer_rtx;
9346 base_offset = toffset;
9351 gcc_assert (base_reg != NULL);
9353 return plus_constant (Pmode, base_reg, base_offset);
9356 /* Emit code to save registers in the prologue. */
9359 ix86_emit_save_regs (void)
9364 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9365 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9367 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9368 RTX_FRAME_RELATED_P (insn) = 1;
9372 /* Emit a single register save at CFA - CFA_OFFSET. */
9375 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9376 HOST_WIDE_INT cfa_offset)
9378 struct machine_function *m = cfun->machine;
9379 rtx reg = gen_rtx_REG (mode, regno);
9380 rtx mem, addr, base, insn;
9382 addr = choose_baseaddr (cfa_offset);
9383 mem = gen_frame_mem (mode, addr);
9385 /* For SSE saves, we need to indicate the 128-bit alignment. */
9386 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9388 insn = emit_move_insn (mem, reg);
9389 RTX_FRAME_RELATED_P (insn) = 1;
9392 if (GET_CODE (base) == PLUS)
9393 base = XEXP (base, 0);
9394 gcc_checking_assert (REG_P (base));
9396 /* When saving registers into a re-aligned local stack frame, avoid
9397 any tricky guessing by dwarf2out. */
9398 if (m->fs.realigned)
9400 gcc_checking_assert (stack_realign_drap);
9402 if (regno == REGNO (crtl->drap_reg))
9404 /* A bit of a hack. We force the DRAP register to be saved in
9405 the re-aligned stack frame, which provides us with a copy
9406 of the CFA that will last past the prologue. Install it. */
9407 gcc_checking_assert (cfun->machine->fs.fp_valid);
9408 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9409 cfun->machine->fs.fp_offset - cfa_offset);
9410 mem = gen_rtx_MEM (mode, addr);
9411 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9415 /* The frame pointer is a stable reference within the
9416 aligned frame. Use it. */
9417 gcc_checking_assert (cfun->machine->fs.fp_valid);
9418 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9419 cfun->machine->fs.fp_offset - cfa_offset);
9420 mem = gen_rtx_MEM (mode, addr);
9421 add_reg_note (insn, REG_CFA_EXPRESSION,
9422 gen_rtx_SET (VOIDmode, mem, reg));
9426 /* The memory may not be relative to the current CFA register,
9427 which means that we may need to generate a new pattern for
9428 use by the unwind info. */
9429 else if (base != m->fs.cfa_reg)
9431 addr = plus_constant (Pmode, m->fs.cfa_reg,
9432 m->fs.cfa_offset - cfa_offset);
9433 mem = gen_rtx_MEM (mode, addr);
9434 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9438 /* Emit code to save registers using MOV insns.
9439 First register is stored at CFA - CFA_OFFSET. */
9441 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9445 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9446 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9448 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9449 cfa_offset -= UNITS_PER_WORD;
9453 /* Emit code to save SSE registers using MOV insns.
9454 First register is stored at CFA - CFA_OFFSET. */
9456 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9460 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9461 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9463 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9468 static GTY(()) rtx queued_cfa_restores;
9470 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9471 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9472 Don't add the note if the previously saved value will be left untouched
9473 within the stack red zone until return, as unwinders can find the same value
9474 in the register and on the stack. */
9477 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9479 if (!crtl->shrink_wrapped
9480 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9485 add_reg_note (insn, REG_CFA_RESTORE, reg);
9486 RTX_FRAME_RELATED_P (insn) = 1;
9490 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9493 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9496 ix86_add_queued_cfa_restore_notes (rtx insn)
9499 if (!queued_cfa_restores)
9501 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9503 XEXP (last, 1) = REG_NOTES (insn);
9504 REG_NOTES (insn) = queued_cfa_restores;
9505 queued_cfa_restores = NULL_RTX;
9506 RTX_FRAME_RELATED_P (insn) = 1;
9509 /* Expand prologue or epilogue stack adjustment.
9510 The pattern exists to put a dependency on all ebp-based memory accesses.
9511 STYLE should be negative if instructions should be marked as frame related,
9512 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
9516 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9517 int style, bool set_cfa)
9519 struct machine_function *m = cfun->machine;
9521 bool add_frame_related_expr = false;
9523 if (Pmode == SImode)
9524 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9525 else if (x86_64_immediate_operand (offset, DImode))
9526 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9530 /* r11 is used by indirect sibcall return as well, set before the
9531 epilogue and used after the epilogue. */
9533 tmp = gen_rtx_REG (DImode, R11_REG);
9536 gcc_assert (src != hard_frame_pointer_rtx
9537 && dest != hard_frame_pointer_rtx);
9538 tmp = hard_frame_pointer_rtx;
9540 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9542 add_frame_related_expr = true;
9544 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9547 insn = emit_insn (insn);
9549 ix86_add_queued_cfa_restore_notes (insn);
9555 gcc_assert (m->fs.cfa_reg == src);
9556 m->fs.cfa_offset += INTVAL (offset);
9557 m->fs.cfa_reg = dest;
9559 r = gen_rtx_PLUS (Pmode, src, offset);
9560 r = gen_rtx_SET (VOIDmode, dest, r);
9561 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9562 RTX_FRAME_RELATED_P (insn) = 1;
9566 RTX_FRAME_RELATED_P (insn) = 1;
9567 if (add_frame_related_expr)
9569 rtx r = gen_rtx_PLUS (Pmode, src, offset);
9570 r = gen_rtx_SET (VOIDmode, dest, r);
9571 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
9575 if (dest == stack_pointer_rtx)
9577 HOST_WIDE_INT ooffset = m->fs.sp_offset;
9578 bool valid = m->fs.sp_valid;
9580 if (src == hard_frame_pointer_rtx)
9582 valid = m->fs.fp_valid;
9583 ooffset = m->fs.fp_offset;
9585 else if (src == crtl->drap_reg)
9587 valid = m->fs.drap_valid;
9592 /* Else there are two possibilities: SP itself, which we set
9593 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9594 taken care of by hand along the eh_return path. */
9595 gcc_checking_assert (src == stack_pointer_rtx
9596 || offset == const0_rtx);
9599 m->fs.sp_offset = ooffset - INTVAL (offset);
9600 m->fs.sp_valid = valid;
9604 /* Find an available register to be used as the dynamic realign argument
9605 pointer register. Such a register will be written in the prologue and
9606 used at the beginning of the body, so it must not be
9607 1. a parameter passing register.
9609 We reuse the static-chain register if it is available. Otherwise, we
9610 use DI for i386 and R13 for x86-64. We chose R13 since it has a longer encoding.
9613 Return: the regno of the chosen register. */
9616 find_drap_reg (void)
9618 tree decl = cfun->decl;
9622 /* Use R13 for a nested function or a function that needs a static chain.
9623 Since a function with a tail call may use any caller-saved
9624 register in the epilogue, DRAP must not use a caller-saved
9625 register in such a case. */
9626 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9633 /* Use DI for a nested function or a function that needs a static chain.
9634 Since a function with a tail call may use any caller-saved
9635 register in the epilogue, DRAP must not use a caller-saved
9636 register in such a case. */
9637 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
9640 /* Reuse the static chain register if it isn't used for parameter passing. */
9642 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
9644 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
9645 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
9652 /* Return minimum incoming stack alignment. */
9655 ix86_minimum_incoming_stack_boundary (bool sibcall)
9657 unsigned int incoming_stack_boundary;
9659 /* Prefer the one specified at command line. */
9660 if (ix86_user_incoming_stack_boundary)
9661 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
9662 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
9663 when -mstackrealign is used, this isn't a sibcall check, and the
9664 estimated stack alignment is 128 bits. */
9667 && ix86_force_align_arg_pointer
9668 && crtl->stack_alignment_estimated == 128)
9669 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9671 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
9673 /* Incoming stack alignment can be changed on individual functions
9674 via force_align_arg_pointer attribute. We use the smallest
9675 incoming stack boundary. */
9676 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
9677 && lookup_attribute (ix86_force_align_arg_pointer_string,
9678 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
9679 incoming_stack_boundary = MIN_STACK_BOUNDARY;
9681 /* The incoming stack frame has to be aligned at least at
9682 parm_stack_boundary. */
9683 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9684 incoming_stack_boundary = crtl->parm_stack_boundary;
9686 /* Stack at entrance of main is aligned by runtime. We use the
9687 smallest incoming stack boundary. */
9688 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9689 && DECL_NAME (current_function_decl)
9690 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9691 && DECL_FILE_SCOPE_P (current_function_decl))
9692 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9694 return incoming_stack_boundary;
9697 /* Update incoming stack boundary and estimated stack alignment. */
9700 ix86_update_stack_boundary (void)
9702 ix86_incoming_stack_boundary
9703 = ix86_minimum_incoming_stack_boundary (false);
9705 /* x86_64 vararg needs 16-byte stack alignment for the register save area. */
9709 && crtl->stack_alignment_estimated < 128)
9710 crtl->stack_alignment_estimated = 128;
9713 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9714 needed or an rtx for DRAP otherwise. */
9717 ix86_get_drap_rtx (void)
9719 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9720 crtl->need_drap = true;
9722 if (stack_realign_drap)
9724 /* Assign DRAP to vDRAP and return vDRAP. */
9725 unsigned int regno = find_drap_reg ();
9730 arg_ptr = gen_rtx_REG (Pmode, regno);
9731 crtl->drap_reg = arg_ptr;
9734 drap_vreg = copy_to_reg (arg_ptr);
9738 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9741 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9742 RTX_FRAME_RELATED_P (insn) = 1;
9750 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9753 ix86_internal_arg_pointer (void)
9755 return virtual_incoming_args_rtx;
9758 struct scratch_reg {
9763 /* Return a short-lived scratch register for use on function entry.
9764 In 32-bit mode, it is valid only after the registers are saved
9765 in the prologue. This register must be released by means of
9766 release_scratch_register_on_entry once it is dead. */
9769 get_scratch_register_on_entry (struct scratch_reg *sr)
9777 /* We always use R11 in 64-bit mode. */
9782 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9784 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9786 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9787 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9788 int regparm = ix86_function_regparm (fntype, decl);
9790 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9792 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9793 for the static chain register. */
9794 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9795 && drap_regno != AX_REG)
9797 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
9798 for the static chain register. */
9799 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
9801 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
9803 /* ecx is the static chain register. */
9804 else if (regparm < 3 && !fastcall_p && !thiscall_p
9806 && drap_regno != CX_REG)
9808 else if (ix86_save_reg (BX_REG, true))
9810 /* esi is the static chain register. */
9811 else if (!(regparm == 3 && static_chain_p)
9812 && ix86_save_reg (SI_REG, true))
9814 else if (ix86_save_reg (DI_REG, true))
9818 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9823 sr->reg = gen_rtx_REG (Pmode, regno);
9826 rtx insn = emit_insn (gen_push (sr->reg));
9827 RTX_FRAME_RELATED_P (insn) = 1;
9831 /* Release a scratch register obtained from the preceding function. */
9834 release_scratch_register_on_entry (struct scratch_reg *sr)
9838 struct machine_function *m = cfun->machine;
9839 rtx x, insn = emit_insn (gen_pop (sr->reg));
9841 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9842 RTX_FRAME_RELATED_P (insn) = 1;
9843 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9844 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9845 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9846 m->fs.sp_offset -= UNITS_PER_WORD;
9850 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9852 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9855 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9857 /* We skip the probe for the first interval + a small dope of 4 words and
9858 probe that many bytes past the specified size to maintain a protection
9859 area at the bottom of the stack. */
9860 const int dope = 4 * UNITS_PER_WORD;
9861 rtx size_rtx = GEN_INT (size), last;
9863 /* See if we have a constant small number of probes to generate. If so,
9864 that's the easy case. The run-time loop is made up of 11 insns in the
9865 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9866 for n # of intervals. */
9867 if (size <= 5 * PROBE_INTERVAL)
9869 HOST_WIDE_INT i, adjust;
9870 bool first_probe = true;
9872 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9873 values of N from 1 until it exceeds SIZE. If only one probe is
9874 needed, this will not generate any code. Then adjust and probe
9875 to PROBE_INTERVAL + SIZE. */
9876 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9880 adjust = 2 * PROBE_INTERVAL + dope;
9881 first_probe = false;
9884 adjust = PROBE_INTERVAL;
9886 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9887 plus_constant (Pmode, stack_pointer_rtx,
9889 emit_stack_probe (stack_pointer_rtx);
9893 adjust = size + PROBE_INTERVAL + dope;
9895 adjust = size + PROBE_INTERVAL - i;
9897 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9898 plus_constant (Pmode, stack_pointer_rtx,
9900 emit_stack_probe (stack_pointer_rtx);
9902 /* Adjust back to account for the additional first interval. */
9903 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9904 plus_constant (Pmode, stack_pointer_rtx,
9905 PROBE_INTERVAL + dope)));
9908 /* Otherwise, do the same as above, but in a loop. Note that we must be
9909 extra careful with variables wrapping around because we might be at
9910 the very top (or the very bottom) of the address space and we have
9911 to be able to handle this case properly; in particular, we use an
9912 equality test for the loop condition. */
9915 HOST_WIDE_INT rounded_size;
9916 struct scratch_reg sr;
9918 get_scratch_register_on_entry (&sr);
9921 /* Step 1: round SIZE to the previous multiple of the interval. */
9923 rounded_size = size & -PROBE_INTERVAL;
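/* For instance, a SIZE of 10000 with a 4096-byte interval rounds down to a
   ROUNDED_SIZE of 8192; the remaining 1808 bytes are handled by the final
   adjustment and probe in Step 4 below. */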
9926 /* Step 2: compute initial and final value of the loop counter. */
9928 /* SP = SP_0 + PROBE_INTERVAL. */
9929 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9930 plus_constant (Pmode, stack_pointer_rtx,
9931 - (PROBE_INTERVAL + dope))));
9933 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9934 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9935 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9936 gen_rtx_PLUS (Pmode, sr.reg,
9937 stack_pointer_rtx)));
9942 while (SP != LAST_ADDR)
9944 SP = SP + PROBE_INTERVAL
9948 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9949 values of N from 1 until it is equal to ROUNDED_SIZE. */
9951 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9954 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9955 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9957 if (size != rounded_size)
9959 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9960 plus_constant (Pmode, stack_pointer_rtx,
9961 rounded_size - size)));
9962 emit_stack_probe (stack_pointer_rtx);
9965 /* Adjust back to account for the additional first interval. */
9966 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9967 plus_constant (Pmode, stack_pointer_rtx,
9968 PROBE_INTERVAL + dope)));
9970 release_scratch_register_on_entry (&sr);
9973 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9975 /* Even if the stack pointer isn't the CFA register, we need to correctly
9976 describe the adjustments made to it, in particular differentiate the
9977 frame-related ones from the frame-unrelated ones. */
9980 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
9981 XVECEXP (expr, 0, 0)
9982 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9983 plus_constant (Pmode, stack_pointer_rtx, -size));
9984 XVECEXP (expr, 0, 1)
9985 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9986 plus_constant (Pmode, stack_pointer_rtx,
9987 PROBE_INTERVAL + dope + size));
9988 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
9989 RTX_FRAME_RELATED_P (last) = 1;
9991 cfun->machine->fs.sp_offset += size;
9994 /* Make sure nothing is scheduled before we are done. */
9995 emit_insn (gen_blockage ());
9998 /* Adjust the stack pointer up to REG while probing it. */
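/* A rough, illustrative sketch of the sequence printed by this function
   for 32-bit code with a 4096-byte probe interval, where REG (shown here
   as %eax) holds the final stack address:

	.LPSRL0:
		cmpl	%eax, %esp
		je	.LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	.LPSRL0
	.LPSRE0:

   The 64-bit variant uses %rsp and the q-suffixed forms. */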
10001 output_adjust_stack_and_probe (rtx reg)
10003 static int labelno = 0;
10004 char loop_lab[32], end_lab[32];
10007 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10008 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10010 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10012 /* Jump to END_LAB if SP == LAST_ADDR. */
10013 xops[0] = stack_pointer_rtx;
10015 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10016 fputs ("\tje\t", asm_out_file);
10017 assemble_name_raw (asm_out_file, end_lab);
10018 fputc ('\n', asm_out_file);
10020 /* SP = SP + PROBE_INTERVAL. */
10021 xops[1] = GEN_INT (PROBE_INTERVAL);
10022 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10025 xops[1] = const0_rtx;
10026 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10028 fprintf (asm_out_file, "\tjmp\t");
10029 assemble_name_raw (asm_out_file, loop_lab);
10030 fputc ('\n', asm_out_file);
10032 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10037 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10038 inclusive. These are offsets from the current stack pointer. */
10041 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10043 /* See if we have a constant small number of probes to generate. If so,
10044 that's the easy case. The run-time loop is made up of 7 insns in the
10045 generic case while the compile-time loop is made up of n insns for n #
10047 if (size <= 7 * PROBE_INTERVAL)
10051 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10052 it exceeds SIZE. If only one probe is needed, this will not
10053 generate any code. Then probe at FIRST + SIZE. */
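/* As an illustration, FIRST == 4096 and SIZE == 12288 with a 4096-byte
   interval produce probes at 8192, 12288 and 16384 bytes below the
   current stack pointer. */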
10054 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10055 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10058 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10062 /* Otherwise, do the same as above, but in a loop. Note that we must be
10063 extra careful with variables wrapping around because we might be at
10064 the very top (or the very bottom) of the address space and we have
10065 to be able to handle this case properly; in particular, we use an
10066 equality test for the loop condition. */
10069 HOST_WIDE_INT rounded_size, last;
10070 struct scratch_reg sr;
10072 get_scratch_register_on_entry (&sr);
10075 /* Step 1: round SIZE to the previous multiple of the interval. */
10077 rounded_size = size & -PROBE_INTERVAL;
10080 /* Step 2: compute initial and final value of the loop counter. */
10082 /* TEST_OFFSET = FIRST. */
10083 emit_move_insn (sr.reg, GEN_INT (-first));
10085 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10086 last = first + rounded_size;
10089 /* Step 3: the loop
10091 while (TEST_ADDR != LAST_ADDR)
10093 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10097 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10098 until it is equal to ROUNDED_SIZE. */
10100 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10103 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10104 that SIZE is equal to ROUNDED_SIZE. */
10106 if (size != rounded_size)
10107 emit_stack_probe (plus_constant (Pmode,
10108 gen_rtx_PLUS (Pmode,
10111 rounded_size - size));
10113 release_scratch_register_on_entry (&sr);
10116 /* Make sure nothing is scheduled before we are done. */
10117 emit_insn (gen_blockage ());
10120 /* Probe a range of stack addresses from REG to END, inclusive. These are
10121 offsets from the current stack pointer. */
10124 output_probe_stack_range (rtx reg, rtx end)
10126 static int labelno = 0;
10127 char loop_lab[32], end_lab[32];
10130 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10131 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10133 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10135 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10138 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10139 fputs ("\tje\t", asm_out_file);
10140 assemble_name_raw (asm_out_file, end_lab);
10141 fputc ('\n', asm_out_file);
10143 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10144 xops[1] = GEN_INT (PROBE_INTERVAL);
10145 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10147 /* Probe at TEST_ADDR. */
10148 xops[0] = stack_pointer_rtx;
10150 xops[2] = const0_rtx;
10151 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10153 fprintf (asm_out_file, "\tjmp\t");
10154 assemble_name_raw (asm_out_file, loop_lab);
10155 fputc ('\n', asm_out_file);
10157 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10162 /* Finalize the stack_realign_needed flag, which guides generation of the
10163 prologue and epilogue in the correct form. */
10165 ix86_finalize_stack_realign_flags (void)
10167 /* Check if stack realignment is really needed after reload, and
10168 store the result in cfun. */
10169 unsigned int incoming_stack_boundary
10170 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10171 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10172 unsigned int stack_realign = (incoming_stack_boundary
10174 ? crtl->max_used_stack_slot_alignment
10175 : crtl->stack_alignment_needed));
10177 if (crtl->stack_realign_finalized)
10179 /* After stack_realign_needed is finalized, we can no longer
10181 gcc_assert (crtl->stack_realign_needed == stack_realign);
10185 /* If the only reason for frame_pointer_needed is that we conservatively
10186 assumed stack realignment might be needed, but in the end nothing that
10187 needed the stack alignment had been spilled, clear frame_pointer_needed
10188 and say we don't need stack realignment. */
10190 && !crtl->need_drap
10191 && frame_pointer_needed
10193 && flag_omit_frame_pointer
10194 && crtl->sp_is_unchanging
10195 && !ix86_current_function_calls_tls_descriptor
10196 && !crtl->accesses_prior_frames
10197 && !cfun->calls_alloca
10198 && !crtl->calls_eh_return
10199 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10200 && !ix86_frame_pointer_required ()
10201 && get_frame_size () == 0
10202 && ix86_nsaved_sseregs () == 0
10203 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10205 HARD_REG_SET set_up_by_prologue, prologue_used;
10208 CLEAR_HARD_REG_SET (prologue_used);
10209 CLEAR_HARD_REG_SET (set_up_by_prologue);
10210 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10211 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10212 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10213 HARD_FRAME_POINTER_REGNUM);
10217 FOR_BB_INSNS (bb, insn)
10218 if (NONDEBUG_INSN_P (insn)
10219 && requires_stack_frame_p (insn, prologue_used,
10220 set_up_by_prologue))
10222 crtl->stack_realign_needed = stack_realign;
10223 crtl->stack_realign_finalized = true;
10228 frame_pointer_needed = false;
10229 stack_realign = false;
10230 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10231 crtl->stack_alignment_needed = incoming_stack_boundary;
10232 crtl->stack_alignment_estimated = incoming_stack_boundary;
10233 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10234 crtl->preferred_stack_boundary = incoming_stack_boundary;
10235 df_finish_pass (true);
10236 df_scan_alloc (NULL);
10238 df_compute_regs_ever_live (true);
10242 crtl->stack_realign_needed = stack_realign;
10243 crtl->stack_realign_finalized = true;
10246 /* Expand the prologue into a bunch of separate insns. */
10249 ix86_expand_prologue (void)
10251 struct machine_function *m = cfun->machine;
10254 struct ix86_frame frame;
10255 HOST_WIDE_INT allocate;
10256 bool int_registers_saved;
10257 bool sse_registers_saved;
10259 ix86_finalize_stack_realign_flags ();
10261 /* DRAP should not coexist with stack_realign_fp */
10262 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10264 memset (&m->fs, 0, sizeof (m->fs));
10266 /* Initialize CFA state for before the prologue. */
10267 m->fs.cfa_reg = stack_pointer_rtx;
10268 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10270 /* Track SP offset to the CFA. We continue tracking this after we've
10271 swapped the CFA register away from SP. In the case of re-alignment
10272 this is fudged; we're interested in offsets within the local frame. */
10273 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10274 m->fs.sp_valid = true;
10276 ix86_compute_frame_layout (&frame);
10278 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10280 /* We should have already generated an error for any use of
10281 ms_hook on a nested function. */
10282 gcc_checking_assert (!ix86_static_chain_on_stack);
10284 /* Check if profiling is active and we should use the profiling-before-prologue
10285 variant. If so, sorry. */
10286 if (crtl->profile && flag_fentry != 0)
10287 sorry ("ms_hook_prologue attribute isn%'t compatible "
10288 "with -mfentry for 32-bit");
10290 /* In ix86_asm_output_function_label we emitted:
10291 8b ff movl.s %edi,%edi
10293 8b ec movl.s %esp,%ebp
10295 This matches the hookable function prologue in Win32 API
10296 functions in Microsoft Windows XP Service Pack 2 and newer.
10297 Wine uses this to enable Windows apps to hook the Win32 API
10298 functions provided by Wine.
10300 What that means is that we've already set up the frame pointer. */
10302 if (frame_pointer_needed
10303 && !(crtl->drap_reg && crtl->stack_realign_needed))
10307 /* We've decided to use the frame pointer already set up.
10308 Describe this to the unwinder by pretending that both
10309 push and mov insns happen right here.
10311 Putting the unwind info here at the end of the ms_hook
10312 is done so that we can make absolutely certain we get
10313 the required byte sequence at the start of the function,
10314 rather than relying on an assembler that can produce
10315 the exact encoding required.
10317 However it does mean (in the unpatched case) that we have
10318 a 1 insn window where the asynchronous unwind info is
10319 incorrect. However, if we placed the unwind info at
10320 its correct location we would have incorrect unwind info
10321 in the patched case. Which is probably all moot since
10322 I don't expect Wine to generate dwarf2 unwind info for the
10323 system libraries that use this feature. */
10325 insn = emit_insn (gen_blockage ());
10327 push = gen_push (hard_frame_pointer_rtx);
10328 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10329 stack_pointer_rtx);
10330 RTX_FRAME_RELATED_P (push) = 1;
10331 RTX_FRAME_RELATED_P (mov) = 1;
10333 RTX_FRAME_RELATED_P (insn) = 1;
10334 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10335 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10337 /* Note that gen_push incremented m->fs.cfa_offset, even
10338 though we didn't emit the push insn here. */
10339 m->fs.cfa_reg = hard_frame_pointer_rtx;
10340 m->fs.fp_offset = m->fs.cfa_offset;
10341 m->fs.fp_valid = true;
10345 /* The frame pointer is not needed so pop %ebp again.
10346 This leaves us with a pristine state. */
10347 emit_insn (gen_pop (hard_frame_pointer_rtx));
10351 /* The first insn of a function that accepts its static chain on the
10352 stack is to push the register that would be filled in by a direct
10353 call. This insn will be skipped by the trampoline. */
10354 else if (ix86_static_chain_on_stack)
10356 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10357 emit_insn (gen_blockage ());
10359 /* We don't want to interpret this push insn as a register save,
10360 only as a stack adjustment. The real copy of the register as
10361 a save will be done later, if needed. */
10362 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10363 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10364 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10365 RTX_FRAME_RELATED_P (insn) = 1;
10368 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10369 DRAP is needed and stack realignment is really needed after reload. */
10370 if (stack_realign_drap)
10372 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10374 /* Only need to push parameter pointer reg if it is caller saved. */
10375 if (!call_used_regs[REGNO (crtl->drap_reg)])
10377 /* Push arg pointer reg */
10378 insn = emit_insn (gen_push (crtl->drap_reg));
10379 RTX_FRAME_RELATED_P (insn) = 1;
10382 /* Grab the argument pointer. */
10383 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10384 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10385 RTX_FRAME_RELATED_P (insn) = 1;
10386 m->fs.cfa_reg = crtl->drap_reg;
10387 m->fs.cfa_offset = 0;
10389 /* Align the stack. */
10390 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10392 GEN_INT (-align_bytes)));
10393 RTX_FRAME_RELATED_P (insn) = 1;
10395 /* Replicate the return address on the stack so that return
10396 address can be reached via (argp - 1) slot. This is needed
10397 to implement macro RETURN_ADDR_RTX and intrinsic function
10398 expand_builtin_return_addr etc. */
10399 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10400 t = gen_frame_mem (word_mode, t);
10401 insn = emit_insn (gen_push (t));
10402 RTX_FRAME_RELATED_P (insn) = 1;
10404 /* For the purposes of frame and register save area addressing,
10405 we've started over with a new frame. */
10406 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10407 m->fs.realigned = true;
10410 int_registers_saved = (frame.nregs == 0);
10411 sse_registers_saved = (frame.nsseregs == 0);
10413 if (frame_pointer_needed && !m->fs.fp_valid)
10415 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10416 slower on all targets. Also sdb doesn't like it. */
10417 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10418 RTX_FRAME_RELATED_P (insn) = 1;
10420 /* Push registers now, before setting the frame pointer
10422 if (!int_registers_saved
10424 && !frame.save_regs_using_mov)
10426 ix86_emit_save_regs ();
10427 int_registers_saved = true;
10428 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10431 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10433 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10434 RTX_FRAME_RELATED_P (insn) = 1;
10436 if (m->fs.cfa_reg == stack_pointer_rtx)
10437 m->fs.cfa_reg = hard_frame_pointer_rtx;
10438 m->fs.fp_offset = m->fs.sp_offset;
10439 m->fs.fp_valid = true;
10443 if (!int_registers_saved)
10445 /* If saving registers via PUSH, do so now. */
10446 if (!frame.save_regs_using_mov)
10448 ix86_emit_save_regs ();
10449 int_registers_saved = true;
10450 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10453 /* When using the red zone we may start register saving before allocating
10454 the stack frame, saving one cycle of the prologue. However, avoid
10455 doing this if we have to probe the stack; at least on x86_64 the
10456 stack probe can turn into a call that clobbers a red zone location. */
10457 else if (ix86_using_red_zone ()
10458 && (! TARGET_STACK_PROBE
10459 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10461 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10462 int_registers_saved = true;
10466 if (stack_realign_fp)
10468 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10469 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10471 /* The computation of the size of the re-aligned stack frame means
10472 that we must allocate the size of the register save area before
10473 performing the actual alignment. Otherwise we cannot guarantee
10474 that there's enough storage above the realignment point. */
10475 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10476 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10477 GEN_INT (m->fs.sp_offset
10478 - frame.sse_reg_save_offset),
10481 /* Align the stack. */
10482 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10484 GEN_INT (-align_bytes)));
10486 /* For the purposes of register save area addressing, the stack
10487 pointer is no longer valid. As for the value of sp_offset,
10488 see ix86_compute_frame_layout, which we need to match in order
10489 to pass verification of stack_pointer_offset at the end. */
10490 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10491 m->fs.sp_valid = false;
10494 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10496 if (flag_stack_usage_info)
10498 /* We start to count from ARG_POINTER. */
10499 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10501 /* If it was realigned, take into account the fake frame. */
10502 if (stack_realign_drap)
10504 if (ix86_static_chain_on_stack)
10505 stack_size += UNITS_PER_WORD;
10507 if (!call_used_regs[REGNO (crtl->drap_reg)])
10508 stack_size += UNITS_PER_WORD;
10510 /* This over-estimates by 1 minimal-stack-alignment-unit but
10511 mitigates that by counting in the new return address slot. */
10512 current_function_dynamic_stack_size
10513 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10516 current_function_static_stack_size = stack_size;
10519 /* On SEH target with very large frame size, allocate an area to save
10520 SSE registers (as the very large allocation won't be described). */
10522 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10523 && !sse_registers_saved)
10525 HOST_WIDE_INT sse_size =
10526 frame.sse_reg_save_offset - frame.reg_save_offset;
10528 gcc_assert (int_registers_saved);
10530 /* No need to do stack checking as the area will be immediately
10532 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10533 GEN_INT (-sse_size), -1,
10534 m->fs.cfa_reg == stack_pointer_rtx);
10535 allocate -= sse_size;
10536 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10537 sse_registers_saved = true;
10540 /* The stack has already been decremented by the instruction calling us
10541 so probe if the size is non-negative to preserve the protection area. */
10542 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10544 /* We expect the registers to be saved when probes are used. */
10545 gcc_assert (int_registers_saved);
10547 if (STACK_CHECK_MOVING_SP)
10549 ix86_adjust_stack_and_probe (allocate);
10554 HOST_WIDE_INT size = allocate;
10556 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
10557 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
10559 if (TARGET_STACK_PROBE)
10560 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
10562 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
10568 else if (!ix86_target_stack_probe ()
10569 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
10571 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10572 GEN_INT (-allocate), -1,
10573 m->fs.cfa_reg == stack_pointer_rtx);
10577 rtx eax = gen_rtx_REG (Pmode, AX_REG);
10579 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
10580 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
10581 bool eax_live = false;
10582 bool r10_live = false;
10585 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
10586 if (!TARGET_64BIT_MS_ABI)
10587 eax_live = ix86_eax_live_at_start_p ();
10589 /* Note that SEH directives need to continue tracking the stack
10590 pointer even after the frame pointer has been set up. */
10593 insn = emit_insn (gen_push (eax));
10594 allocate -= UNITS_PER_WORD;
10595 if (sp_is_cfa_reg || TARGET_SEH)
10598 m->fs.cfa_offset += UNITS_PER_WORD;
10599 RTX_FRAME_RELATED_P (insn) = 1;
10605 r10 = gen_rtx_REG (Pmode, R10_REG);
10606 insn = emit_insn (gen_push (r10));
10607 allocate -= UNITS_PER_WORD;
10608 if (sp_is_cfa_reg || TARGET_SEH)
10611 m->fs.cfa_offset += UNITS_PER_WORD;
10612 RTX_FRAME_RELATED_P (insn) = 1;
10616 emit_move_insn (eax, GEN_INT (allocate));
10617 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
10619 /* Use the fact that AX still contains ALLOCATE. */
10620 adjust_stack_insn = (Pmode == DImode
10621 ? gen_pro_epilogue_adjust_stack_di_sub
10622 : gen_pro_epilogue_adjust_stack_si_sub);
10624 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
10625 stack_pointer_rtx, eax));
10627 if (sp_is_cfa_reg || TARGET_SEH)
10630 m->fs.cfa_offset += allocate;
10631 RTX_FRAME_RELATED_P (insn) = 1;
10632 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10633 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10634 plus_constant (Pmode, stack_pointer_rtx,
10637 m->fs.sp_offset += allocate;
10639 if (r10_live && eax_live)
10641 t = choose_baseaddr (m->fs.sp_offset - allocate);
10642 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10643 gen_frame_mem (word_mode, t));
10644 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
10645 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
10646 gen_frame_mem (word_mode, t));
10648 else if (eax_live || r10_live)
10650 t = choose_baseaddr (m->fs.sp_offset - allocate);
10651 emit_move_insn (gen_rtx_REG (word_mode,
10652 (eax_live ? AX_REG : R10_REG)),
10653 gen_frame_mem (word_mode, t));
10656 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
10658 /* If we haven't already set up the frame pointer, do so now. */
10659 if (frame_pointer_needed && !m->fs.fp_valid)
10661 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
10662 GEN_INT (frame.stack_pointer_offset
10663 - frame.hard_frame_pointer_offset));
10664 insn = emit_insn (insn);
10665 RTX_FRAME_RELATED_P (insn) = 1;
10666 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
10668 if (m->fs.cfa_reg == stack_pointer_rtx)
10669 m->fs.cfa_reg = hard_frame_pointer_rtx;
10670 m->fs.fp_offset = frame.hard_frame_pointer_offset;
10671 m->fs.fp_valid = true;
10674 if (!int_registers_saved)
10675 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10676 if (!sse_registers_saved)
10677 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10679 pic_reg_used = false;
10680 /* We don't use the PIC register for the PE-COFF target. */
10681 if (pic_offset_table_rtx
10683 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
10686 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
10688 if (alt_pic_reg_used != INVALID_REGNUM)
10689 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
10691 pic_reg_used = true;
10698 if (ix86_cmodel == CM_LARGE_PIC)
10700 rtx label, tmp_reg;
10702 gcc_assert (Pmode == DImode);
10703 label = gen_label_rtx ();
10704 emit_label (label);
10705 LABEL_PRESERVE_P (label) = 1;
10706 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
10707 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
10708 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
10710 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
10711 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
10712 pic_offset_table_rtx, tmp_reg));
10715 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
10719 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
10720 RTX_FRAME_RELATED_P (insn) = 1;
10721 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
10725 /* In the pic_reg_used case, make sure that the got load isn't deleted
10726 when mcount needs it. Blockage to avoid call movement across mcount
10727 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10729 if (crtl->profile && !flag_fentry && pic_reg_used)
10730 emit_insn (gen_prologue_use (pic_offset_table_rtx));
10732 if (crtl->drap_reg && !crtl->stack_realign_needed)
10734 /* vDRAP is set up, but after reload it turns out stack realignment
10735 isn't necessary; here we emit the prologue to set up DRAP
10736 without the stack realignment adjustment. */
10737 t = choose_baseaddr (0);
10738 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10741 /* Prevent instructions from being scheduled into register save push
10742 sequence when access to the redzone area is done through frame pointer.
10743 The offset between the frame pointer and the stack pointer is calculated
10744 relative to the value of the stack pointer at the end of the function
10745 prologue, and moving instructions that access redzone area via frame
10746 pointer inside push sequence violates this assumption. */
10747 if (frame_pointer_needed && frame.red_zone_size)
10748 emit_insn (gen_memory_blockage ());
10750 /* Emit cld instruction if stringops are used in the function. */
10751 if (TARGET_CLD && ix86_current_function_needs_cld)
10752 emit_insn (gen_cld ());
10754 /* SEH requires that the prologue end within 256 bytes of the start of
10755 the function. Prevent instruction schedules that would extend that.
10756 Further, prevent alloca modifications to the stack pointer from being
10757 combined with prologue modifications. */
10759 emit_insn (gen_prologue_use (stack_pointer_rtx));
10762 /* Emit code to restore REG using a POP insn. */
10765 ix86_emit_restore_reg_using_pop (rtx reg)
10767 struct machine_function *m = cfun->machine;
10768 rtx insn = emit_insn (gen_pop (reg));
10770 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
10771 m->fs.sp_offset -= UNITS_PER_WORD;
10773 if (m->fs.cfa_reg == crtl->drap_reg
10774 && REGNO (reg) == REGNO (crtl->drap_reg))
10776 /* Previously we'd represented the CFA as an expression
10777 like *(%ebp - 8). We've just popped that value from
10778 the stack, which means we need to reset the CFA to
10779 the drap register. This will remain until we restore
10780 the stack pointer. */
10781 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10782 RTX_FRAME_RELATED_P (insn) = 1;
10784 /* This means that the DRAP register is valid for addressing too. */
10785 m->fs.drap_valid = true;
10789 if (m->fs.cfa_reg == stack_pointer_rtx)
10791 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
10792 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10793 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
10794 RTX_FRAME_RELATED_P (insn) = 1;
10796 m->fs.cfa_offset -= UNITS_PER_WORD;
10799 /* When the frame pointer is the CFA, and we pop it, we are
10800 swapping back to the stack pointer as the CFA. This happens
10801 for stack frames that don't allocate other data, so we assume
10802 the stack pointer is now pointing at the return address, i.e.
10803 the function entry state, which makes the offset one word. */
10804 if (reg == hard_frame_pointer_rtx)
10806 m->fs.fp_valid = false;
10807 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10809 m->fs.cfa_reg = stack_pointer_rtx;
10810 m->fs.cfa_offset -= UNITS_PER_WORD;
10812 add_reg_note (insn, REG_CFA_DEF_CFA,
10813 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10814 GEN_INT (m->fs.cfa_offset)));
10815 RTX_FRAME_RELATED_P (insn) = 1;
10820 /* Emit code to restore saved registers using POP insns. */
10823 ix86_emit_restore_regs_using_pop (void)
10825 unsigned int regno;
10827 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10828 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
10829 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
10832 /* Emit code and notes for the LEAVE instruction. */
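/* leave behaves like "mov %ebp, %esp" followed by "pop %ebp" (or their
   64-bit counterparts), which is why the code below marks the stack
   pointer as valid again and the frame pointer as invalid. */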
10835 ix86_emit_leave (void)
10837 struct machine_function *m = cfun->machine;
10838 rtx insn = emit_insn (ix86_gen_leave ());
10840 ix86_add_queued_cfa_restore_notes (insn);
10842 gcc_assert (m->fs.fp_valid);
10843 m->fs.sp_valid = true;
10844 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
10845 m->fs.fp_valid = false;
10847 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
10849 m->fs.cfa_reg = stack_pointer_rtx;
10850 m->fs.cfa_offset = m->fs.sp_offset;
10852 add_reg_note (insn, REG_CFA_DEF_CFA,
10853 plus_constant (Pmode, stack_pointer_rtx,
10855 RTX_FRAME_RELATED_P (insn) = 1;
10857 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
10861 /* Emit code to restore saved registers using MOV insns.
10862 First register is restored from CFA - CFA_OFFSET. */
10864 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10865 bool maybe_eh_return)
10867 struct machine_function *m = cfun->machine;
10868 unsigned int regno;
10870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10871 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10873 rtx reg = gen_rtx_REG (word_mode, regno);
10876 mem = choose_baseaddr (cfa_offset);
10877 mem = gen_frame_mem (word_mode, mem);
10878 insn = emit_move_insn (reg, mem);
10880 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10882 /* Previously we'd represented the CFA as an expression
10883 like *(%ebp - 8). We've just popped that value from
10884 the stack, which means we need to reset the CFA to
10885 the drap register. This will remain until we restore
10886 the stack pointer. */
10887 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10888 RTX_FRAME_RELATED_P (insn) = 1;
10890 /* This means that the DRAP register is valid for addressing. */
10891 m->fs.drap_valid = true;
10894 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10896 cfa_offset -= UNITS_PER_WORD;
10900 /* Emit code to restore saved registers using MOV insns.
10901 First register is restored from CFA - CFA_OFFSET. */
10903 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10904 bool maybe_eh_return)
10906 unsigned int regno;
10908 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10909 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10911 rtx reg = gen_rtx_REG (V4SFmode, regno);
10914 mem = choose_baseaddr (cfa_offset);
10915 mem = gen_rtx_MEM (V4SFmode, mem);
10916 set_mem_align (mem, 128);
10917 emit_move_insn (reg, mem);
10919 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10925 /* Restore function stack, frame, and registers. */
10928 ix86_expand_epilogue (int style)
10930 struct machine_function *m = cfun->machine;
10931 struct machine_frame_state frame_state_save = m->fs;
10932 struct ix86_frame frame;
10933 bool restore_regs_via_mov;
10936 ix86_finalize_stack_realign_flags ();
10937 ix86_compute_frame_layout (&frame);
10939 m->fs.sp_valid = (!frame_pointer_needed
10940 || (crtl->sp_is_unchanging
10941 && !stack_realign_fp));
10942 gcc_assert (!m->fs.sp_valid
10943 || m->fs.sp_offset == frame.stack_pointer_offset);
10945 /* The FP must be valid if the frame pointer is present. */
10946 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10947 gcc_assert (!m->fs.fp_valid
10948 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10950 /* We must have *some* valid pointer to the stack frame. */
10951 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10953 /* The DRAP is never valid at this point. */
10954 gcc_assert (!m->fs.drap_valid);
10956 /* See the comment about red zone and frame
10957 pointer usage in ix86_expand_prologue. */
10958 if (frame_pointer_needed && frame.red_zone_size)
10959 emit_insn (gen_memory_blockage ());
10961 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10962 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10964 /* Determine the CFA offset of the end of the red-zone. */
10965 m->fs.red_zone_offset = 0;
10966 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10968 /* The red-zone begins below the return address. */
10969 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
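/* In the 64-bit SysV ABI, the only configuration with a red zone,
   RED_ZONE_SIZE is 128 and UNITS_PER_WORD is 8, so this is 136, i.e.
   the red zone ends 136 bytes below the CFA. */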
10971 /* When the register save area is in the aligned portion of
10972 the stack, determine the maximum runtime displacement that
10973 matches up with the aligned frame. */
10974 if (stack_realign_drap)
10975 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10979 /* Special care must be taken for the normal return case of a function
10980 using eh_return: the eax and edx registers are marked as saved, but
10981 not restored along this path. Adjust the save location to match. */
10982 if (crtl->calls_eh_return && style != 2)
10983 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10985 /* EH_RETURN requires the use of moves to function properly. */
10986 if (crtl->calls_eh_return)
10987 restore_regs_via_mov = true;
10988 /* SEH requires the use of pops to identify the epilogue. */
10989 else if (TARGET_SEH)
10990 restore_regs_via_mov = false;
10991 /* If we're only restoring one register and sp is not valid, then
10992 use a move instruction to restore the register, since it's
10993 less work than reloading sp and popping the register. */
10994 else if (!m->fs.sp_valid && frame.nregs <= 1)
10995 restore_regs_via_mov = true;
10996 else if (TARGET_EPILOGUE_USING_MOVE
10997 && cfun->machine->use_fast_prologue_epilogue
10998 && (frame.nregs > 1
10999 || m->fs.sp_offset != frame.reg_save_offset))
11000 restore_regs_via_mov = true;
11001 else if (frame_pointer_needed
11003 && m->fs.sp_offset != frame.reg_save_offset)
11004 restore_regs_via_mov = true;
11005 else if (frame_pointer_needed
11006 && TARGET_USE_LEAVE
11007 && cfun->machine->use_fast_prologue_epilogue
11008 && frame.nregs == 1)
11009 restore_regs_via_mov = true;
11011 restore_regs_via_mov = false;
11013 if (restore_regs_via_mov || frame.nsseregs)
11015 /* Ensure that the entire register save area is addressable via
11016 the stack pointer, if we will restore via sp. */
11018 && m->fs.sp_offset > 0x7fffffff
11019 && !(m->fs.fp_valid || m->fs.drap_valid)
11020 && (frame.nsseregs + frame.nregs) != 0)
11022 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11023 GEN_INT (m->fs.sp_offset
11024 - frame.sse_reg_save_offset),
11026 m->fs.cfa_reg == stack_pointer_rtx);
11030 /* If there are any SSE registers to restore, then we have to do it
11031 via moves, since there's obviously no pop for SSE regs. */
11032 if (frame.nsseregs)
11033 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11036 if (restore_regs_via_mov)
11041 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11043 /* eh_return epilogues need %ecx added to the stack pointer. */
11046 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11048 /* Stack align doesn't work with eh_return. */
11049 gcc_assert (!stack_realign_drap);
11050 /* Neither do regparm nested functions. */
11051 gcc_assert (!ix86_static_chain_on_stack);
11053 if (frame_pointer_needed)
11055 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11056 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11057 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11059 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11060 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11062 /* Note that we use SA as a temporary CFA, as the return
11063 address is at the proper place relative to it. We
11064 pretend this happens at the FP restore insn because
11065 prior to this insn the FP would be stored at the wrong
11066 offset relative to SA, and after this insn we have no
11067 other reasonable register to use for the CFA. We don't
11068 bother resetting the CFA to the SP for the duration of
11069 the return insn. */
11070 add_reg_note (insn, REG_CFA_DEF_CFA,
11071 plus_constant (Pmode, sa, UNITS_PER_WORD));
11072 ix86_add_queued_cfa_restore_notes (insn);
11073 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11074 RTX_FRAME_RELATED_P (insn) = 1;
11076 m->fs.cfa_reg = sa;
11077 m->fs.cfa_offset = UNITS_PER_WORD;
11078 m->fs.fp_valid = false;
11080 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11081 const0_rtx, style, false);
11085 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11086 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11087 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11088 ix86_add_queued_cfa_restore_notes (insn);
11090 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11091 if (m->fs.cfa_offset != UNITS_PER_WORD)
11093 m->fs.cfa_offset = UNITS_PER_WORD;
11094 add_reg_note (insn, REG_CFA_DEF_CFA,
11095 plus_constant (Pmode, stack_pointer_rtx,
11097 RTX_FRAME_RELATED_P (insn) = 1;
11100 m->fs.sp_offset = UNITS_PER_WORD;
11101 m->fs.sp_valid = true;
11106 /* SEH requires that the function end with (1) a stack adjustment
11107 if necessary, (2) a sequence of pops, and (3) a return or
11108 jump instruction. Prevent insns from the function body from
11109 being scheduled into this sequence. */
11112 /* Prevent a catch region from being adjacent to the standard
11113 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11114 several other flags that would be interesting to test are
11116 if (flag_non_call_exceptions)
11117 emit_insn (gen_nops (const1_rtx));
11119 emit_insn (gen_blockage ());
11122 /* First step is to deallocate the stack frame so that we can
11123 pop the registers. Also do it on SEH target for very large
11124 frame as the emitted instructions aren't allowed by the ABI in
11126 if (!m->fs.sp_valid
11128 && (m->fs.sp_offset - frame.reg_save_offset
11129 >= SEH_MAX_FRAME_SIZE)))
11131 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11132 GEN_INT (m->fs.fp_offset
11133 - frame.reg_save_offset),
11136 else if (m->fs.sp_offset != frame.reg_save_offset)
11138 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11139 GEN_INT (m->fs.sp_offset
11140 - frame.reg_save_offset),
11142 m->fs.cfa_reg == stack_pointer_rtx);
11145 ix86_emit_restore_regs_using_pop ();
11148 /* If we used a frame pointer and haven't already got rid of it,
11150 if (m->fs.fp_valid)
11152 /* If the stack pointer is valid and pointing at the frame
11153 pointer store address, then we only need a pop. */
11154 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11155 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11156 /* Leave results in shorter dependency chains on CPUs that are
11157 able to grok it fast. */
11158 else if (TARGET_USE_LEAVE
11159 || optimize_bb_for_size_p (EXIT_BLOCK_PTR)
11160 || !cfun->machine->use_fast_prologue_epilogue)
11161 ix86_emit_leave ();
11164 pro_epilogue_adjust_stack (stack_pointer_rtx,
11165 hard_frame_pointer_rtx,
11166 const0_rtx, style, !using_drap);
11167 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11173 int param_ptr_offset = UNITS_PER_WORD;
11176 gcc_assert (stack_realign_drap);
11178 if (ix86_static_chain_on_stack)
11179 param_ptr_offset += UNITS_PER_WORD;
11180 if (!call_used_regs[REGNO (crtl->drap_reg)])
11181 param_ptr_offset += UNITS_PER_WORD;
11183 insn = emit_insn (gen_rtx_SET
11184 (VOIDmode, stack_pointer_rtx,
11185 gen_rtx_PLUS (Pmode,
11187 GEN_INT (-param_ptr_offset))));
11188 m->fs.cfa_reg = stack_pointer_rtx;
11189 m->fs.cfa_offset = param_ptr_offset;
11190 m->fs.sp_offset = param_ptr_offset;
11191 m->fs.realigned = false;
11193 add_reg_note (insn, REG_CFA_DEF_CFA,
11194 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11195 GEN_INT (param_ptr_offset)));
11196 RTX_FRAME_RELATED_P (insn) = 1;
11198 if (!call_used_regs[REGNO (crtl->drap_reg)])
11199 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11202 /* At this point the stack pointer must be valid, and we must have
11203 restored all of the registers. We may not have deallocated the
11204 entire stack frame. We've delayed this until now because it may
11205 be possible to merge the local stack deallocation with the
11206 deallocation forced by ix86_static_chain_on_stack. */
11207 gcc_assert (m->fs.sp_valid);
11208 gcc_assert (!m->fs.fp_valid);
11209 gcc_assert (!m->fs.realigned);
11210 if (m->fs.sp_offset != UNITS_PER_WORD)
11212 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11213 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11217 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11219 /* Sibcall epilogues don't want a return instruction. */
11222 m->fs = frame_state_save;
11226 if (crtl->args.pops_args && crtl->args.size)
11228 rtx popc = GEN_INT (crtl->args.pops_args);
11230 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11231 address, do explicit add, and jump indirectly to the caller. */
11233 if (crtl->args.pops_args >= 65536)
11235 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11238 /* There is no "pascal" calling convention in any 64bit ABI. */
11239 gcc_assert (!TARGET_64BIT);
11241 insn = emit_insn (gen_pop (ecx));
11242 m->fs.cfa_offset -= UNITS_PER_WORD;
11243 m->fs.sp_offset -= UNITS_PER_WORD;
11245 add_reg_note (insn, REG_CFA_ADJUST_CFA,
11246 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
11247 add_reg_note (insn, REG_CFA_REGISTER,
11248 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11249 RTX_FRAME_RELATED_P (insn) = 1;
11251 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11253 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11256 emit_jump_insn (gen_simple_return_pop_internal (popc));
11259 emit_jump_insn (gen_simple_return_internal ());
11261 /* Restore the state back to the state from the prologue,
11262 so that it's correct for the next epilogue. */
11263 m->fs = frame_state_save;
11266 /* Reset from the function's potential modifications. */
11269 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11270 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11272 if (pic_offset_table_rtx)
11273 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11275 /* Mach-O doesn't support labels at the end of objects, so if
11276 it looks like we might want one, insert a NOP. */
11278 rtx insn = get_last_insn ();
11279 rtx deleted_debug_label = NULL_RTX;
11282 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11284 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11285 notes only, instead set their CODE_LABEL_NUMBER to -1,
11286 otherwise there would be code generation differences
11287 in between -g and -g0. */
11288 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11289 deleted_debug_label = insn;
11290 insn = PREV_INSN (insn);
11295 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11296 fputs ("\tnop\n", file);
11297 else if (deleted_debug_label)
11298 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11299 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11300 CODE_LABEL_NUMBER (insn) = -1;
11306 /* Return a scratch register to use in the split stack prologue. The
11307 split stack prologue is used for -fsplit-stack. It is the first
11308 instructions in the function, even before the regular prologue.
11309 The scratch register can be any caller-saved register which is not
11310 used for parameters or for the static chain. */
11312 static unsigned int
11313 split_stack_prologue_scratch_regno (void)
11319 bool is_fastcall, is_thiscall;
11322 is_fastcall = (lookup_attribute ("fastcall",
11323 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11325 is_thiscall = (lookup_attribute ("thiscall",
11326 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11328 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11332 if (DECL_STATIC_CHAIN (cfun->decl))
11334 sorry ("-fsplit-stack does not support fastcall with "
11335 "nested function");
11336 return INVALID_REGNUM;
11340 else if (is_thiscall)
11342 if (!DECL_STATIC_CHAIN (cfun->decl))
11346 else if (regparm < 3)
11348 if (!DECL_STATIC_CHAIN (cfun->decl))
11354 sorry ("-fsplit-stack does not support 2 register "
11355 " parameters for a nested function");
11356 return INVALID_REGNUM;
11363 /* FIXME: We could make this work by pushing a register
11364 around the addition and comparison. */
11365 sorry ("-fsplit-stack does not support 3 register parameters");
11366 return INVALID_REGNUM;
11372 /* A SYMBOL_REF for the function which allocates new stack space for
11374 static GTY(()) rtx split_stack_fn;
11376 /* A SYMBOL_REF for the more stack function when using the large
11379 static GTY(()) rtx split_stack_fn_large;
11381 /* Handle -fsplit-stack. These are the first instructions in the
11382 function, even before the regular prologue. */
11385 ix86_expand_split_stack_prologue (void)
11387 struct ix86_frame frame;
11388 HOST_WIDE_INT allocate;
11389 unsigned HOST_WIDE_INT args_size;
11390 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11391 rtx scratch_reg = NULL_RTX;
11392 rtx varargs_label = NULL_RTX;
11395 gcc_assert (flag_split_stack && reload_completed);
11397 ix86_finalize_stack_realign_flags ();
11398 ix86_compute_frame_layout (&frame);
11399 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11401 /* This is the label we will branch to if we have enough stack
11402 space. We expect the basic block reordering pass to reverse this
11403 branch if optimizing, so that we branch in the unlikely case. */
11404 label = gen_label_rtx ();
11406 /* We need to compare the stack pointer minus the frame size with
11407 the stack boundary in the TCB. The stack boundary always gives
11408 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11409 can compare directly. Otherwise we need to do an addition. */
11411 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11412 UNSPEC_STACK_CHECK);
11413 limit = gen_rtx_CONST (Pmode, limit);
11414 limit = gen_rtx_MEM (Pmode, limit);
11415 if (allocate < SPLIT_STACK_AVAILABLE)
11416 current = stack_pointer_rtx;
11419 unsigned int scratch_regno;
11422 /* We need a scratch register to hold the stack pointer minus
11423 the required frame size. Since this is the very start of the
11424 function, the scratch register can be any caller-saved
11425 register which is not used for parameters. */
11426 offset = GEN_INT (- allocate);
11427 scratch_regno = split_stack_prologue_scratch_regno ();
11428 if (scratch_regno == INVALID_REGNUM)
11430 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11431 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11433 /* We don't use ix86_gen_add3 in this case because it will
11434 want to split to lea, but when not optimizing the insn
11435 will not be split after this point. */
11436 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11437 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11442 emit_move_insn (scratch_reg, offset);
11443 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11444 stack_pointer_rtx));
11446 current = scratch_reg;
11449 ix86_expand_branch (GEU, current, limit, label);
11450 jump_insn = get_last_insn ();
11451 JUMP_LABEL (jump_insn) = label;
11453 /* Mark the jump as very likely to be taken. */
11454 add_reg_note (jump_insn, REG_BR_PROB,
11455 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
11457 if (split_stack_fn == NULL_RTX)
11458 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11459 fn = split_stack_fn;
11461 /* Get more stack space. We pass in the desired stack space and the
11462 size of the arguments to copy to the new stack. In 32-bit mode
11463 we push the parameters; __morestack will return on a new stack
11464 anyhow. In 64-bit mode we pass the parameters in r10 and
11466 allocate_rtx = GEN_INT (allocate);
11467 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11468 call_fusage = NULL_RTX;
11473 reg10 = gen_rtx_REG (Pmode, R10_REG);
11474 reg11 = gen_rtx_REG (Pmode, R11_REG);
11476 /* If this function uses a static chain, it will be in %r10.
11477 Preserve it across the call to __morestack. */
11478 if (DECL_STATIC_CHAIN (cfun->decl))
11482 rax = gen_rtx_REG (word_mode, AX_REG);
11483 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11484 use_reg (&call_fusage, rax);
11487 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11490 HOST_WIDE_INT argval;
11492 gcc_assert (Pmode == DImode);
11493 /* When using the large model we need to load the address
11494 into a register, and we've run out of registers. So we
11495 switch to a different calling convention, and we call a
11496 different function: __morestack_large. We pass the
11497 argument size in the upper 32 bits of r10 and pass the
11498 frame size in the lower 32 bits. */
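/* For example (illustrative values only), args_size == 16 and
   allocate == 4096 combine into r10 == 0x0000001000001000. */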
11499 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11500 gcc_assert ((args_size & 0xffffffff) == args_size);
11502 if (split_stack_fn_large == NULL_RTX)
11503 split_stack_fn_large =
11504 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11506 if (ix86_cmodel == CM_LARGE_PIC)
11510 label = gen_label_rtx ();
11511 emit_label (label);
11512 LABEL_PRESERVE_P (label) = 1;
11513 emit_insn (gen_set_rip_rex64 (reg10, label));
11514 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11515 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11516 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11518 x = gen_rtx_CONST (Pmode, x);
11519 emit_move_insn (reg11, x);
11520 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11521 x = gen_const_mem (Pmode, x);
11522 emit_move_insn (reg11, x);
11525 emit_move_insn (reg11, split_stack_fn_large);
11529 argval = ((args_size << 16) << 16) + allocate;
11530 emit_move_insn (reg10, GEN_INT (argval));
11534 emit_move_insn (reg10, allocate_rtx);
11535 emit_move_insn (reg11, GEN_INT (args_size));
11536 use_reg (&call_fusage, reg11);
11539 use_reg (&call_fusage, reg10);
11543 emit_insn (gen_push (GEN_INT (args_size)));
11544 emit_insn (gen_push (allocate_rtx));
11546 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
11547 GEN_INT (UNITS_PER_WORD), constm1_rtx,
11549 add_function_usage_to (call_insn, call_fusage);
11551 /* In order to make call/return prediction work right, we now need
11552 to execute a return instruction. See
11553 libgcc/config/i386/morestack.S for the details on how this works.
11555 For flow purposes gcc must not see this as a return
11556 instruction--we need control flow to continue at the subsequent
11557 label. Therefore, we use an unspec. */
11558 gcc_assert (crtl->args.pops_args < 65536);
11559 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
11561 /* If we are in 64-bit mode and this function uses a static chain,
11562 we saved %r10 in %rax before calling __morestack. */
11563 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
11564 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11565 gen_rtx_REG (word_mode, AX_REG));
11567 /* If this function calls va_start, we need to store a pointer to
11568 the arguments on the old stack, because they may not have been
11569 all copied to the new stack. At this point the old stack can be
11570 found at the frame pointer value used by __morestack, because
11571 __morestack has set that up before calling back to us. Here we
11572 store that pointer in a scratch register, and in
11573 ix86_expand_prologue we store the scratch register in a stack
11575 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11577 unsigned int scratch_regno;
11581 scratch_regno = split_stack_prologue_scratch_regno ();
11582 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11583 frame_reg = gen_rtx_REG (Pmode, BP_REG);
11587 return address within this function
11588 return address of caller of this function
11590 So we add three words to get to the stack arguments.
11594 return address within this function
11595 first argument to __morestack
11596 second argument to __morestack
11597 return address of caller of this function
11599 So we add five words to get to the stack arguments.
11601 words = TARGET_64BIT ? 3 : 5;
11602 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11603 gen_rtx_PLUS (Pmode, frame_reg,
11604 GEN_INT (words * UNITS_PER_WORD))));
11606 varargs_label = gen_label_rtx ();
11607 emit_jump_insn (gen_jump (varargs_label));
11608 JUMP_LABEL (get_last_insn ()) = varargs_label;
11613 emit_label (label);
11614 LABEL_NUSES (label) = 1;
11616 /* If this function calls va_start, we now have to set the scratch
11617 register for the case where we do not call __morestack. In this
11618 case we need to set it based on the stack pointer. */
11619 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11621 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11622 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11623 GEN_INT (UNITS_PER_WORD))));
11625 emit_label (varargs_label);
11626 LABEL_NUSES (varargs_label) = 1;
11630 /* We may have to tell the dataflow pass that the split stack prologue
11631 is initializing a scratch register. */
11634 ix86_live_on_entry (bitmap regs)
11636 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11638 gcc_assert (flag_split_stack);
11639 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
11643 /* Determine if op is a suitable SUBREG RTX for an address. */
11646 ix86_address_subreg_operand (rtx op)
11648 enum machine_mode mode;
11653 mode = GET_MODE (op);
11655 if (GET_MODE_CLASS (mode) != MODE_INT)
11658 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11659 failures when the register is one word out of a two word structure. */
11660 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
11663 /* Allow only SUBREGs of non-eliminable hard registers. */
11664 return register_no_elim_operand (op, mode);
11667 /* Extract the parts of an RTL expression that is a valid memory address
11668 for an instruction. Return 0 if the structure of the address is
11669 grossly off. Return -1 if the address contains ASHIFT, so it is not
11670 strictly valid, but is still used for computing the length of an lea instruction. */
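/* As an illustration (register choices are arbitrary), an address such as
     (plus:SI (mult:SI (reg:SI ax) (const_int 4))
	      (plus:SI (reg:SI bx) (const_int 8)))
   decomposes into base = bx, index = ax, scale = 4 and disp = 8. */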
11673 ix86_decompose_address (rtx addr, struct ix86_address *out)
11675 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
11676 rtx base_reg, index_reg;
11677 HOST_WIDE_INT scale = 1;
11678 rtx scale_rtx = NULL_RTX;
11681 enum ix86_address_seg seg = SEG_DEFAULT;
11683 /* Allow zero-extended SImode addresses,
11684 they will be emitted with addr32 prefix. */
11685 if (TARGET_64BIT && GET_MODE (addr) == DImode)
11687 if (GET_CODE (addr) == ZERO_EXTEND
11688 && GET_MODE (XEXP (addr, 0)) == SImode)
11690 addr = XEXP (addr, 0);
11691 if (CONST_INT_P (addr))
11694 else if (GET_CODE (addr) == AND
11695 && const_32bit_mask (XEXP (addr, 1), DImode))
11697 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
11698 if (addr == NULL_RTX)
11701 if (CONST_INT_P (addr))
11706 /* Allow SImode subregs of DImode addresses,
11707 they will be emitted with addr32 prefix. */
11708 if (TARGET_64BIT && GET_MODE (addr) == SImode)
11710 if (GET_CODE (addr) == SUBREG
11711 && GET_MODE (SUBREG_REG (addr)) == DImode)
11713 addr = SUBREG_REG (addr);
11714 if (CONST_INT_P (addr))
11721 else if (GET_CODE (addr) == SUBREG)
11723 if (ix86_address_subreg_operand (SUBREG_REG (addr)))
11728 else if (GET_CODE (addr) == PLUS)
11730 rtx addends[4], op;
11738 addends[n++] = XEXP (op, 1);
11741 while (GET_CODE (op) == PLUS);
11746 for (i = n; i >= 0; --i)
11749 switch (GET_CODE (op))
11754 index = XEXP (op, 0);
11755 scale_rtx = XEXP (op, 1);
11761 index = XEXP (op, 0);
11762 tmp = XEXP (op, 1);
11763 if (!CONST_INT_P (tmp))
11765 scale = INTVAL (tmp);
11766 if ((unsigned HOST_WIDE_INT) scale > 3)
11768 scale = 1 << scale;
11773 if (GET_CODE (op) != UNSPEC)
11778 if (XINT (op, 1) == UNSPEC_TP
11779 && TARGET_TLS_DIRECT_SEG_REFS
11780 && seg == SEG_DEFAULT)
11781 seg = DEFAULT_TLS_SEG_REG;
11787 if (!ix86_address_subreg_operand (SUBREG_REG (op)))
11814 else if (GET_CODE (addr) == MULT)
11816 index = XEXP (addr, 0); /* index*scale */
11817 scale_rtx = XEXP (addr, 1);
11819 else if (GET_CODE (addr) == ASHIFT)
11821 /* We're called for lea too, which implements ashift on occasion. */
11822 index = XEXP (addr, 0);
11823 tmp = XEXP (addr, 1);
11824 if (!CONST_INT_P (tmp))
11826 scale = INTVAL (tmp);
11827 if ((unsigned HOST_WIDE_INT) scale > 3)
11829 scale = 1 << scale;
11832 else if (CONST_INT_P (addr))
11834 if (!x86_64_immediate_operand (addr, VOIDmode))
11837 /* Constant addresses are sign-extended to 64 bits; we have to
11838 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11840 && val_signbit_known_set_p (SImode, INTVAL (addr)))
11846 disp = addr; /* displacement */
11852 else if (GET_CODE (index) == SUBREG
11853 && ix86_address_subreg_operand (SUBREG_REG (index)))
11859 /* Address override works only on the (%reg) part of %fs:(%reg). */
11860 if (seg != SEG_DEFAULT
11861 && ((base && GET_MODE (base) != word_mode)
11862 || (index && GET_MODE (index) != word_mode)))
11865 /* Extract the integral value of scale. */
11868 if (!CONST_INT_P (scale_rtx))
11870 scale = INTVAL (scale_rtx);
11873 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
11874 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
11876 /* Avoid useless 0 displacement. */
11877 if (disp == const0_rtx && (base || index))
11880 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
11881 if (base_reg && index_reg && scale == 1
11882 && (index_reg == arg_pointer_rtx
11883 || index_reg == frame_pointer_rtx
11884 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
11887 tmp = base, base = index, index = tmp;
11888 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
11891 /* Special case: %ebp cannot be encoded as a base without a displacement. Similarly %r13. */
11895 && (base_reg == hard_frame_pointer_rtx
11896 || base_reg == frame_pointer_rtx
11897 || base_reg == arg_pointer_rtx
11898 || (REG_P (base_reg)
11899 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
11900 || REGNO (base_reg) == R13_REG))))
11903 /* Special case: on the K6, [%esi] forces the instruction to be vector decoded.
11904 Avoid this by transforming it to [%esi+0].
11905 Reload calls address legitimization without cfun defined, so we need
11906 to test that cfun is non-NULL. */
11907 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
11908 && base_reg && !index_reg && !disp
11909 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
11912 /* Special case: encode reg+reg instead of reg*2. */
11913 if (!base && index && scale == 2)
11914 base = index, base_reg = index_reg, scale = 1;
11916 /* Special case: scaling cannot be encoded without base or displacement. */
11917 if (!base && !disp && index && scale != 1)
11921 out->index = index;
11923 out->scale = scale;
11929 /* Return the cost of the memory address X.
11930 For i386, it is better to use a complex address than let gcc copy
11931 the address into a reg and make a new pseudo. But not if the address
11932 requires two regs - that would mean more pseudos with longer lifetimes. */
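/* Added example (not in the original source): for an int array A in %esi
   indexed by I in %ebx, it is cheaper to emit
       movl    (%esi,%ebx,4), %eax
   directly than to compute the address into a fresh pseudo first; an
   address that needs two new registers, however, would create more
   pseudos with longer lifetimes.  */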
11935 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
11936 addr_space_t as ATTRIBUTE_UNUSED,
11937 bool speed ATTRIBUTE_UNUSED)
11939 struct ix86_address parts;
11941 int ok = ix86_decompose_address (x, &parts);
11945 if (parts.base && GET_CODE (parts.base) == SUBREG)
11946 parts.base = SUBREG_REG (parts.base);
11947 if (parts.index && GET_CODE (parts.index) == SUBREG)
11948 parts.index = SUBREG_REG (parts.index);
11950 /* Attempt to minimize number of registers in the address. */
11952 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
11954 && (!REG_P (parts.index)
11955 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
11959 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
11961 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
11962 && parts.base != parts.index)
11965 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
11966 since its predecode logic can't detect the length of such instructions
11967 and it degenerates to vector decoding. Increase the cost of such
11968 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
11969 to split such addresses or even refuse them at all.
11971 The following addressing modes are affected: [base+scale*index], [scale*index+disp] and [base+index].
11976 The first and last case may be avoidable by explicitly coding the zero in
11977 the memory address, but I don't have an AMD K6 machine handy to check this theory. */
11981 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
11982 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
11983 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
11989 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
11990 this is used to form addresses to local data when -fPIC is in effect. */
11994 darwin_local_data_pic (rtx disp)
11996 return (GET_CODE (disp) == UNSPEC
11997 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12000 /* Determine if a given RTX is a valid constant. We already know this
12001 satisfies CONSTANT_P. */
12004 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12006 switch (GET_CODE (x))
12011 if (GET_CODE (x) == PLUS)
12013 if (!CONST_INT_P (XEXP (x, 1)))
12018 if (TARGET_MACHO && darwin_local_data_pic (x))
12021 /* Only some unspecs are valid as "constants". */
12022 if (GET_CODE (x) == UNSPEC)
12023 switch (XINT (x, 1))
12026 case UNSPEC_GOTOFF:
12027 case UNSPEC_PLTOFF:
12028 return TARGET_64BIT;
12030 case UNSPEC_NTPOFF:
12031 x = XVECEXP (x, 0, 0);
12032 return (GET_CODE (x) == SYMBOL_REF
12033 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12034 case UNSPEC_DTPOFF:
12035 x = XVECEXP (x, 0, 0);
12036 return (GET_CODE (x) == SYMBOL_REF
12037 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12042 /* We must have drilled down to a symbol. */
12043 if (GET_CODE (x) == LABEL_REF)
12045 if (GET_CODE (x) != SYMBOL_REF)
12050 /* TLS symbols are never valid. */
12051 if (SYMBOL_REF_TLS_MODEL (x))
12054 /* DLLIMPORT symbols are never valid. */
12055 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12056 && SYMBOL_REF_DLLIMPORT_P (x))
12060 /* mdynamic-no-pic */
12061 if (MACHO_DYNAMIC_NO_PIC_P)
12062 return machopic_symbol_defined_p (x);
12067 if (GET_MODE (x) == TImode
12068 && x != CONST0_RTX (TImode)
12074 if (!standard_sse_constant_p (x))
12081 /* Otherwise we handle everything else in the move patterns. */
12085 /* Determine if it's legal to put X into the constant pool. This
12086 is not possible for the address of thread-local symbols, which
12087 is checked above. */
12090 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12092 /* We can always put integral constants and vectors in memory. */
12093 switch (GET_CODE (x))
12103 return !ix86_legitimate_constant_p (mode, x);
12106 /* Nonzero if the symbol is marked as dllimport or as a stub-variable; otherwise return false. */
12110 is_imported_p (rtx x)
12112 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12113 || GET_CODE (x) != SYMBOL_REF)
12116 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12120 /* Nonzero if the constant value X is a legitimate general operand
12121 when generating PIC code. It is given that flag_pic is on and
12122 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12125 legitimate_pic_operand_p (rtx x)
12129 switch (GET_CODE (x))
12132 inner = XEXP (x, 0);
12133 if (GET_CODE (inner) == PLUS
12134 && CONST_INT_P (XEXP (inner, 1)))
12135 inner = XEXP (inner, 0);
12137 /* Only some unspecs are valid as "constants". */
12138 if (GET_CODE (inner) == UNSPEC)
12139 switch (XINT (inner, 1))
12142 case UNSPEC_GOTOFF:
12143 case UNSPEC_PLTOFF:
12144 return TARGET_64BIT;
12146 x = XVECEXP (inner, 0, 0);
12147 return (GET_CODE (x) == SYMBOL_REF
12148 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12149 case UNSPEC_MACHOPIC_OFFSET:
12150 return legitimate_pic_address_disp_p (x);
12158 return legitimate_pic_address_disp_p (x);
12165 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
12169 legitimate_pic_address_disp_p (rtx disp)
12173 /* In 64bit mode we can allow direct addresses of symbols and labels
12174 when they are not dynamic symbols. */
12177 rtx op0 = disp, op1;
12179 switch (GET_CODE (disp))
12185 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12187 op0 = XEXP (XEXP (disp, 0), 0);
12188 op1 = XEXP (XEXP (disp, 0), 1);
12189 if (!CONST_INT_P (op1)
12190 || INTVAL (op1) >= 16*1024*1024
12191 || INTVAL (op1) < -16*1024*1024)
12193 if (GET_CODE (op0) == LABEL_REF)
12195 if (GET_CODE (op0) == CONST
12196 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12197 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12199 if (GET_CODE (op0) == UNSPEC
12200 && XINT (op0, 1) == UNSPEC_PCREL)
12202 if (GET_CODE (op0) != SYMBOL_REF)
12207 /* TLS references should always be enclosed in UNSPEC.
12208 The dllimported symbol always needs to be resolved. */
12209 if (SYMBOL_REF_TLS_MODEL (op0)
12210 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12215 if (is_imported_p (op0))
12218 if (SYMBOL_REF_FAR_ADDR_P (op0)
12219 || !SYMBOL_REF_LOCAL_P (op0))
12222 /* Function symbols need to be resolved only for the large model.
12224 For the small model we don't need to resolve anything here. */
12226 if ((ix86_cmodel != CM_LARGE_PIC
12227 && SYMBOL_REF_FUNCTION_P (op0))
12228 || ix86_cmodel == CM_SMALL_PIC)
12230 /* Non-external symbols don't need to be resolved for
12231 the large and medium models. */
12232 if ((ix86_cmodel == CM_LARGE_PIC
12233 || ix86_cmodel == CM_MEDIUM_PIC)
12234 && !SYMBOL_REF_EXTERNAL_P (op0))
12237 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12238 && SYMBOL_REF_LOCAL_P (op0)
12239 && ix86_cmodel != CM_LARGE_PIC)
12247 if (GET_CODE (disp) != CONST)
12249 disp = XEXP (disp, 0);
12253 /* It is not safe to allow PLUS expressions; this limits the allowed
12254 displacement into the GOT table. We should not need these anyway. */
12255 if (GET_CODE (disp) != UNSPEC
12256 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12257 && XINT (disp, 1) != UNSPEC_GOTOFF
12258 && XINT (disp, 1) != UNSPEC_PCREL
12259 && XINT (disp, 1) != UNSPEC_PLTOFF))
12262 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12263 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12269 if (GET_CODE (disp) == PLUS)
12271 if (!CONST_INT_P (XEXP (disp, 1)))
12273 disp = XEXP (disp, 0);
12277 if (TARGET_MACHO && darwin_local_data_pic (disp))
12280 if (GET_CODE (disp) != UNSPEC)
12283 switch (XINT (disp, 1))
12288 /* We need to check for both symbols and labels because VxWorks loads
12289 text labels with @GOT rather than @GOTOFF. See gotoff_operand for details. */
12291 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12292 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12293 case UNSPEC_GOTOFF:
12294 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12295 While the ABI also specifies a 32bit relocation, we don't produce it in
12296 the small PIC model at all. */
12297 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12298 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12300 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12302 case UNSPEC_GOTTPOFF:
12303 case UNSPEC_GOTNTPOFF:
12304 case UNSPEC_INDNTPOFF:
12307 disp = XVECEXP (disp, 0, 0);
12308 return (GET_CODE (disp) == SYMBOL_REF
12309 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12310 case UNSPEC_NTPOFF:
12311 disp = XVECEXP (disp, 0, 0);
12312 return (GET_CODE (disp) == SYMBOL_REF
12313 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12314 case UNSPEC_DTPOFF:
12315 disp = XVECEXP (disp, 0, 0);
12316 return (GET_CODE (disp) == SYMBOL_REF
12317 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12323 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12324 replace the input X, or the original X if no replacement is called for.
12325 The output parameter *WIN is 1 if the calling macro should goto WIN,
12326 0 if it should not. */
12329 ix86_legitimize_reload_address (rtx x,
12330 enum machine_mode mode ATTRIBUTE_UNUSED,
12331 int opnum, int type,
12332 int ind_levels ATTRIBUTE_UNUSED)
12334 /* Reload can generate:
12336 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12340 This RTX is rejected by ix86_legitimate_address_p because
12341 base register 97 fails the strict base-register check. Following this rejection,
12342 reload pushes all three components into separate registers,
12343 creating an invalid memory address RTX.
12345 The following code reloads only the invalid part of the
12346 memory address RTX. */
12348 if (GET_CODE (x) == PLUS
12349 && REG_P (XEXP (x, 1))
12350 && GET_CODE (XEXP (x, 0)) == PLUS
12351 && REG_P (XEXP (XEXP (x, 0), 1)))
12354 bool something_reloaded = false;
12356 base = XEXP (XEXP (x, 0), 1);
12357 if (!REG_OK_FOR_BASE_STRICT_P (base))
12359 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12360 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12361 opnum, (enum reload_type) type);
12362 something_reloaded = true;
12365 index = XEXP (x, 1);
12366 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12368 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12369 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12370 opnum, (enum reload_type) type);
12371 something_reloaded = true;
12374 gcc_assert (something_reloaded);
12381 /* Recognizes RTL expressions that are valid memory addresses for an
12382 instruction. The MODE argument is the machine mode for the MEM
12383 expression that wants to use this address.
12385 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12386 convert common non-canonical forms to canonical form so that they will be recognized. */
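/* Added example of the canonical form expected here (not in the original
   source): a scaled index must appear as MULT, not ASHIFT, so

     (plus:SI (mult:SI (reg:SI bx) (const_int 8)) (reg:SI si))

   is acceptable, while the equivalent
     (plus:SI (ashift:SI (reg:SI bx) (const_int 3)) (reg:SI si))
   is expected to have been converted by ix86_legitimize_address first.  */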
12390 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12391 rtx addr, bool strict)
12393 struct ix86_address parts;
12394 rtx base, index, disp;
12395 HOST_WIDE_INT scale;
12397 if (ix86_decompose_address (addr, &parts) <= 0)
12398 /* Decomposition failed. */
12402 index = parts.index;
12404 scale = parts.scale;
12406 /* Validate base register. */
12413 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
12414 reg = SUBREG_REG (base);
12416 /* Base is not a register. */
12419 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
12422 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12423 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12424 /* Base is not valid. */
12428 /* Validate index register. */
12435 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
12436 reg = SUBREG_REG (index);
12438 /* Index is not a register. */
12441 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
12444 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12445 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12446 /* Index is not valid. */
12450 /* Index and base should have the same mode. */
12452 && GET_MODE (base) != GET_MODE (index))
12455 /* Validate scale factor. */
12459 /* Scale without index. */
12462 if (scale != 2 && scale != 4 && scale != 8)
12463 /* Scale is not a valid multiplier. */
12467 /* Validate displacement. */
12470 if (GET_CODE (disp) == CONST
12471 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12472 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12473 switch (XINT (XEXP (disp, 0), 1))
12475 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12476 used. While the ABI also specifies 32bit relocations, we don't produce
12477 them at all and use IP-relative addressing instead. */
12479 case UNSPEC_GOTOFF:
12480 gcc_assert (flag_pic);
12482 goto is_legitimate_pic;
12484 /* 64bit address unspec. */
12487 case UNSPEC_GOTPCREL:
12489 gcc_assert (flag_pic);
12490 goto is_legitimate_pic;
12492 case UNSPEC_GOTTPOFF:
12493 case UNSPEC_GOTNTPOFF:
12494 case UNSPEC_INDNTPOFF:
12495 case UNSPEC_NTPOFF:
12496 case UNSPEC_DTPOFF:
12499 case UNSPEC_STACK_CHECK:
12500 gcc_assert (flag_split_stack);
12504 /* Invalid address unspec. */
12508 else if (SYMBOLIC_CONST (disp)
12512 && MACHOPIC_INDIRECT
12513 && !machopic_operand_p (disp)
12519 if (TARGET_64BIT && (index || base))
12521 /* foo@dtpoff(%rX) is ok. */
12522 if (GET_CODE (disp) != CONST
12523 || GET_CODE (XEXP (disp, 0)) != PLUS
12524 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12525 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12526 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12527 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12528 /* Non-constant pic memory reference. */
12531 else if ((!TARGET_MACHO || flag_pic)
12532 && ! legitimate_pic_address_disp_p (disp))
12533 /* Displacement is an invalid pic construct. */
12536 else if (MACHO_DYNAMIC_NO_PIC_P
12537 && !ix86_legitimate_constant_p (Pmode, disp))
12538 /* Displacement must be referenced via non_lazy_pointer. */
12542 /* This code used to verify that a symbolic pic displacement
12543 includes the pic_offset_table_rtx register.
12545 While this is a good idea, unfortunately these constructs may
12546 be created by the "adds using lea" optimization for incorrect code like:  int a;  int foo (int i) { return *(&a + i); }
12555 This code is nonsensical, but results in addressing the
12556 GOT table with a pic_offset_table_rtx base. We can't
12557 just refuse it easily, since it gets matched by the
12558 "addsi3" pattern, which later gets split to an lea when the
12559 output register differs from the input. While this
12560 could be handled by a separate addsi pattern for this case
12561 that never results in an lea, disabling this test seems to be
12562 the easier and correct fix for the crash. */
12564 else if (GET_CODE (disp) != LABEL_REF
12565 && !CONST_INT_P (disp)
12566 && (GET_CODE (disp) != CONST
12567 || !ix86_legitimate_constant_p (Pmode, disp))
12568 && (GET_CODE (disp) != SYMBOL_REF
12569 || !ix86_legitimate_constant_p (Pmode, disp)))
12570 /* Displacement is not constant. */
12572 else if (TARGET_64BIT
12573 && !x86_64_immediate_operand (disp, VOIDmode))
12574 /* Displacement is out of range. */
12578 /* Everything looks valid. */
12582 /* Determine if a given RTX is a valid constant address. */
12585 constant_address_p (rtx x)
12587 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12590 /* Return a unique alias set for the GOT. */
12592 static alias_set_type
12593 ix86_GOT_alias_set (void)
12595 static alias_set_type set = -1;
12597 set = new_alias_set ();
12601 /* Return a legitimate reference for ORIG (an address) using the
12602 register REG. If REG is 0, a new pseudo is generated.
12604 There are two types of references that must be handled:
12606 1. Global data references must load the address from the GOT, via
12607 the PIC reg. An insn is emitted to do this load, and the reg is
12610 2. Static data references, constant pool addresses, and code labels
12611 compute the address as an offset from the GOT, whose base is in
12612 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12613 differentiate them from global data objects. The returned
12614 address is the PIC reg + an unspec constant.
12616 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12617 reg also appears in the address. */
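/* Added illustration of the two cases above for 32-bit ELF PIC (not in the
   original source; abbreviated).  A global object is reached through its
   GOT slot:

       movl    foo@GOT(%ebx), %eax      # load &foo from the GOT
       movl    (%eax), %eax             # then load foo itself

   while a static/local object is addressed as an offset from the GOT base
   kept in %ebx:

       movl    bar@GOTOFF(%ebx), %eax   # load bar directly

   The RTL generated below uses UNSPEC_GOT and UNSPEC_GOTOFF respectively.  */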
12620 legitimize_pic_address (rtx orig, rtx reg)
12623 rtx new_rtx = orig;
12626 if (TARGET_MACHO && !TARGET_64BIT)
12629 reg = gen_reg_rtx (Pmode);
12630 /* Use the generic Mach-O PIC machinery. */
12631 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12635 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12637 rtx tmp = legitimize_pe_coff_symbol (addr, true);
12642 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12644 else if (TARGET_64BIT && !TARGET_PECOFF
12645 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
12648 /* This symbol may be referenced via a displacement from the PIC
12649 base address (@GOTOFF). */
12651 if (reload_in_progress)
12652 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12653 if (GET_CODE (addr) == CONST)
12654 addr = XEXP (addr, 0);
12655 if (GET_CODE (addr) == PLUS)
12657 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12659 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12662 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12663 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12665 tmpreg = gen_reg_rtx (Pmode);
12668 emit_move_insn (tmpreg, new_rtx);
12672 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12673 tmpreg, 1, OPTAB_DIRECT);
12677 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12679 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
12681 /* This symbol may be referenced via a displacement from the PIC
12682 base address (@GOTOFF). */
12684 if (reload_in_progress)
12685 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12686 if (GET_CODE (addr) == CONST)
12687 addr = XEXP (addr, 0);
12688 if (GET_CODE (addr) == PLUS)
12690 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12692 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12695 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12696 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12697 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12701 emit_move_insn (reg, new_rtx);
12705 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12706 /* We can't use @GOTOFF for text labels on VxWorks;
12707 see gotoff_operand. */
12708 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12710 rtx tmp = legitimize_pe_coff_symbol (addr, true);
12714 /* For x64 PE-COFF there is no GOT table, so we use the address directly. */
12716 if (TARGET_64BIT && TARGET_PECOFF)
12718 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12719 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12722 reg = gen_reg_rtx (Pmode);
12723 emit_move_insn (reg, new_rtx);
12726 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12728 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12729 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12730 new_rtx = gen_const_mem (Pmode, new_rtx);
12731 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12734 reg = gen_reg_rtx (Pmode);
12735 /* Use gen_movsi directly, otherwise the address is loaded
12736 into a register for CSE. We don't want to CSE these addresses;
12737 instead we CSE addresses from the GOT table, so skip this. */
12738 emit_insn (gen_movsi (reg, new_rtx));
12743 /* This symbol must be referenced via a load from the
12744 Global Offset Table (@GOT). */
12746 if (reload_in_progress)
12747 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12748 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12749 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12751 new_rtx = force_reg (Pmode, new_rtx);
12752 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12753 new_rtx = gen_const_mem (Pmode, new_rtx);
12754 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12757 reg = gen_reg_rtx (Pmode);
12758 emit_move_insn (reg, new_rtx);
12764 if (CONST_INT_P (addr)
12765 && !x86_64_immediate_operand (addr, VOIDmode))
12769 emit_move_insn (reg, addr);
12773 new_rtx = force_reg (Pmode, addr);
12775 else if (GET_CODE (addr) == CONST)
12777 addr = XEXP (addr, 0);
12779 /* We must match stuff we generate before. Assume the only
12780 unspecs that can get here are ours. Not that we could do
12781 anything with them anyway.... */
12782 if (GET_CODE (addr) == UNSPEC
12783 || (GET_CODE (addr) == PLUS
12784 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12786 gcc_assert (GET_CODE (addr) == PLUS);
12788 if (GET_CODE (addr) == PLUS)
12790 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12792 /* Check first to see if this is a constant offset from a @GOTOFF
12793 symbol reference. */
12794 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
12795 && CONST_INT_P (op1))
12799 if (reload_in_progress)
12800 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12801 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12803 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12804 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12805 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12809 emit_move_insn (reg, new_rtx);
12815 if (INTVAL (op1) < -16*1024*1024
12816 || INTVAL (op1) >= 16*1024*1024)
12818 if (!x86_64_immediate_operand (op1, Pmode))
12819 op1 = force_reg (Pmode, op1);
12820 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12826 rtx base = legitimize_pic_address (op0, reg);
12827 enum machine_mode mode = GET_MODE (base);
12829 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
12831 if (CONST_INT_P (new_rtx))
12833 if (INTVAL (new_rtx) < -16*1024*1024
12834 || INTVAL (new_rtx) >= 16*1024*1024)
12836 if (!x86_64_immediate_operand (new_rtx, mode))
12837 new_rtx = force_reg (mode, new_rtx);
12839 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
12842 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
12846 if (GET_CODE (new_rtx) == PLUS
12847 && CONSTANT_P (XEXP (new_rtx, 1)))
12849 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
12850 new_rtx = XEXP (new_rtx, 1);
12852 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
12860 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12863 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
12865 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12867 if (GET_MODE (tp) != tp_mode)
12869 gcc_assert (GET_MODE (tp) == SImode);
12870 gcc_assert (tp_mode == DImode);
12872 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
12876 tp = copy_to_mode_reg (tp_mode, tp);
12881 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12883 static GTY(()) rtx ix86_tls_symbol;
12886 ix86_tls_get_addr (void)
12888 if (!ix86_tls_symbol)
12891 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12892 ? "___tls_get_addr" : "__tls_get_addr");
12894 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12897 return ix86_tls_symbol;
12900 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12902 static GTY(()) rtx ix86_tls_module_base_symbol;
12905 ix86_tls_module_base (void)
12907 if (!ix86_tls_module_base_symbol)
12909 ix86_tls_module_base_symbol
12910 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12912 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12913 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12916 return ix86_tls_module_base_symbol;
12919 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12920 false if we expect this to be used for a memory address and true if
12921 we expect to load the address into a register. */
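/* Added sketch of the access sequences the cases below produce (not in the
   original source; the exact code depends on target flags):

     local exec   (64-bit, GNU TLS):   movq  %fs:0, %rax
                                       leaq  x@tpoff(%rax), %rax
     initial exec (64-bit):            movq  x@gottpoff(%rip), %rax
                                       movq  %fs:(%rax), %rax
     global/local dynamic:             a call to __tls_get_addr (or the
                                       GNU2 descriptor sequence).  */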
12924 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12926 rtx dest, base, off;
12927 rtx pic = NULL_RTX, tp = NULL_RTX;
12928 enum machine_mode tp_mode = Pmode;
12933 case TLS_MODEL_GLOBAL_DYNAMIC:
12934 dest = gen_reg_rtx (Pmode);
12938 if (flag_pic && !TARGET_PECOFF)
12939 pic = pic_offset_table_rtx;
12942 pic = gen_reg_rtx (Pmode);
12943 emit_insn (gen_set_got (pic));
12947 if (TARGET_GNU2_TLS)
12950 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12952 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12954 tp = get_thread_pointer (Pmode, true);
12955 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12957 if (GET_MODE (x) != Pmode)
12958 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12960 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12964 rtx caddr = ix86_tls_get_addr ();
12968 rtx rax = gen_rtx_REG (Pmode, AX_REG);
12973 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
12974 insns = get_insns ();
12977 if (GET_MODE (x) != Pmode)
12978 x = gen_rtx_ZERO_EXTEND (Pmode, x);
12980 RTL_CONST_CALL_P (insns) = 1;
12981 emit_libcall_block (insns, dest, rax, x);
12984 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12988 case TLS_MODEL_LOCAL_DYNAMIC:
12989 base = gen_reg_rtx (Pmode);
12994 pic = pic_offset_table_rtx;
12997 pic = gen_reg_rtx (Pmode);
12998 emit_insn (gen_set_got (pic));
13002 if (TARGET_GNU2_TLS)
13004 rtx tmp = ix86_tls_module_base ();
13007 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13009 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13011 tp = get_thread_pointer (Pmode, true);
13012 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13013 gen_rtx_MINUS (Pmode, tmp, tp));
13017 rtx caddr = ix86_tls_get_addr ();
13021 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13026 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13027 insns = get_insns ();
13030 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13031 share the LD_BASE result with other LD model accesses. */
13032 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13033 UNSPEC_TLS_LD_BASE);
13035 RTL_CONST_CALL_P (insns) = 1;
13036 emit_libcall_block (insns, base, rax, eqv);
13039 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13042 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13043 off = gen_rtx_CONST (Pmode, off);
13045 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13047 if (TARGET_GNU2_TLS)
13049 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13051 if (GET_MODE (x) != Pmode)
13052 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13054 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13058 case TLS_MODEL_INITIAL_EXEC:
13061 if (TARGET_SUN_TLS && !TARGET_X32)
13063 /* The Sun linker took the AMD64 TLS spec literally
13064 and can only handle %rax as destination of the
13065 initial executable code sequence. */
13067 dest = gen_reg_rtx (DImode);
13068 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13072 /* Generate DImode references to avoid %fs:(%reg32)
13073 problems and linker IE->LE relaxation bug. */
13076 type = UNSPEC_GOTNTPOFF;
13080 if (reload_in_progress)
13081 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13082 pic = pic_offset_table_rtx;
13083 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13085 else if (!TARGET_ANY_GNU_TLS)
13087 pic = gen_reg_rtx (Pmode);
13088 emit_insn (gen_set_got (pic));
13089 type = UNSPEC_GOTTPOFF;
13094 type = UNSPEC_INDNTPOFF;
13097 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13098 off = gen_rtx_CONST (tp_mode, off);
13100 off = gen_rtx_PLUS (tp_mode, pic, off);
13101 off = gen_const_mem (tp_mode, off);
13102 set_mem_alias_set (off, ix86_GOT_alias_set ());
13104 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13106 base = get_thread_pointer (tp_mode,
13107 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13108 off = force_reg (tp_mode, off);
13109 return gen_rtx_PLUS (tp_mode, base, off);
13113 base = get_thread_pointer (Pmode, true);
13114 dest = gen_reg_rtx (Pmode);
13115 emit_insn (ix86_gen_sub3 (dest, base, off));
13119 case TLS_MODEL_LOCAL_EXEC:
13120 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13121 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13122 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13123 off = gen_rtx_CONST (Pmode, off);
13125 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13127 base = get_thread_pointer (Pmode,
13128 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13129 return gen_rtx_PLUS (Pmode, base, off);
13133 base = get_thread_pointer (Pmode, true);
13134 dest = gen_reg_rtx (Pmode);
13135 emit_insn (ix86_gen_sub3 (dest, base, off));
13140 gcc_unreachable ();
13146 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13147 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13148 unique refptr-DECL symbol corresponding to symbol DECL. */
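/* Added example (not in the original source): for a declaration such as
     __declspec(dllimport) int foo;
   references to FOO are rewritten into a load through the import pointer,
   i.e. *__imp__foo on 32-bit targets (note the extra underscore from the
   user label prefix) or *__imp_foo on x64, which the linker resolves via
   the import table.  */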
13150 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13151 htab_t dllimport_map;
13154 get_dllimport_decl (tree decl, bool beimport)
13156 struct tree_map *h, in;
13159 const char *prefix;
13160 size_t namelen, prefixlen;
13165 if (!dllimport_map)
13166 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13168 in.hash = htab_hash_pointer (decl);
13169 in.base.from = decl;
13170 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13171 h = (struct tree_map *) *loc;
13175 *loc = h = ggc_alloc_tree_map ();
13177 h->base.from = decl;
13178 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13179 VAR_DECL, NULL, ptr_type_node);
13180 DECL_ARTIFICIAL (to) = 1;
13181 DECL_IGNORED_P (to) = 1;
13182 DECL_EXTERNAL (to) = 1;
13183 TREE_READONLY (to) = 1;
13185 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13186 name = targetm.strip_name_encoding (name);
13188 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13189 ? "*__imp_" : "*__imp__";
13191 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13192 namelen = strlen (name);
13193 prefixlen = strlen (prefix);
13194 imp_name = (char *) alloca (namelen + prefixlen + 1);
13195 memcpy (imp_name, prefix, prefixlen);
13196 memcpy (imp_name + prefixlen, name, namelen + 1);
13198 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13199 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13200 SET_SYMBOL_REF_DECL (rtl, to);
13201 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13204 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13205 #ifdef SUB_TARGET_RECORD_STUB
13206 SUB_TARGET_RECORD_STUB (name);
13210 rtl = gen_const_mem (Pmode, rtl);
13211 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13213 SET_DECL_RTL (to, rtl);
13214 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13219 /* Expand SYMBOL into its corresponding far-address symbol.
13220 WANT_REG is true if we require the result to be a register. */
13223 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13228 gcc_assert (SYMBOL_REF_DECL (symbol));
13229 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13231 x = DECL_RTL (imp_decl);
13233 x = force_reg (Pmode, x);
13237 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13238 true if we require the result be a register. */
13241 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13246 gcc_assert (SYMBOL_REF_DECL (symbol));
13247 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13249 x = DECL_RTL (imp_decl);
13251 x = force_reg (Pmode, x);
13255 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13256 is true if we require the result be a register. */
13259 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13261 if (!TARGET_PECOFF)
13264 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13266 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13267 return legitimize_dllimport_symbol (addr, inreg);
13268 if (GET_CODE (addr) == CONST
13269 && GET_CODE (XEXP (addr, 0)) == PLUS
13270 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13271 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13273 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13274 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13278 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13280 if (GET_CODE (addr) == SYMBOL_REF
13281 && !is_imported_p (addr)
13282 && SYMBOL_REF_EXTERNAL_P (addr)
13283 && SYMBOL_REF_DECL (addr))
13284 return legitimize_pe_coff_extern_decl (addr, inreg);
13286 if (GET_CODE (addr) == CONST
13287 && GET_CODE (XEXP (addr, 0)) == PLUS
13288 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13289 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13290 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13291 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13293 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13294 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13299 /* Try machine-dependent ways of modifying an illegitimate address
13300 to be legitimate. If we find one, return the new, valid address.
13301 This macro is used in only one place: `memory_address' in explow.c.
13303 OLDX is the address as it was before break_out_memory_refs was called.
13304 In some cases it is useful to look at this to decide what needs to be done.
13306 It is always safe for this macro to do nothing. It exists to recognize
13307 opportunities to optimize the output.
13309 For the 80386, we handle X+REG by loading X into a register R and
13310 using R+REG. R will go in a general reg and indexing will be used.
13311 However, if REG is a broken-out memory address or multiplication,
13312 nothing needs to be done because REG can certainly go in a general reg.
13314 When -fpic is used, special handling is needed for symbolic references.
13315 See comments by legitimize_pic_address in i386.c for details. */
13318 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13319 enum machine_mode mode)
13324 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13326 return legitimize_tls_address (x, (enum tls_model) log, false);
13327 if (GET_CODE (x) == CONST
13328 && GET_CODE (XEXP (x, 0)) == PLUS
13329 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13330 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13332 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13333 (enum tls_model) log, false);
13334 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13337 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13339 rtx tmp = legitimize_pe_coff_symbol (x, true);
13344 if (flag_pic && SYMBOLIC_CONST (x))
13345 return legitimize_pic_address (x, 0);
13348 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13349 return machopic_indirect_data_reference (x, 0);
13352 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13353 if (GET_CODE (x) == ASHIFT
13354 && CONST_INT_P (XEXP (x, 1))
13355 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13358 log = INTVAL (XEXP (x, 1));
13359 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13360 GEN_INT (1 << log));
13363 if (GET_CODE (x) == PLUS)
13365 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13367 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13368 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13369 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13372 log = INTVAL (XEXP (XEXP (x, 0), 1));
13373 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13374 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13375 GEN_INT (1 << log));
13378 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13379 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13380 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13383 log = INTVAL (XEXP (XEXP (x, 1), 1));
13384 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13385 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13386 GEN_INT (1 << log));
13389 /* Put multiply first if it isn't already. */
13390 if (GET_CODE (XEXP (x, 1)) == MULT)
13392 rtx tmp = XEXP (x, 0);
13393 XEXP (x, 0) = XEXP (x, 1);
13398 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13399 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13400 created by virtual register instantiation, register elimination, and
13401 similar optimizations. */
13402 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13405 x = gen_rtx_PLUS (Pmode,
13406 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13407 XEXP (XEXP (x, 1), 0)),
13408 XEXP (XEXP (x, 1), 1));
13412 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13413 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13414 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13415 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13416 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13417 && CONSTANT_P (XEXP (x, 1)))
13420 rtx other = NULL_RTX;
13422 if (CONST_INT_P (XEXP (x, 1)))
13424 constant = XEXP (x, 1);
13425 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13427 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13429 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13430 other = XEXP (x, 1);
13438 x = gen_rtx_PLUS (Pmode,
13439 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13440 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13441 plus_constant (Pmode, other,
13442 INTVAL (constant)));
13446 if (changed && ix86_legitimate_address_p (mode, x, false))
13449 if (GET_CODE (XEXP (x, 0)) == MULT)
13452 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
13455 if (GET_CODE (XEXP (x, 1)) == MULT)
13458 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
13462 && REG_P (XEXP (x, 1))
13463 && REG_P (XEXP (x, 0)))
13466 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13469 x = legitimize_pic_address (x, 0);
13472 if (changed && ix86_legitimate_address_p (mode, x, false))
13475 if (REG_P (XEXP (x, 0)))
13477 rtx temp = gen_reg_rtx (Pmode);
13478 rtx val = force_operand (XEXP (x, 1), temp);
13481 val = convert_to_mode (Pmode, val, 1);
13482 emit_move_insn (temp, val);
13485 XEXP (x, 1) = temp;
13489 else if (REG_P (XEXP (x, 1)))
13491 rtx temp = gen_reg_rtx (Pmode);
13492 rtx val = force_operand (XEXP (x, 0), temp);
13495 val = convert_to_mode (Pmode, val, 1);
13496 emit_move_insn (temp, val);
13499 XEXP (x, 0) = temp;
13507 /* Print an integer constant expression in assembler syntax. Addition
13508 and subtraction are the only arithmetic that may appear in these
13509 expressions. FILE is the stdio stream to write to, X is the rtx, and
13510 CODE is the operand print code from the output string. */
13513 output_pic_addr_const (FILE *file, rtx x, int code)
13517 switch (GET_CODE (x))
13520 gcc_assert (flag_pic);
13525 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
13526 output_addr_const (file, x);
13529 const char *name = XSTR (x, 0);
13531 /* Mark the decl as referenced so that cgraph will
13532 output the function. */
13533 if (SYMBOL_REF_DECL (x))
13534 mark_decl_referenced (SYMBOL_REF_DECL (x));
13537 if (MACHOPIC_INDIRECT
13538 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
13539 name = machopic_indirection_name (x, /*stub_p=*/true);
13541 assemble_name (file, name);
13543 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
13544 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
13545 fputs ("@PLT", file);
13552 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
13553 assemble_name (asm_out_file, buf);
13557 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13561 /* This used to output parentheses around the expression,
13562 but that does not work on the 386 (either ATT or BSD assembler). */
13563 output_pic_addr_const (file, XEXP (x, 0), code);
13567 if (GET_MODE (x) == VOIDmode)
13569 /* We can use %d if the number is <32 bits and positive. */
13570 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13571 fprintf (file, "0x%lx%08lx",
13572 (unsigned long) CONST_DOUBLE_HIGH (x),
13573 (unsigned long) CONST_DOUBLE_LOW (x));
13575 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13578 /* We can't handle floating point constants;
13579 TARGET_PRINT_OPERAND must handle them. */
13580 output_operand_lossage ("floating constant misused");
13584 /* Some assemblers need integer constants to appear first. */
13585 if (CONST_INT_P (XEXP (x, 0)))
13587 output_pic_addr_const (file, XEXP (x, 0), code);
13589 output_pic_addr_const (file, XEXP (x, 1), code);
13593 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13594 output_pic_addr_const (file, XEXP (x, 1), code);
13596 output_pic_addr_const (file, XEXP (x, 0), code);
13602 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13603 output_pic_addr_const (file, XEXP (x, 0), code);
13605 output_pic_addr_const (file, XEXP (x, 1), code);
13607 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13611 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13613 bool f = i386_asm_output_addr_const_extra (file, x);
13618 gcc_assert (XVECLEN (x, 0) == 1);
13619 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13620 switch (XINT (x, 1))
13623 fputs ("@GOT", file);
13625 case UNSPEC_GOTOFF:
13626 fputs ("@GOTOFF", file);
13628 case UNSPEC_PLTOFF:
13629 fputs ("@PLTOFF", file);
13632 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13633 "(%rip)" : "[rip]", file);
13635 case UNSPEC_GOTPCREL:
13636 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13637 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13639 case UNSPEC_GOTTPOFF:
13640 /* FIXME: This might be @TPOFF in Sun ld too. */
13641 fputs ("@gottpoff", file);
13644 fputs ("@tpoff", file);
13646 case UNSPEC_NTPOFF:
13648 fputs ("@tpoff", file);
13650 fputs ("@ntpoff", file);
13652 case UNSPEC_DTPOFF:
13653 fputs ("@dtpoff", file);
13655 case UNSPEC_GOTNTPOFF:
13657 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13658 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13660 fputs ("@gotntpoff", file);
13662 case UNSPEC_INDNTPOFF:
13663 fputs ("@indntpoff", file);
13666 case UNSPEC_MACHOPIC_OFFSET:
13668 machopic_output_function_base_name (file);
13672 output_operand_lossage ("invalid UNSPEC as operand");
13678 output_operand_lossage ("invalid expression as operand");
13682 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13683 We need to emit DTP-relative relocations. */
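/* Added example (not in the original source): for a TLS variable x and
   SIZE == 4 this emits

       .long   x@dtpoff

   and for SIZE == 8 a ", 0" is appended for the upper half.  */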
13685 static void ATTRIBUTE_UNUSED
13686 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13688 fputs (ASM_LONG, file);
13689 output_addr_const (file, x);
13690 fputs ("@dtpoff", file);
13696 fputs (", 0", file);
13699 gcc_unreachable ();
13703 /* Return true if X is a representation of the PIC register. This copes
13704 with calls from ix86_find_base_term, where the register might have
13705 been replaced by a cselib value. */
13708 ix86_pic_register_p (rtx x)
13710 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13711 return (pic_offset_table_rtx
13712 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13714 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13717 /* Helper function for ix86_delegitimize_address.
13718 Attempt to delegitimize TLS local-exec accesses. */
13721 ix86_delegitimize_tls_address (rtx orig_x)
13723 rtx x = orig_x, unspec;
13724 struct ix86_address addr;
13726 if (!TARGET_TLS_DIRECT_SEG_REFS)
13730 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13732 if (ix86_decompose_address (x, &addr) == 0
13733 || addr.seg != DEFAULT_TLS_SEG_REG
13734 || addr.disp == NULL_RTX
13735 || GET_CODE (addr.disp) != CONST)
13737 unspec = XEXP (addr.disp, 0);
13738 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13739 unspec = XEXP (unspec, 0);
13740 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13742 x = XVECEXP (unspec, 0, 0);
13743 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13744 if (unspec != XEXP (addr.disp, 0))
13745 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13748 rtx idx = addr.index;
13749 if (addr.scale != 1)
13750 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13751 x = gen_rtx_PLUS (Pmode, idx, x);
13754 x = gen_rtx_PLUS (Pmode, addr.base, x);
13755 if (MEM_P (orig_x))
13756 x = replace_equiv_address_nv (orig_x, x);
13760 /* In the name of slightly smaller debug output, and to cater to
13761 general assembler lossage, recognize PIC+GOTOFF and turn it back
13762 into a direct symbol reference.
13764 On Darwin, this is necessary to avoid a crash, because Darwin
13765 has a different PIC label for each routine but the DWARF debugging
13766 information is not associated with any particular routine, so it's
13767 necessary to remove references to the PIC label from RTL stored by
13768 the DWARF output code. */
13771 ix86_delegitimize_address (rtx x)
13773 rtx orig_x = delegitimize_mem_from_attrs (x);
13774 /* addend is NULL or some rtx if x is something+GOTOFF where
13775 something doesn't include the PIC register. */
13776 rtx addend = NULL_RTX;
13777 /* reg_addend is NULL or a multiple of some register. */
13778 rtx reg_addend = NULL_RTX;
13779 /* const_addend is NULL or a const_int. */
13780 rtx const_addend = NULL_RTX;
13781 /* This is the result, or NULL. */
13782 rtx result = NULL_RTX;
13791 if (GET_CODE (x) == CONST
13792 && GET_CODE (XEXP (x, 0)) == PLUS
13793 && GET_MODE (XEXP (x, 0)) == Pmode
13794 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13795 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
13796 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
13798 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
13799 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
13800 if (MEM_P (orig_x))
13801 x = replace_equiv_address_nv (orig_x, x);
13804 if (GET_CODE (x) != CONST
13805 || GET_CODE (XEXP (x, 0)) != UNSPEC
13806 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13807 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13808 || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
13809 return ix86_delegitimize_tls_address (orig_x);
13810 x = XVECEXP (XEXP (x, 0), 0, 0);
13811 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
13813 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13821 if (GET_CODE (x) != PLUS
13822 || GET_CODE (XEXP (x, 1)) != CONST)
13823 return ix86_delegitimize_tls_address (orig_x);
13825 if (ix86_pic_register_p (XEXP (x, 0)))
13826 /* %ebx + GOT/GOTOFF */
13828 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13830 /* %ebx + %reg * scale + GOT/GOTOFF */
13831 reg_addend = XEXP (x, 0);
13832 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13833 reg_addend = XEXP (reg_addend, 1);
13834 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13835 reg_addend = XEXP (reg_addend, 0);
13838 reg_addend = NULL_RTX;
13839 addend = XEXP (x, 0);
13843 addend = XEXP (x, 0);
13845 x = XEXP (XEXP (x, 1), 0);
13846 if (GET_CODE (x) == PLUS
13847 && CONST_INT_P (XEXP (x, 1)))
13849 const_addend = XEXP (x, 1);
13853 if (GET_CODE (x) == UNSPEC
13854 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13855 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13856 result = XVECEXP (x, 0, 0);
13858 if (TARGET_MACHO && darwin_local_data_pic (x)
13859 && !MEM_P (orig_x))
13860 result = XVECEXP (x, 0, 0);
13863 return ix86_delegitimize_tls_address (orig_x);
13866 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13868 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13871 /* If the rest of original X doesn't involve the PIC register, add
13872 addend and subtract pic_offset_table_rtx. This can happen e.g.
13874 leal (%ebx, %ecx, 4), %ecx
13876 movl foo@GOTOFF(%ecx), %edx
13877 in which case we return (%ecx - %ebx) + foo. */
13878 if (pic_offset_table_rtx)
13879 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13880 pic_offset_table_rtx),
13885 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13887 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13888 if (result == NULL_RTX)
13894 /* If X is a machine specific address (i.e. a symbol or label being
13895 referenced as a displacement from the GOT implemented using an
13896 UNSPEC), then return the base term. Otherwise return X. */
13899 ix86_find_base_term (rtx x)
13905 if (GET_CODE (x) != CONST)
13907 term = XEXP (x, 0);
13908 if (GET_CODE (term) == PLUS
13909 && (CONST_INT_P (XEXP (term, 1))
13910 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13911 term = XEXP (term, 0);
13912 if (GET_CODE (term) != UNSPEC
13913 || (XINT (term, 1) != UNSPEC_GOTPCREL
13914 && XINT (term, 1) != UNSPEC_PCREL))
13917 return XVECEXP (term, 0, 0);
13920 return ix86_delegitimize_address (x);
13924 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
13925 bool fp, FILE *file)
13927 const char *suffix;
13929 if (mode == CCFPmode || mode == CCFPUmode)
13931 code = ix86_fp_compare_code_to_integer (code);
13935 code = reverse_condition (code);
13986 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13990 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13991 Those same assemblers have the same but opposite lossage on cmov. */
13992 if (mode == CCmode)
13993 suffix = fp ? "nbe" : "a";
13994 else if (mode == CCCmode)
13997 gcc_unreachable ();
14013 gcc_unreachable ();
14017 gcc_assert (mode == CCmode || mode == CCCmode);
14034 gcc_unreachable ();
14038 /* ??? As above. */
14039 gcc_assert (mode == CCmode || mode == CCCmode);
14040 suffix = fp ? "nb" : "ae";
14043 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14047 /* ??? As above. */
14048 if (mode == CCmode)
14050 else if (mode == CCCmode)
14051 suffix = fp ? "nb" : "ae";
14053 gcc_unreachable ();
14056 suffix = fp ? "u" : "p";
14059 suffix = fp ? "nu" : "np";
14062 gcc_unreachable ();
14064 fputs (suffix, file);
14067 /* Print the name of register X to FILE based on its machine mode and number.
14068 If CODE is 'w', pretend the mode is HImode.
14069 If CODE is 'b', pretend the mode is QImode.
14070 If CODE is 'k', pretend the mode is SImode.
14071 If CODE is 'q', pretend the mode is DImode.
14072 If CODE is 'x', pretend the mode is V4SFmode.
14073 If CODE is 't', pretend the mode is V8SFmode.
14074 If CODE is 'h', pretend the reg is the 'high' byte register.
14075 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14076 If CODE is 'd', duplicate the operand for an AVX instruction. */
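/* Added examples (not in the original source): for hard register AX,
   code 'b' prints "al", 'w' prints "ax", 'k' prints "eax", 'q' prints
   "rax" and 'h' prints "ah"; in AT&T syntax each name is preceded by the
   '%' emitted below.  */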
14080 print_reg (rtx x, int code, FILE *file)
14083 unsigned int regno;
14084 bool duplicated = code == 'd' && TARGET_AVX;
14086 if (ASSEMBLER_DIALECT == ASM_ATT)
14091 gcc_assert (TARGET_64BIT);
14092 fputs ("rip", file);
14096 regno = true_regnum (x);
14097 gcc_assert (regno != ARG_POINTER_REGNUM
14098 && regno != FRAME_POINTER_REGNUM
14099 && regno != FLAGS_REG
14100 && regno != FPSR_REG
14101 && regno != FPCR_REG);
14103 if (code == 'w' || MMX_REG_P (x))
14105 else if (code == 'b')
14107 else if (code == 'k')
14109 else if (code == 'q')
14111 else if (code == 'y')
14113 else if (code == 'h')
14115 else if (code == 'x')
14117 else if (code == 't')
14120 code = GET_MODE_SIZE (GET_MODE (x));
14122 /* Irritatingly, the AMD extended registers use a different naming convention
14123 from the normal registers: "r%d[bwd]". */
14124 if (REX_INT_REGNO_P (regno))
14126 gcc_assert (TARGET_64BIT);
14128 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14132 error ("extended registers have no high halves");
14147 error ("unsupported operand size for extended register");
14157 if (STACK_TOP_P (x))
14166 if (! ANY_FP_REG_P (x))
14167 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14172 reg = hi_reg_name[regno];
14175 if (regno >= ARRAY_SIZE (qi_reg_name))
14177 reg = qi_reg_name[regno];
14180 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14182 reg = qi_high_reg_name[regno];
14187 gcc_assert (!duplicated);
14189 fputs (hi_reg_name[regno] + 1, file);
14194 gcc_unreachable ();
14200 if (ASSEMBLER_DIALECT == ASM_ATT)
14201 fprintf (file, ", %%%s", reg);
14203 fprintf (file, ", %s", reg);
14207 /* Locate some local-dynamic symbol still in use by this function
14208 so that we can print its name in some tls_local_dynamic_base pattern. */
14212 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14216 if (GET_CODE (x) == SYMBOL_REF
14217 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14219 cfun->machine->some_ld_name = XSTR (x, 0);
14226 static const char *
14227 get_some_local_dynamic_name (void)
14231 if (cfun->machine->some_ld_name)
14232 return cfun->machine->some_ld_name;
14234 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14235 if (NONDEBUG_INSN_P (insn)
14236 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14237 return cfun->machine->some_ld_name;
14242 /* Meaning of CODE:
14243 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14244 C -- print opcode suffix for set/cmov insn.
14245 c -- like C, but print reversed condition
14246 F,f -- likewise, but for floating-point.
14247 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing.
14249 R -- print the prefix for register names.
14250 z -- print the opcode suffix for the size of the current operand.
14251 Z -- likewise, with special suffixes for x87 instructions.
14252 * -- print a star (in certain assembler syntax)
14253 A -- print an absolute memory reference.
14254 E -- print address with DImode register names if TARGET_64BIT.
14255 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14256 s -- print a shift double count, followed by the assembler's argument delimiter.
14258 b -- print the QImode name of the register for the indicated operand.
14259 %b0 would print %al if operands[0] is reg 0.
14260 w -- likewise, print the HImode name of the register.
14261 k -- likewise, print the SImode name of the register.
14262 q -- likewise, print the DImode name of the register.
14263 x -- likewise, print the V4SFmode name of the register.
14264 t -- likewise, print the V8SFmode name of the register.
14265 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14266 y -- print "st(0)" instead of "st" as a register.
14267 d -- print duplicated register operand for AVX instruction.
14268 D -- print condition for SSE cmp instruction.
14269 P -- if PIC, print an @PLT suffix.
14270 p -- print raw symbol name.
14271 X -- don't print any sort of PIC '@' suffix for a symbol.
14272 & -- print some in-use local-dynamic symbol name.
14273 H -- print a memory address offset by 8; used for sse high-parts
14274 Y -- print condition for XOP pcom* instruction.
14275 + -- print a branch hint as 'cs' or 'ds' prefix
14276 ; -- print a semicolon (after prefixes due to bug in older gas).
14277 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14278 @ -- print a segment register of thread base pointer load
14279 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
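/* Illustrative examples of the codes above (assuming operands[0] is
   hard register 0 in SImode): "%k0" prints the SImode name "%eax" in
   AT&T syntax, "%b0" prints the QImode name "%al", "%w0" prints the
   HImode name "%ax", "%q0" prints the DImode name "%rax", and "%z0"
   expands to the "l" size suffix for the operand's mode.  */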
14283 ix86_print_operand (FILE *file, rtx x, int code)
14290 switch (ASSEMBLER_DIALECT)
14297 /* Intel syntax. For absolute addresses, registers should not
14298 be surrounded by braces. */
14302 ix86_print_operand (file, x, 0);
14309 gcc_unreachable ();
14312 ix86_print_operand (file, x, 0);
14316 /* Wrap address in an UNSPEC to declare special handling. */
14318 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14320 output_address (x);
14324 if (ASSEMBLER_DIALECT == ASM_ATT)
14329 if (ASSEMBLER_DIALECT == ASM_ATT)
14334 if (ASSEMBLER_DIALECT == ASM_ATT)
14339 if (ASSEMBLER_DIALECT == ASM_ATT)
14344 if (ASSEMBLER_DIALECT == ASM_ATT)
14349 if (ASSEMBLER_DIALECT == ASM_ATT)
14354 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14355 if (ASSEMBLER_DIALECT != ASM_ATT)
14358 switch (GET_MODE_SIZE (GET_MODE (x)))
14373 output_operand_lossage
14374 ("invalid operand size for operand code 'O'");
14383 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14385 /* Opcodes don't get size suffixes if using Intel opcodes. */
14386 if (ASSEMBLER_DIALECT == ASM_INTEL)
14389 switch (GET_MODE_SIZE (GET_MODE (x)))
14408 output_operand_lossage
14409 ("invalid operand size for operand code 'z'");
14414 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14416 (0, "non-integer operand used with operand code 'z'");
14420 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14421 if (ASSEMBLER_DIALECT == ASM_INTEL)
14424 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14426 switch (GET_MODE_SIZE (GET_MODE (x)))
14429 #ifdef HAVE_AS_IX86_FILDS
14439 #ifdef HAVE_AS_IX86_FILDQ
14442 fputs ("ll", file);
14450 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14452 /* 387 opcodes don't get size suffixes
14453 if the operands are registers. */
14454 if (STACK_REG_P (x))
14457 switch (GET_MODE_SIZE (GET_MODE (x)))
14478 output_operand_lossage
14479 ("invalid operand type used with operand code 'Z'");
14483 output_operand_lossage
14484 ("invalid operand size for operand code 'Z'");
14502 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
14504 ix86_print_operand (file, x, 0);
14505 fputs (", ", file);
14510 switch (GET_CODE (x))
14513 fputs ("neq", file);
14516 fputs ("eq", file);
14520 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14524 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14528 fputs ("le", file);
14532 fputs ("lt", file);
14535 fputs ("unord", file);
14538 fputs ("ord", file);
14541 fputs ("ueq", file);
14544 fputs ("nlt", file);
14547 fputs ("nle", file);
14550 fputs ("ule", file);
14553 fputs ("ult", file);
14556 fputs ("une", file);
14559 output_operand_lossage ("operand is not a condition code, "
14560 "invalid operand code 'Y'");
14566 /* A little bit of brain damage here.  The SSE compare instructions
14567 use completely different names for the comparisons than the
14568 fp conditional moves do.  */
14569 switch (GET_CODE (x))
14574 fputs ("eq_us", file);
14578 fputs ("eq", file);
14583 fputs ("nge", file);
14587 fputs ("lt", file);
14592 fputs ("ngt", file);
14596 fputs ("le", file);
14599 fputs ("unord", file);
14604 fputs ("neq_oq", file);
14608 fputs ("neq", file);
14613 fputs ("ge", file);
14617 fputs ("nlt", file);
14622 fputs ("gt", file);
14626 fputs ("nle", file);
14629 fputs ("ord", file);
14632 output_operand_lossage ("operand is not a condition code, "
14633 "invalid operand code 'D'");
14640 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14641 if (ASSEMBLER_DIALECT == ASM_ATT)
14647 if (!COMPARISON_P (x))
14649 output_operand_lossage ("operand is not a condition code, "
14650 "invalid operand code '%c'", code);
14653 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
14654 code == 'c' || code == 'f',
14655 code == 'F' || code == 'f',
14660 if (!offsettable_memref_p (x))
14662 output_operand_lossage ("operand is not an offsettable memory "
14663 "reference, invalid operand code 'H'");
14666 /* It doesn't actually matter what mode we use here, as we're
14667 only going to use this for printing. */
14668 x = adjust_address_nv (x, DImode, 8);
14672 gcc_assert (CONST_INT_P (x));
14674 if (INTVAL (x) & IX86_HLE_ACQUIRE)
14675 #ifdef HAVE_AS_IX86_HLE
14676 fputs ("xacquire ", file);
14678 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
14680 else if (INTVAL (x) & IX86_HLE_RELEASE)
14681 #ifdef HAVE_AS_IX86_HLE
14682 fputs ("xrelease ", file);
14684 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
14686 /* We do not want to print the value of the operand. */
14690 if (ASSEMBLER_DIALECT == ASM_ATT)
14696 const char *name = get_some_local_dynamic_name ();
14698 output_operand_lossage ("'%%&' used without any "
14699 "local dynamic TLS references");
14701 assemble_name (file, name);
14710 || optimize_function_for_size_p (cfun)
14711 || !TARGET_BRANCH_PREDICTION_HINTS)
14714 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14717 int pred_val = INTVAL (XEXP (x, 0));
14719 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14720 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14722 bool taken = pred_val > REG_BR_PROB_BASE / 2;
14724 = final_forward_branch_p (current_output_insn) == 0;
14726 /* Emit hints only in the case where the default branch prediction
14727 heuristics would fail. */
14728 if (taken != cputaken)
14730 /* We use 3e (DS) prefix for taken branches and
14731 2e (CS) prefix for not taken branches. */
14733 fputs ("ds ; ", file);
14735 fputs ("cs ; ", file);
14743 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14749 if (ASSEMBLER_DIALECT == ASM_ATT)
14752 /* The kernel uses a different segment register for performance
14753 reasons; a system call would not have to trash the userspace
14754 segment register, which would be expensive. */
14755 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14756 fputs ("fs", file);
14758 fputs ("gs", file);
14762 putc (TARGET_AVX2 ? 'i' : 'f', file);
14766 if (TARGET_64BIT && Pmode != word_mode)
14767 fputs ("addr32 ", file);
14771 output_operand_lossage ("invalid operand code '%c'", code);
14776 print_reg (x, code, file);
14778 else if (MEM_P (x))
14780 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14781 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14782 && GET_MODE (x) != BLKmode)
14785 switch (GET_MODE_SIZE (GET_MODE (x)))
14787 case 1: size = "BYTE"; break;
14788 case 2: size = "WORD"; break;
14789 case 4: size = "DWORD"; break;
14790 case 8: size = "QWORD"; break;
14791 case 12: size = "TBYTE"; break;
14793 if (GET_MODE (x) == XFmode)
14798 case 32: size = "YMMWORD"; break;
14800 gcc_unreachable ();
14803 /* Check for explicit size override (codes 'b', 'w', 'k',
14807 else if (code == 'w')
14809 else if (code == 'k')
14811 else if (code == 'q')
14813 else if (code == 'x')
14816 fputs (size, file);
14817 fputs (" PTR ", file);
14821 /* Avoid (%rip) for call operands. */
14822 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14823 && !CONST_INT_P (x))
14824 output_addr_const (file, x);
14825 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14826 output_operand_lossage ("invalid constraints for operand");
14828 output_address (x);
14831 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14836 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14837 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14839 if (ASSEMBLER_DIALECT == ASM_ATT)
14841 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14843 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
14844 (unsigned long long) (int) l);
14846 fprintf (file, "0x%08x", (unsigned int) l);
14849 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14854 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14855 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14857 if (ASSEMBLER_DIALECT == ASM_ATT)
14859 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14862 /* These float cases don't actually occur as immediate operands. */
14863 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14867 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14868 fputs (dstr, file);
14873 /* We have patterns that allow zero sets of memory, for instance.
14874 In 64-bit mode, we should probably support all 8-byte vectors,
14875 since we can in fact encode that into an immediate. */
14876 if (GET_CODE (x) == CONST_VECTOR)
14878 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14882 if (code != 'P' && code != 'p')
14884 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14886 if (ASSEMBLER_DIALECT == ASM_ATT)
14889 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14890 || GET_CODE (x) == LABEL_REF)
14892 if (ASSEMBLER_DIALECT == ASM_ATT)
14895 fputs ("OFFSET FLAT:", file);
14898 if (CONST_INT_P (x))
14899 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14900 else if (flag_pic || MACHOPIC_INDIRECT)
14901 output_pic_addr_const (file, x, code);
14903 output_addr_const (file, x);
14908 ix86_print_operand_punct_valid_p (unsigned char code)
14910 return (code == '@' || code == '*' || code == '+' || code == '&'
14911 || code == ';' || code == '~' || code == '^');
14914 /* Print a memory operand whose address is ADDR. */
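/* A brief illustration (hypothetical operand): for an address with
   base %ebx, index %esi, scale 4 and displacement 12, the AT&T dialect
   below prints "12(%ebx,%esi,4)" while the Intel dialect prints
   "[ebx+12+esi*4]".  */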
14917 ix86_print_operand_address (FILE *file, rtx addr)
14919 struct ix86_address parts;
14920 rtx base, index, disp;
14926 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14928 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14929 gcc_assert (parts.index == NULL_RTX);
14930 parts.index = XVECEXP (addr, 0, 1);
14931 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14932 addr = XVECEXP (addr, 0, 0);
14935 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
14937 gcc_assert (TARGET_64BIT);
14938 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14942 ok = ix86_decompose_address (addr, &parts);
14947 index = parts.index;
14949 scale = parts.scale;
14957 if (ASSEMBLER_DIALECT == ASM_ATT)
14959 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14962 gcc_unreachable ();
14965 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14966 if (TARGET_64BIT && !base && !index)
14970 if (GET_CODE (disp) == CONST
14971 && GET_CODE (XEXP (disp, 0)) == PLUS
14972 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14973 symbol = XEXP (XEXP (disp, 0), 0);
14975 if (GET_CODE (symbol) == LABEL_REF
14976 || (GET_CODE (symbol) == SYMBOL_REF
14977 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14980 if (!base && !index)
14982 /* A displacement-only address requires special attention. */
14984 if (CONST_INT_P (disp))
14986 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14987 fputs ("ds:", file);
14988 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14991 output_pic_addr_const (file, disp, 0);
14993 output_addr_const (file, disp);
14997 /* Print SImode register names to force addr32 prefix. */
14998 if (SImode_address_operand (addr, VOIDmode))
15000 #ifdef ENABLE_CHECKING
15001 gcc_assert (TARGET_64BIT);
15002 switch (GET_CODE (addr))
15005 gcc_assert (GET_MODE (addr) == SImode);
15006 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15010 gcc_assert (GET_MODE (addr) == DImode);
15013 gcc_unreachable ();
15016 gcc_assert (!code);
15022 && CONST_INT_P (disp)
15023 && INTVAL (disp) < -16*1024*1024)
15025 /* X32 runs in 64-bit mode, where displacement, DISP, in
15026 address DISP(%r64), is encoded as 32-bit immediate sign-
15027 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15028 address is %r64 + 0xffffffffbffffd00. When %r64 <
15029 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15030 which is invalid for x32. The correct address is %r64
15031 - 0x40000300 == 0xf7ffdd64. To properly encode
15032 -0x40000300(%r64) for x32, we zero-extend negative
15033 displacement by forcing addr32 prefix which truncates
15034 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15035 zero-extend all negative displacements, including -1(%rsp).
15036 However, for small negative displacements, sign-extension
15037 won't cause overflow. We only zero-extend negative
15038 displacements if they < -16*1024*1024, which is also used
15039 to check legitimate address displacements for PIC. */
15043 if (ASSEMBLER_DIALECT == ASM_ATT)
15048 output_pic_addr_const (file, disp, 0);
15049 else if (GET_CODE (disp) == LABEL_REF)
15050 output_asm_label (disp);
15052 output_addr_const (file, disp);
15057 print_reg (base, code, file);
15061 print_reg (index, vsib ? 0 : code, file);
15062 if (scale != 1 || vsib)
15063 fprintf (file, ",%d", scale);
15069 rtx offset = NULL_RTX;
15073 /* Pull out the offset of a symbol; print any symbol itself. */
15074 if (GET_CODE (disp) == CONST
15075 && GET_CODE (XEXP (disp, 0)) == PLUS
15076 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15078 offset = XEXP (XEXP (disp, 0), 1);
15079 disp = gen_rtx_CONST (VOIDmode,
15080 XEXP (XEXP (disp, 0), 0));
15084 output_pic_addr_const (file, disp, 0);
15085 else if (GET_CODE (disp) == LABEL_REF)
15086 output_asm_label (disp);
15087 else if (CONST_INT_P (disp))
15090 output_addr_const (file, disp);
15096 print_reg (base, code, file);
15099 if (INTVAL (offset) >= 0)
15101 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15105 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15112 print_reg (index, vsib ? 0 : code, file);
15113 if (scale != 1 || vsib)
15114 fprintf (file, "*%d", scale);
15121 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15124 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15128 if (GET_CODE (x) != UNSPEC)
15131 op = XVECEXP (x, 0, 0);
15132 switch (XINT (x, 1))
15134 case UNSPEC_GOTTPOFF:
15135 output_addr_const (file, op);
15136 /* FIXME: This might be @TPOFF in Sun ld. */
15137 fputs ("@gottpoff", file);
15140 output_addr_const (file, op);
15141 fputs ("@tpoff", file);
15143 case UNSPEC_NTPOFF:
15144 output_addr_const (file, op);
15146 fputs ("@tpoff", file);
15148 fputs ("@ntpoff", file);
15150 case UNSPEC_DTPOFF:
15151 output_addr_const (file, op);
15152 fputs ("@dtpoff", file);
15154 case UNSPEC_GOTNTPOFF:
15155 output_addr_const (file, op);
15157 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15158 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15160 fputs ("@gotntpoff", file);
15162 case UNSPEC_INDNTPOFF:
15163 output_addr_const (file, op);
15164 fputs ("@indntpoff", file);
15167 case UNSPEC_MACHOPIC_OFFSET:
15168 output_addr_const (file, op);
15170 machopic_output_function_base_name (file);
15174 case UNSPEC_STACK_CHECK:
15178 gcc_assert (flag_split_stack);
15180 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15181 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15183 gcc_unreachable ();
15186 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15197 /* Split one or more double-mode RTL references into pairs of half-mode
15198 references. The RTL can be REG, offsettable MEM, integer constant, or
15199 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15200 split and "num" is its length. lo_half and hi_half are output arrays
15201 that parallel "operands". */
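/* A minimal example (little-endian x86 assumed): splitting the DImode
   constant 0x123456789abcdef0 yields an SImode lo_half of 0x9abcdef0
   and an SImode hi_half of 0x12345678, while an offsettable DImode MEM
   is split into two adjacent SImode MEMs at byte offsets 0 and 4.  */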
15204 split_double_mode (enum machine_mode mode, rtx operands[],
15205 int num, rtx lo_half[], rtx hi_half[])
15207 enum machine_mode half_mode;
15213 half_mode = DImode;
15216 half_mode = SImode;
15219 gcc_unreachable ();
15222 byte = GET_MODE_SIZE (half_mode);
15226 rtx op = operands[num];
15228 /* simplify_subreg refuses to split volatile memory addresses,
15229 but we still have to handle them. */
15232 lo_half[num] = adjust_address (op, half_mode, 0);
15233 hi_half[num] = adjust_address (op, half_mode, byte);
15237 lo_half[num] = simplify_gen_subreg (half_mode, op,
15238 GET_MODE (op) == VOIDmode
15239 ? mode : GET_MODE (op), 0);
15240 hi_half[num] = simplify_gen_subreg (half_mode, op,
15241 GET_MODE (op) == VOIDmode
15242 ? mode : GET_MODE (op), byte);
15247 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15248 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15249 is the expression of the binary operation. The output may either be
15250 emitted here, or returned to the caller, like all output_* functions.
15252 There is no guarantee that the operands are the same mode, as they
15253 might be within FLOAT or FLOAT_EXTEND expressions. */
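/* A rough sketch of the output (assuming an SFmode SSE addition): the
   template returned is "addss\t{%2, %0|%0, %2}", or the three-operand
   "vaddss\t{%2, %1, %0|%0, %1, %2}" when the AVX encoding is selected;
   the x87 cases below choose between plain and popping ("p") forms
   such as fadd/faddp depending on which operand sits at the top of
   the stack and which operands die.  */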
15255 #ifndef SYSV386_COMPAT
15256 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15257 wants to fix the assemblers because that causes incompatibility
15258 with gcc. No-one wants to fix gcc because that causes
15259 incompatibility with assemblers... You can use the option of
15260 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15261 #define SYSV386_COMPAT 1
15265 output_387_binary_op (rtx insn, rtx *operands)
15267 static char buf[40];
15270 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15272 #ifdef ENABLE_CHECKING
15273 /* Even if we do not want to check the inputs, this documents the input
15274 constraints, which helps in understanding the following code. */
15275 if (STACK_REG_P (operands[0])
15276 && ((REG_P (operands[1])
15277 && REGNO (operands[0]) == REGNO (operands[1])
15278 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15279 || (REG_P (operands[2])
15280 && REGNO (operands[0]) == REGNO (operands[2])
15281 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15282 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15285 gcc_assert (is_sse);
15288 switch (GET_CODE (operands[3]))
15291 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15292 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15300 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15301 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15309 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15310 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15318 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15319 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15327 gcc_unreachable ();
15334 strcpy (buf, ssep);
15335 if (GET_MODE (operands[0]) == SFmode)
15336 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15338 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15342 strcpy (buf, ssep + 1);
15343 if (GET_MODE (operands[0]) == SFmode)
15344 strcat (buf, "ss\t{%2, %0|%0, %2}");
15346 strcat (buf, "sd\t{%2, %0|%0, %2}");
15352 switch (GET_CODE (operands[3]))
15356 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15358 rtx temp = operands[2];
15359 operands[2] = operands[1];
15360 operands[1] = temp;
15363 /* know operands[0] == operands[1]. */
15365 if (MEM_P (operands[2]))
15371 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15373 if (STACK_TOP_P (operands[0]))
15374 /* How is it that we are storing to a dead operand[2]?
15375 Well, presumably operands[1] is dead too. We can't
15376 store the result to st(0) as st(0) gets popped on this
15377 instruction. Instead store to operands[2] (which I
15378 think has to be st(1)). st(1) will be popped later.
15379 gcc <= 2.8.1 didn't have this check and generated
15380 assembly code that the Unixware assembler rejected. */
15381 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15383 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15387 if (STACK_TOP_P (operands[0]))
15388 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15390 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15395 if (MEM_P (operands[1]))
15401 if (MEM_P (operands[2]))
15407 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15410 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15411 derived assemblers, confusingly reverse the direction of
15412 the operation for fsub{r} and fdiv{r} when the
15413 destination register is not st(0). The Intel assembler
15414 doesn't have this brain damage. Read !SYSV386_COMPAT to
15415 figure out what the hardware really does. */
15416 if (STACK_TOP_P (operands[0]))
15417 p = "{p\t%0, %2|rp\t%2, %0}";
15419 p = "{rp\t%2, %0|p\t%0, %2}";
15421 if (STACK_TOP_P (operands[0]))
15422 /* As above for fmul/fadd, we can't store to st(0). */
15423 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15425 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15430 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15433 if (STACK_TOP_P (operands[0]))
15434 p = "{rp\t%0, %1|p\t%1, %0}";
15436 p = "{p\t%1, %0|rp\t%0, %1}";
15438 if (STACK_TOP_P (operands[0]))
15439 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15441 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15446 if (STACK_TOP_P (operands[0]))
15448 if (STACK_TOP_P (operands[1]))
15449 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15451 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15454 else if (STACK_TOP_P (operands[1]))
15457 p = "{\t%1, %0|r\t%0, %1}";
15459 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15465 p = "{r\t%2, %0|\t%0, %2}";
15467 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15473 gcc_unreachable ();
15480 /* Check if a 256bit AVX register is referenced inside of EXP. */
15483 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
15487 if (GET_CODE (exp) == SUBREG)
15488 exp = SUBREG_REG (exp);
15491 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
15497 /* Return needed mode for entity in optimize_mode_switching pass. */
15500 ix86_avx_u128_mode_needed (rtx insn)
15506 /* Needed mode is set to AVX_U128_CLEAN if there are
15507 no 256bit modes used in function arguments. */
15508 for (link = CALL_INSN_FUNCTION_USAGE (insn);
15510 link = XEXP (link, 1))
15512 if (GET_CODE (XEXP (link, 0)) == USE)
15514 rtx arg = XEXP (XEXP (link, 0), 0);
15516 if (ix86_check_avx256_register (&arg, NULL))
15517 return AVX_U128_ANY;
15521 return AVX_U128_CLEAN;
15524 /* Require DIRTY mode if a 256bit AVX register is referenced.  The hardware
15525 changes state only when a 256bit register is written to, but we need
15526 to prevent the compiler from moving the optimal insertion point above
15527 an eventual read from a 256bit register. */
15528 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
15529 return AVX_U128_DIRTY;
15531 return AVX_U128_ANY;
15534 /* Return mode that i387 must be switched into
15535 prior to the execution of insn. */
15538 ix86_i387_mode_needed (int entity, rtx insn)
15540 enum attr_i387_cw mode;
15542 /* The mode UNINITIALIZED is used to store the control word after a
15543 function call or ASM pattern.  The mode ANY specifies that the function
15544 has no requirements on the control word and makes no changes in the
15545 bits we are interested in. */
15548 || (NONJUMP_INSN_P (insn)
15549 && (asm_noperands (PATTERN (insn)) >= 0
15550 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
15551 return I387_CW_UNINITIALIZED;
15553 if (recog_memoized (insn) < 0)
15554 return I387_CW_ANY;
15556 mode = get_attr_i387_cw (insn);
15561 if (mode == I387_CW_TRUNC)
15566 if (mode == I387_CW_FLOOR)
15571 if (mode == I387_CW_CEIL)
15576 if (mode == I387_CW_MASK_PM)
15581 gcc_unreachable ();
15584 return I387_CW_ANY;
15587 /* Return mode that entity must be switched into
15588 prior to the execution of insn. */
15591 ix86_mode_needed (int entity, rtx insn)
15596 return ix86_avx_u128_mode_needed (insn);
15601 return ix86_i387_mode_needed (entity, insn);
15603 gcc_unreachable ();
15608 /* Check if a 256bit AVX register is referenced in stores. */
15611 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
15613 if (ix86_check_avx256_register (&dest, NULL))
15615 bool *used = (bool *) data;
15620 /* Calculate mode of upper 128bit AVX registers after the insn. */
15623 ix86_avx_u128_mode_after (int mode, rtx insn)
15625 rtx pat = PATTERN (insn);
15627 if (vzeroupper_operation (pat, VOIDmode)
15628 || vzeroall_operation (pat, VOIDmode))
15629 return AVX_U128_CLEAN;
15631 /* We know that state is clean after CALL insn if there are no
15632 256bit registers used in the function return register. */
15635 bool avx_reg256_found = false;
15636 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
15637 if (!avx_reg256_found)
15638 return AVX_U128_CLEAN;
15641 /* Otherwise, return current mode. Remember that if insn
15642 references AVX 256bit registers, the mode was already changed
15643 to DIRTY from MODE_NEEDED. */
15647 /* Return the mode that an insn results in. */
15650 ix86_mode_after (int entity, int mode, rtx insn)
15655 return ix86_avx_u128_mode_after (mode, insn);
15662 gcc_unreachable ();
15667 ix86_avx_u128_mode_entry (void)
15671 /* Entry mode is set to AVX_U128_DIRTY if there are
15672 256bit modes used in function arguments. */
15673 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
15674 arg = TREE_CHAIN (arg))
15676 rtx incoming = DECL_INCOMING_RTL (arg);
15678 if (incoming && ix86_check_avx256_register (&incoming, NULL))
15679 return AVX_U128_DIRTY;
15682 return AVX_U128_CLEAN;
15685 /* Return a mode that ENTITY is assumed to be
15686 switched to at function entry. */
15689 ix86_mode_entry (int entity)
15694 return ix86_avx_u128_mode_entry ();
15699 return I387_CW_ANY;
15701 gcc_unreachable ();
15706 ix86_avx_u128_mode_exit (void)
15708 rtx reg = crtl->return_rtx;
15710 /* Exit mode is set to AVX_U128_DIRTY if there are
15711 256bit modes used in the function return register. */
15712 if (reg && ix86_check_avx256_register (&reg, NULL))
15713 return AVX_U128_DIRTY;
15715 return AVX_U128_CLEAN;
15718 /* Return a mode that ENTITY is assumed to be
15719 switched to at function exit. */
15722 ix86_mode_exit (int entity)
15727 return ix86_avx_u128_mode_exit ();
15732 return I387_CW_ANY;
15734 gcc_unreachable ();
15738 /* Output code to initialize control word copies used by trunc?f?i and
15739 rounding patterns. CURRENT_MODE is set to current control word,
15740 while NEW_MODE is set to new control word. */
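/* For reference, the bits manipulated below live in the x87 control
   word: bits 10-11 form the rounding-control field (00 = to nearest,
   01 = down, 10 = up, 11 = toward zero) and bit 5 (0x0020) is the
   precision-exception mask, so e.g. the default control word 0x037f
   becomes 0x0f7f after the truncation OR of 0x0c00 below.  */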
15743 emit_i387_cw_initialization (int mode)
15745 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
15748 enum ix86_stack_slot slot;
15750 rtx reg = gen_reg_rtx (HImode);
15752 emit_insn (gen_x86_fnstcw_1 (stored_mode));
15753 emit_move_insn (reg, copy_rtx (stored_mode));
15755 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
15756 || optimize_insn_for_size_p ())
15760 case I387_CW_TRUNC:
15761 /* round toward zero (truncate) */
15762 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
15763 slot = SLOT_CW_TRUNC;
15766 case I387_CW_FLOOR:
15767 /* round down toward -oo */
15768 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15769 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
15770 slot = SLOT_CW_FLOOR;
15774 /* round up toward +oo */
15775 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
15776 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
15777 slot = SLOT_CW_CEIL;
15780 case I387_CW_MASK_PM:
15781 /* mask precision exception for nearbyint() */
15782 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15783 slot = SLOT_CW_MASK_PM;
15787 gcc_unreachable ();
15794 case I387_CW_TRUNC:
15795 /* round toward zero (truncate) */
15796 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
15797 slot = SLOT_CW_TRUNC;
15800 case I387_CW_FLOOR:
15801 /* round down toward -oo */
15802 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
15803 slot = SLOT_CW_FLOOR;
15807 /* round up toward +oo */
15808 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
15809 slot = SLOT_CW_CEIL;
15812 case I387_CW_MASK_PM:
15813 /* mask precision exception for nearbyint() */
15814 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
15815 slot = SLOT_CW_MASK_PM;
15819 gcc_unreachable ();
15823 gcc_assert (slot < MAX_386_STACK_LOCALS);
15825 new_mode = assign_386_stack_local (HImode, slot);
15826 emit_move_insn (new_mode, reg);
15829 /* Emit vzeroupper. */
15832 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
15836 /* Cancel automatic vzeroupper insertion if there are
15837 live call-saved SSE registers at the insertion point. */
15839 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15840 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
15844 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15845 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
15848 emit_insn (gen_avx_vzeroupper ());
15851 /* Generate one or more insns to set ENTITY to MODE. */
15854 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
15859 if (mode == AVX_U128_CLEAN)
15860 ix86_avx_emit_vzeroupper (regs_live);
15866 if (mode != I387_CW_ANY
15867 && mode != I387_CW_UNINITIALIZED)
15868 emit_i387_cw_initialization (mode);
15871 gcc_unreachable ();
15875 /* Output code for INSN to convert a float to a signed int. OPERANDS
15876 are the insn operands. The output may be [HSD]Imode and the input
15877 operand may be [SDX]Fmode. */
15880 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
15882 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15883 int dimode_p = GET_MODE (operands[0]) == DImode;
15884 int round_mode = get_attr_i387_cw (insn);
15886 /* Jump through a hoop or two for DImode, since the hardware has no
15887 non-popping instruction. We used to do this a different way, but
15888 that was somewhat fragile and broke with post-reload splitters. */
15889 if ((dimode_p || fisttp) && !stack_top_dies)
15890 output_asm_insn ("fld\t%y1", operands);
15892 gcc_assert (STACK_TOP_P (operands[1]));
15893 gcc_assert (MEM_P (operands[0]));
15894 gcc_assert (GET_MODE (operands[1]) != TFmode);
15897 output_asm_insn ("fisttp%Z0\t%0", operands);
15900 if (round_mode != I387_CW_ANY)
15901 output_asm_insn ("fldcw\t%3", operands);
15902 if (stack_top_dies || dimode_p)
15903 output_asm_insn ("fistp%Z0\t%0", operands);
15905 output_asm_insn ("fist%Z0\t%0", operands);
15906 if (round_mode != I387_CW_ANY)
15907 output_asm_insn ("fldcw\t%2", operands);
15913 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15914 have the values zero or one, indicates the ffreep insn's operand
15915 from the OPERANDS array. */
15917 static const char *
15918 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
15920 if (TARGET_USE_FFREEP)
15921 #ifdef HAVE_AS_IX86_FFREEP
15922 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
15925 static char retval[32];
15926 int regno = REGNO (operands[opno]);
15928 gcc_assert (STACK_REGNO_P (regno));
15930 regno -= FIRST_STACK_REG;
15932 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
15937 return opno ? "fstp\t%y1" : "fstp\t%y0";
15941 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15942 should be used. UNORDERED_P is true when fucom should be used. */
15945 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
15947 int stack_top_dies;
15948 rtx cmp_op0, cmp_op1;
15949 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
15953 cmp_op0 = operands[0];
15954 cmp_op1 = operands[1];
15958 cmp_op0 = operands[1];
15959 cmp_op1 = operands[2];
15964 if (GET_MODE (operands[0]) == SFmode)
15966 return "%vucomiss\t{%1, %0|%0, %1}";
15968 return "%vcomiss\t{%1, %0|%0, %1}";
15971 return "%vucomisd\t{%1, %0|%0, %1}";
15973 return "%vcomisd\t{%1, %0|%0, %1}";
15976 gcc_assert (STACK_TOP_P (cmp_op0));
15978 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
15980 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
15982 if (stack_top_dies)
15984 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
15985 return output_387_ffreep (operands, 1);
15988 return "ftst\n\tfnstsw\t%0";
15991 if (STACK_REG_P (cmp_op1)
15993 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
15994 && REGNO (cmp_op1) != FIRST_STACK_REG)
15996 /* If both the top of the 387 stack and the other operand (also a
15997 stack register) die, then this must be a `fcompp' float
15998 compare.  */
16002 /* There is no double popping fcomi variant. Fortunately,
16003 eflags is immune from the fstp's cc clobbering. */
16005 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16007 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16008 return output_387_ffreep (operands, 0);
16013 return "fucompp\n\tfnstsw\t%0";
16015 return "fcompp\n\tfnstsw\t%0";
16020 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16022 static const char * const alt[16] =
16024 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16025 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16026 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16027 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16029 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16030 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16034 "fcomi\t{%y1, %0|%0, %y1}",
16035 "fcomip\t{%y1, %0|%0, %y1}",
16036 "fucomi\t{%y1, %0|%0, %y1}",
16037 "fucomip\t{%y1, %0|%0, %y1}",
16048 mask = eflags_p << 3;
16049 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16050 mask |= unordered_p << 1;
16051 mask |= stack_top_dies;
16053 gcc_assert (mask < 16);
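/* Worked example (hypothetical operands): an fcomi-style compare
   (eflags_p set) of two FP stack registers where the top of the stack
   dies gives mask = (1 << 3) | (0 << 2) | (0 << 1) | 1 == 9, selecting
   "fcomip\t{%y1, %0|%0, %y1}" from the table above.  */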
16062 ix86_output_addr_vec_elt (FILE *file, int value)
16064 const char *directive = ASM_LONG;
16068 directive = ASM_QUAD;
16070 gcc_assert (!TARGET_64BIT);
16073 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16077 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16079 const char *directive = ASM_LONG;
16082 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16083 directive = ASM_QUAD;
16085 gcc_assert (!TARGET_64BIT);
16087 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16088 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16089 fprintf (file, "%s%s%d-%s%d\n",
16090 directive, LPREFIX, value, LPREFIX, rel);
16091 else if (HAVE_AS_GOTOFF_IN_DATA)
16092 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16094 else if (TARGET_MACHO)
16096 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16097 machopic_output_function_base_name (file);
16102 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16103 GOT_SYMBOL_NAME, LPREFIX, value);
16106 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16110 ix86_expand_clear (rtx dest)
16114 /* We play register width games, which are only valid after reload. */
16115 gcc_assert (reload_completed);
16117 /* Avoid HImode and its attendant prefix byte. */
16118 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16119 dest = gen_rtx_REG (SImode, REGNO (dest));
16120 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16122 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
16123 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
16125 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16126 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16132 /* X is an unchanging MEM. If it is a constant pool reference, return
16133 the constant pool rtx, else NULL. */
16136 maybe_get_pool_constant (rtx x)
16138 x = ix86_delegitimize_address (XEXP (x, 0));
16140 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16141 return get_pool_constant (x);
16147 ix86_expand_move (enum machine_mode mode, rtx operands[])
16150 enum tls_model model;
16155 if (GET_CODE (op1) == SYMBOL_REF)
16159 model = SYMBOL_REF_TLS_MODEL (op1);
16162 op1 = legitimize_tls_address (op1, model, true);
16163 op1 = force_operand (op1, op0);
16166 op1 = convert_to_mode (mode, op1, 1);
16168 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16171 else if (GET_CODE (op1) == CONST
16172 && GET_CODE (XEXP (op1, 0)) == PLUS
16173 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16175 rtx addend = XEXP (XEXP (op1, 0), 1);
16176 rtx symbol = XEXP (XEXP (op1, 0), 0);
16179 model = SYMBOL_REF_TLS_MODEL (symbol);
16181 tmp = legitimize_tls_address (symbol, model, true);
16183 tmp = legitimize_pe_coff_symbol (symbol, true);
16187 tmp = force_operand (tmp, NULL);
16188 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16189 op0, 1, OPTAB_DIRECT);
16192 op1 = convert_to_mode (mode, tmp, 1);
16196 if ((flag_pic || MACHOPIC_INDIRECT)
16197 && symbolic_operand (op1, mode))
16199 if (TARGET_MACHO && !TARGET_64BIT)
16202 /* dynamic-no-pic */
16203 if (MACHOPIC_INDIRECT)
16205 rtx temp = ((reload_in_progress
16206 || ((op0 && REG_P (op0))
16208 ? op0 : gen_reg_rtx (Pmode));
16209 op1 = machopic_indirect_data_reference (op1, temp);
16211 op1 = machopic_legitimize_pic_address (op1, mode,
16212 temp == op1 ? 0 : temp);
16214 if (op0 != op1 && GET_CODE (op0) != MEM)
16216 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16220 if (GET_CODE (op0) == MEM)
16221 op1 = force_reg (Pmode, op1);
16225 if (GET_CODE (temp) != REG)
16226 temp = gen_reg_rtx (Pmode);
16227 temp = legitimize_pic_address (op1, temp);
16232 /* dynamic-no-pic */
16238 op1 = force_reg (mode, op1);
16239 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16241 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16242 op1 = legitimize_pic_address (op1, reg);
16245 op1 = convert_to_mode (mode, op1, 1);
16252 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16253 || !push_operand (op0, mode))
16255 op1 = force_reg (mode, op1);
16257 if (push_operand (op0, mode)
16258 && ! general_no_elim_operand (op1, mode))
16259 op1 = copy_to_mode_reg (mode, op1);
16261 /* Force large constants in 64bit compilation into register
16262 to get them CSEed. */
16263 if (can_create_pseudo_p ()
16264 && (mode == DImode) && TARGET_64BIT
16265 && immediate_operand (op1, mode)
16266 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16267 && !register_operand (op0, mode)
16269 op1 = copy_to_mode_reg (mode, op1);
16271 if (can_create_pseudo_p ()
16272 && FLOAT_MODE_P (mode)
16273 && GET_CODE (op1) == CONST_DOUBLE)
16275 /* If we are loading a floating point constant to a register,
16276 force the value to memory now, since we'll get better code
16277 out of the back end. */
16279 op1 = validize_mem (force_const_mem (mode, op1));
16280 if (!register_operand (op0, mode))
16282 rtx temp = gen_reg_rtx (mode);
16283 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16284 emit_move_insn (op0, temp);
16290 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16294 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16296 rtx op0 = operands[0], op1 = operands[1];
16297 unsigned int align = GET_MODE_ALIGNMENT (mode);
16299 /* Force constants other than zero into memory. We do not know how
16300 the instructions used to build constants modify the upper 64 bits
16301 of the register; once we have that information we may be able
16302 to handle some of them more efficiently. */
16303 if (can_create_pseudo_p ()
16304 && register_operand (op0, mode)
16305 && (CONSTANT_P (op1)
16306 || (GET_CODE (op1) == SUBREG
16307 && CONSTANT_P (SUBREG_REG (op1))))
16308 && !standard_sse_constant_p (op1))
16309 op1 = validize_mem (force_const_mem (mode, op1));
16311 /* We need to check memory alignment for SSE mode since attribute
16312 can make operands unaligned. */
16313 if (can_create_pseudo_p ()
16314 && SSE_REG_MODE_P (mode)
16315 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16316 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16320 /* ix86_expand_vector_move_misalign() does not like constants ... */
16321 if (CONSTANT_P (op1)
16322 || (GET_CODE (op1) == SUBREG
16323 && CONSTANT_P (SUBREG_REG (op1))))
16324 op1 = validize_mem (force_const_mem (mode, op1));
16326 /* ... nor both arguments in memory. */
16327 if (!register_operand (op0, mode)
16328 && !register_operand (op1, mode))
16329 op1 = force_reg (mode, op1);
16331 tmp[0] = op0; tmp[1] = op1;
16332 ix86_expand_vector_move_misalign (mode, tmp);
16336 /* Make operand1 a register if it isn't already. */
16337 if (can_create_pseudo_p ()
16338 && !register_operand (op0, mode)
16339 && !register_operand (op1, mode))
16341 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16345 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16348 /* Split 32-byte AVX unaligned load and store if needed. */
16351 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16354 rtx (*extract) (rtx, rtx, rtx);
16355 rtx (*load_unaligned) (rtx, rtx);
16356 rtx (*store_unaligned) (rtx, rtx);
16357 enum machine_mode mode;
16359 switch (GET_MODE (op0))
16362 gcc_unreachable ();
16364 extract = gen_avx_vextractf128v32qi;
16365 load_unaligned = gen_avx_loaddqu256;
16366 store_unaligned = gen_avx_storedqu256;
16370 extract = gen_avx_vextractf128v8sf;
16371 load_unaligned = gen_avx_loadups256;
16372 store_unaligned = gen_avx_storeups256;
16376 extract = gen_avx_vextractf128v4df;
16377 load_unaligned = gen_avx_loadupd256;
16378 store_unaligned = gen_avx_storeupd256;
16385 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16387 rtx r = gen_reg_rtx (mode);
16388 m = adjust_address (op1, mode, 0);
16389 emit_move_insn (r, m);
16390 m = adjust_address (op1, mode, 16);
16391 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16392 emit_move_insn (op0, r);
16395 emit_insn (load_unaligned (op0, op1));
16397 else if (MEM_P (op0))
16399 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16401 m = adjust_address (op0, mode, 0);
16402 emit_insn (extract (m, op1, const0_rtx));
16403 m = adjust_address (op0, mode, 16);
16404 emit_insn (extract (m, op1, const1_rtx));
16407 emit_insn (store_unaligned (op0, op1));
16410 gcc_unreachable ();
16413 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16414 straight to ix86_expand_vector_move. */
16415 /* Code generation for scalar reg-reg moves of single and double precision data:
16416 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16420 if (x86_sse_partial_reg_dependency == true)
16425 Code generation for scalar loads of double precision data:
16426 if (x86_sse_split_regs == true)
16427 movlpd mem, reg (gas syntax)
16431 Code generation for unaligned packed loads of single precision data
16432 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16433 if (x86_sse_unaligned_move_optimal)
16436 if (x86_sse_partial_reg_dependency == true)
16448 Code generation for unaligned packed loads of double precision data
16449 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16450 if (x86_sse_unaligned_move_optimal)
16453 if (x86_sse_split_regs == true)
16466 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
16474 && GET_MODE_SIZE (mode) == 32)
16476 switch (GET_MODE_CLASS (mode))
16478 case MODE_VECTOR_INT:
16480 op0 = gen_lowpart (V32QImode, op0);
16481 op1 = gen_lowpart (V32QImode, op1);
16484 case MODE_VECTOR_FLOAT:
16485 ix86_avx256_split_vector_move_misalign (op0, op1);
16489 gcc_unreachable ();
16497 /* ??? If we have typed data, then it would appear that using
16498 movdqu is the only way to get unaligned data loaded with
16500 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16502 op0 = gen_lowpart (V16QImode, op0);
16503 op1 = gen_lowpart (V16QImode, op1);
16504 /* We will eventually emit movups based on insn attributes. */
16505 emit_insn (gen_sse2_loaddqu (op0, op1));
16507 else if (TARGET_SSE2 && mode == V2DFmode)
16512 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16513 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16514 || optimize_insn_for_size_p ())
16516 /* We will eventually emit movups based on insn attributes. */
16517 emit_insn (gen_sse2_loadupd (op0, op1));
16521 /* When SSE registers are split into halves, we can avoid
16522 writing to the top half twice. */
16523 if (TARGET_SSE_SPLIT_REGS)
16525 emit_clobber (op0);
16530 /* ??? Not sure about the best option for the Intel chips.
16531 The following would seem to satisfy; the register is
16532 entirely cleared, breaking the dependency chain. We
16533 then store to the upper half, with a dependency depth
16534 of one. A rumor has it that Intel recommends two movsd
16535 followed by an unpacklpd, but this is unconfirmed. And
16536 given that the dependency depth of the unpacklpd would
16537 still be one, I'm not sure why this would be better. */
16538 zero = CONST0_RTX (V2DFmode);
16541 m = adjust_address (op1, DFmode, 0);
16542 emit_insn (gen_sse2_loadlpd (op0, zero, m));
16543 m = adjust_address (op1, DFmode, 8);
16544 emit_insn (gen_sse2_loadhpd (op0, op0, m));
16549 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16550 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16551 || optimize_insn_for_size_p ())
16553 op0 = gen_lowpart (V4SFmode, op0);
16554 op1 = gen_lowpart (V4SFmode, op1);
16555 emit_insn (gen_sse_loadups (op0, op1));
16559 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
16560 emit_move_insn (op0, CONST0_RTX (mode));
16562 emit_clobber (op0);
16564 if (mode != V4SFmode)
16565 op0 = gen_lowpart (V4SFmode, op0);
16567 m = adjust_address (op1, V2SFmode, 0);
16568 emit_insn (gen_sse_loadlps (op0, op0, m));
16569 m = adjust_address (op1, V2SFmode, 8);
16570 emit_insn (gen_sse_loadhps (op0, op0, m));
16573 else if (MEM_P (op0))
16575 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16577 op0 = gen_lowpart (V16QImode, op0);
16578 op1 = gen_lowpart (V16QImode, op1);
16579 /* We will eventually emit movups based on insn attributes. */
16580 emit_insn (gen_sse2_storedqu (op0, op1));
16582 else if (TARGET_SSE2 && mode == V2DFmode)
16585 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16586 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16587 || optimize_insn_for_size_p ())
16588 /* We will eventually emit movups based on insn attributes. */
16589 emit_insn (gen_sse2_storeupd (op0, op1));
16592 m = adjust_address (op0, DFmode, 0);
16593 emit_insn (gen_sse2_storelpd (m, op1));
16594 m = adjust_address (op0, DFmode, 8);
16595 emit_insn (gen_sse2_storehpd (m, op1));
16600 if (mode != V4SFmode)
16601 op1 = gen_lowpart (V4SFmode, op1);
16604 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16605 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16606 || optimize_insn_for_size_p ())
16608 op0 = gen_lowpart (V4SFmode, op0);
16609 emit_insn (gen_sse_storeups (op0, op1));
16613 m = adjust_address (op0, V2SFmode, 0);
16614 emit_insn (gen_sse_storelps (m, op1));
16615 m = adjust_address (op0, V2SFmode, 8);
16616 emit_insn (gen_sse_storehps (m, op1));
16621 gcc_unreachable ();
16624 /* Expand a push in MODE. This is some mode for which we do not support
16625 proper push instructions, at least from the registers that we expect
16626 the value to live in. */
16629 ix86_expand_push (enum machine_mode mode, rtx x)
16633 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
16634 GEN_INT (-GET_MODE_SIZE (mode)),
16635 stack_pointer_rtx, 1, OPTAB_DIRECT);
16636 if (tmp != stack_pointer_rtx)
16637 emit_move_insn (stack_pointer_rtx, tmp);
16639 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
16641 /* When we push an operand onto stack, it has to be aligned at least
16642 at the function argument boundary. However since we don't have
16643 the argument type, we can't determine the actual argument
16645 emit_move_insn (tmp, x);
16648 /* Helper function of ix86_fixup_binary_operands to canonicalize
16649 operand order. Returns true if the operands should be swapped. */
16652 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
16655 rtx dst = operands[0];
16656 rtx src1 = operands[1];
16657 rtx src2 = operands[2];
16659 /* If the operation is not commutative, we can't do anything. */
16660 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
16663 /* Highest priority is that src1 should match dst. */
16664 if (rtx_equal_p (dst, src1))
16666 if (rtx_equal_p (dst, src2))
16669 /* Next highest priority is that immediate constants come second. */
16670 if (immediate_operand (src2, mode))
16672 if (immediate_operand (src1, mode))
16675 /* Lowest priority is that memory references should come second. */
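/* For instance (hypothetical operands): a commutative PLUS whose
   destination matches src2 but not src1 reports true, so the sources
   are swapped and the matching operand comes first; likewise a
   constant in src1, as in (plus (const_int 4) (reg)), is moved to the
   second position.  */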
16685 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16686 destination to use for the operation. If different from the true
16687 destination in operands[0], a copy operation will be required. */
16690 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
16693 rtx dst = operands[0];
16694 rtx src1 = operands[1];
16695 rtx src2 = operands[2];
16697 /* Canonicalize operand order. */
16698 if (ix86_swap_binary_operands_p (code, mode, operands))
16702 /* It is invalid to swap operands of different modes. */
16703 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
16710 /* Both source operands cannot be in memory. */
16711 if (MEM_P (src1) && MEM_P (src2))
16713 /* Optimization: Only read from memory once. */
16714 if (rtx_equal_p (src1, src2))
16716 src2 = force_reg (mode, src2);
16720 src2 = force_reg (mode, src2);
16723 /* If the destination is memory, and we do not have matching source
16724 operands, do things in registers. */
16725 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16726 dst = gen_reg_rtx (mode);
16728 /* Source 1 cannot be a constant. */
16729 if (CONSTANT_P (src1))
16730 src1 = force_reg (mode, src1);
16732 /* Source 1 cannot be a non-matching memory. */
16733 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16734 src1 = force_reg (mode, src1);
16736 /* Improve address combine. */
16738 && GET_MODE_CLASS (mode) == MODE_INT
16740 src2 = force_reg (mode, src2);
16742 operands[1] = src1;
16743 operands[2] = src2;
16747 /* Similarly, but assume that the destination has already been
16748 set up properly. */
16751 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
16752 enum machine_mode mode, rtx operands[])
16754 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
16755 gcc_assert (dst == operands[0]);
16758 /* Attempt to expand a binary operator. Make the expansion closer to the
16759 actual machine, than just general_operand, which will allow 3 separate
16760 memory references (one output, two input) in a single insn. */
16763 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
16766 rtx src1, src2, dst, op, clob;
16768 dst = ix86_fixup_binary_operands (code, mode, operands);
16769 src1 = operands[1];
16770 src2 = operands[2];
16772 /* Emit the instruction. */
16774 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
16775 if (reload_in_progress)
16777 /* Reload doesn't know about the flags register, and doesn't know that
16778 it doesn't want to clobber it. We can only do this with PLUS. */
16779 gcc_assert (code == PLUS);
16782 else if (reload_completed
16784 && !rtx_equal_p (dst, src1))
16786 /* This is going to be an LEA; avoid splitting it later. */
16791 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16792 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16795 /* Fix up the destination if needed. */
16796 if (dst != operands[0])
16797 emit_move_insn (operands[0], dst);
16800 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16801 the given OPERANDS. */
16804 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
16807 rtx op1 = NULL_RTX, op2 = NULL_RTX;
16808 if (GET_CODE (operands[1]) == SUBREG)
16813 else if (GET_CODE (operands[2]) == SUBREG)
16818 /* Optimize (__m128i) d | (__m128i) e and similar code
16819 when d and e are float vectors into float vector logical
16820 insn. In C/C++ without using intrinsics there is no other way
16821 to express vector logical operation on float vectors than
16822 to cast them temporarily to integer vectors. */
16824 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16825 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
16826 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
16827 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
16828 && SUBREG_BYTE (op1) == 0
16829 && (GET_CODE (op2) == CONST_VECTOR
16830 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
16831 && SUBREG_BYTE (op2) == 0))
16832 && can_create_pseudo_p ())
16835 switch (GET_MODE (SUBREG_REG (op1)))
16841 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
16842 if (GET_CODE (op2) == CONST_VECTOR)
16844 op2 = gen_lowpart (GET_MODE (dst), op2);
16845 op2 = force_reg (GET_MODE (dst), op2);
16850 op2 = SUBREG_REG (operands[2]);
16851 if (!nonimmediate_operand (op2, GET_MODE (dst)))
16852 op2 = force_reg (GET_MODE (dst), op2);
16854 op1 = SUBREG_REG (op1);
16855 if (!nonimmediate_operand (op1, GET_MODE (dst)))
16856 op1 = force_reg (GET_MODE (dst), op1);
16857 emit_insn (gen_rtx_SET (VOIDmode, dst,
16858 gen_rtx_fmt_ee (code, GET_MODE (dst),
16860 emit_move_insn (operands[0], gen_lowpart (mode, dst));
16866 if (!nonimmediate_operand (operands[1], mode))
16867 operands[1] = force_reg (mode, operands[1]);
16868 if (!nonimmediate_operand (operands[2], mode))
16869 operands[2] = force_reg (mode, operands[2]);
16870 ix86_fixup_binary_operands_no_copy (code, mode, operands);
16871 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16872 gen_rtx_fmt_ee (code, mode, operands[1],
16876 /* Return TRUE or FALSE depending on whether the binary operator meets the
16877 appropriate constraints. */
16880 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
16883 rtx dst = operands[0];
16884 rtx src1 = operands[1];
16885 rtx src2 = operands[2];
16887 /* Both source operands cannot be in memory. */
16888 if (MEM_P (src1) && MEM_P (src2))
16891 /* Canonicalize operand order for commutative operators. */
16892 if (ix86_swap_binary_operands_p (code, mode, operands))
16899 /* If the destination is memory, we must have a matching source operand. */
16900 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
16903 /* Source 1 cannot be a constant. */
16904 if (CONSTANT_P (src1))
16907 /* Source 1 cannot be a non-matching memory. */
16908 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
16909 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16910 return (code == AND
16913 || (TARGET_64BIT && mode == DImode))
16914 && satisfies_constraint_L (src2));
16919 /* Attempt to expand a unary operator. Make the expansion closer to the
16920 actual machine, than just general_operand, which will allow 2 separate
16921 memory references (one output, one input) in a single insn. */
16924 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
16927 int matching_memory;
16928 rtx src, dst, op, clob;
16933 /* If the destination is memory, and we do not have matching source
16934 operands, do things in registers. */
16935 matching_memory = 0;
16938 if (rtx_equal_p (dst, src))
16939 matching_memory = 1;
16941 dst = gen_reg_rtx (mode);
16944 /* When source operand is memory, destination must match. */
16945 if (MEM_P (src) && !matching_memory)
16946 src = force_reg (mode, src);
16948 /* Emit the instruction. */
16950 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
16951 if (reload_in_progress || code == NOT)
16953 /* Reload doesn't know about the flags register, and doesn't know that
16954 it doesn't want to clobber it. */
16955 gcc_assert (code == NOT);
16960 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16961 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
16964 /* Fix up the destination if needed. */
16965 if (dst != operands[0])
16966 emit_move_insn (operands[0], dst);
16969 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16970 divisor are within the range [0-255]. */
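/* A small worked example (values chosen for illustration): for an
   unsigned SImode division of 200 by 7 both inputs fit in [0-255], so
   the run-time test emitted below branches to the QImode path and a
   single "divb" leaves the quotient 28 in %al and the remainder 4 in
   %ah; larger operands fall through to the ordinary 32bit/64bit
   divide.  */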
16973 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
16976 rtx end_label, qimode_label;
16977 rtx insn, div, mod;
16978 rtx scratch, tmp0, tmp1, tmp2;
16979 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
16980 rtx (*gen_zero_extend) (rtx, rtx);
16981 rtx (*gen_test_ccno_1) (rtx, rtx);
16986 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
16987 gen_test_ccno_1 = gen_testsi_ccno_1;
16988 gen_zero_extend = gen_zero_extendqisi2;
16991 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
16992 gen_test_ccno_1 = gen_testdi_ccno_1;
16993 gen_zero_extend = gen_zero_extendqidi2;
16996 gcc_unreachable ();
16999 end_label = gen_label_rtx ();
17000 qimode_label = gen_label_rtx ();
17002 scratch = gen_reg_rtx (mode);
17004 /* Use 8bit unsigned divmod if dividend and divisor are within
17005 the range [0-255]. */
17006 emit_move_insn (scratch, operands[2]);
17007 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17008 scratch, 1, OPTAB_DIRECT);
17009 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17010 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17011 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17012 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17013 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17015 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17016 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17017 JUMP_LABEL (insn) = qimode_label;
17019 /* Generate original signed/unsigned divmod. */
17020 div = gen_divmod4_1 (operands[0], operands[1],
17021 operands[2], operands[3]);
17024 /* Branch to the end. */
17025 emit_jump_insn (gen_jump (end_label));
17028 /* Generate 8bit unsigned divide. */
17029 emit_label (qimode_label);
17030 /* Don't use operands[0] for result of 8bit divide since not all
17031 registers support QImode ZERO_EXTRACT. */
17032 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17033 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17034 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17035 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17039 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17040 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17044 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17045 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17048 /* Extract remainder from AH. */
17049 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17050 if (REG_P (operands[1]))
17051 insn = emit_move_insn (operands[1], tmp1);
17054 /* Need a new scratch register since the old one has the result of the 8bit divide.  */
17056 scratch = gen_reg_rtx (mode);
17057 emit_move_insn (scratch, tmp1);
17058 insn = emit_move_insn (operands[1], scratch);
17060 set_unique_reg_note (insn, REG_EQUAL, mod);
17062 /* Zero extend quotient from AL. */
17063 tmp1 = gen_lowpart (QImode, tmp0);
17064 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17065 set_unique_reg_note (insn, REG_EQUAL, div);
17067 emit_label (end_label);
17070 #define LEA_MAX_STALL (3)
17071 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
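/* Distances in the helpers below are counted in half-cycles (see
   increase_distance and the final >> 1 when returning), which is
   presumably why the search threshold is twice the maximum tolerated
   AGU stall.  */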
17073 /* Increase given DISTANCE in half-cycles according to
17074 dependencies between PREV and NEXT instructions.
17075 Add 1 half-cycle if there is no dependency and
17076 go to the next cycle if there is some dependency. */
17078 static unsigned int
17079 increase_distance (rtx prev, rtx next, unsigned int distance)
17084 if (!prev || !next)
17085 return distance + (distance & 1) + 2;
17087 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17088 return distance + 1;
17090 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17091 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17092 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17093 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17094 return distance + (distance & 1) + 2;
17096 return distance + 1;
17099 /* Function checks if instruction INSN defines register number
17100 REGNO1 or REGNO2. */
17103 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17108 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17109 if (DF_REF_REG_DEF_P (*def_rec)
17110 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17111 && (regno1 == DF_REF_REGNO (*def_rec)
17112 || regno2 == DF_REF_REGNO (*def_rec)))
17120 /* Function checks if instruction INSN uses register number
17121 REGNO as part of an address expression. */
17124 insn_uses_reg_mem (unsigned int regno, rtx insn)
17128 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17129 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17135 /* Search backward for non-agu definition of register number REGNO1
17136 or register number REGNO2 in the basic block, starting from instruction
17137 START, up to the head of the basic block or instruction INSN.
17139 Set *FOUND to true if a definition was found
17140 and to false otherwise.
17142 Distance in half-cycles between START and found instruction or head
17143 of BB is added to DISTANCE and returned. */
17146 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17147 rtx insn, int distance,
17148 rtx start, bool *found)
17150 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17158 && distance < LEA_SEARCH_THRESHOLD)
17160 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17162 distance = increase_distance (prev, next, distance);
17163 if (insn_defines_reg (regno1, regno2, prev))
17165 if (recog_memoized (prev) < 0
17166 || get_attr_type (prev) != TYPE_LEA)
17175 if (prev == BB_HEAD (bb))
17178 prev = PREV_INSN (prev);
17184 /* Search backward for non-agu definition of register number REGNO1
17185 or register number REGNO2 in INSN's basic block until
17186 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17187 2. Reach the boundary of a neighbouring BB, or
17188 3. Reach an agu definition.
17189 Returns the distance between the non-agu definition point and INSN.
17190 If no definition point, returns -1. */
17193 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17196 basic_block bb = BLOCK_FOR_INSN (insn);
17198 bool found = false;
17200 if (insn != BB_HEAD (bb))
17201 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17202 distance, PREV_INSN (insn),
17205 if (!found && distance < LEA_SEARCH_THRESHOLD)
17209 bool simple_loop = false;
17211 FOR_EACH_EDGE (e, ei, bb->preds)
17214 simple_loop = true;
17219 distance = distance_non_agu_define_in_bb (regno1, regno2,
17221 BB_END (bb), &found);
17224 int shortest_dist = -1;
17225 bool found_in_bb = false;
17227 FOR_EACH_EDGE (e, ei, bb->preds)
17230 = distance_non_agu_define_in_bb (regno1, regno2,
17236 if (shortest_dist < 0)
17237 shortest_dist = bb_dist;
17238 else if (bb_dist > 0)
17239 shortest_dist = MIN (bb_dist, shortest_dist);
17245 distance = shortest_dist;
17249 /* get_attr_type may modify recog data. We want to make sure
17250 that recog data is valid for instruction INSN, on which
17251 distance_non_agu_define is called. INSN is unchanged here. */
17252 extract_insn_cached (insn);
17257 return distance >> 1;
17260 /* Return the distance in half-cycles between INSN and the next
17261 insn that uses register number REGNO in a memory address, added
17262 to DISTANCE. Return -1 if REGNO is set.
17264 Put true value into *FOUND if register usage was found and false otherwise.
17266 Put true value into *REDEFINED if register redefinition was
17267 found and false otherwise. */
17270 distance_agu_use_in_bb (unsigned int regno,
17271 rtx insn, int distance, rtx start,
17272 bool *found, bool *redefined)
17274 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17279 *redefined = false;
17283 && distance < LEA_SEARCH_THRESHOLD)
17285 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17287 distance = increase_distance(prev, next, distance);
17288 if (insn_uses_reg_mem (regno, next))
17290 /* Return DISTANCE if OP0 is used in memory
17291 address in NEXT. */
17296 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17298 /* Return -1 if OP0 is set in NEXT. */
17306 if (next == BB_END (bb))
17309 next = NEXT_INSN (next);
17315 /* Return the distance between INSN and the next insn that uses
17316 register number REGNO0 in a memory address. Return -1 if no such
17317 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17320 distance_agu_use (unsigned int regno0, rtx insn)
17322 basic_block bb = BLOCK_FOR_INSN (insn);
17324 bool found = false;
17325 bool redefined = false;
17327 if (insn != BB_END (bb))
17328 distance = distance_agu_use_in_bb (regno0, insn, distance,
17330 &found, &redefined);
17332 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
17336 bool simple_loop = false;
17338 FOR_EACH_EDGE (e, ei, bb->succs)
17341 simple_loop = true;
17346 distance = distance_agu_use_in_bb (regno0, insn,
17347 distance, BB_HEAD (bb),
17348 &found, &redefined);
17351 int shortest_dist = -1;
17352 bool found_in_bb = false;
17353 bool redefined_in_bb = false;
17355 FOR_EACH_EDGE (e, ei, bb->succs)
17358 = distance_agu_use_in_bb (regno0, insn,
17359 distance, BB_HEAD (e->dest),
17360 &found_in_bb, &redefined_in_bb);
17363 if (shortest_dist < 0)
17364 shortest_dist = bb_dist;
17365 else if (bb_dist > 0)
17366 shortest_dist = MIN (bb_dist, shortest_dist);
17372 distance = shortest_dist;
17376 if (!found || redefined)
17379 return distance >> 1;
17382 /* Define this macro to tune LEA priority vs ADD; it takes effect when
17383 there is a dilemma of choosing LEA or ADD.
17384 Negative value: ADD is preferred over LEA.
17386 Positive value: LEA is preferred over ADD. */
17387 #define IX86_LEA_PRIORITY 0
17389 /* Return true if using lea INSN has a performance advantage
17390 over a sequence of instructions. The instruction sequence has
17391 SPLIT_COST cycles higher latency than the lea. */
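/* Informally: dist_define is how long ago (in half-cycles) the lea's
   input registers were last produced by a non-AGU instruction, and
   dist_use is how soon the result is consumed by an address
   calculation.  After padding dist_define with SPLIT_COST and
   IX86_LEA_PRIORITY, the lea is preferred when that padded distance is
   at least as large as dist_use (or exceeds LEA_MAX_STALL when the
   result is never used in an address).  */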
17394 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
17395 unsigned int regno2, int split_cost)
17397 int dist_define, dist_use;
17399 dist_define = distance_non_agu_define (regno1, regno2, insn);
17400 dist_use = distance_agu_use (regno0, insn);
17402 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
17404 /* If there is no non-AGU operand definition, no AGU
17405 operand usage and split cost is 0, then both the lea
17406 and non-lea variants have the same priority. Currently
17407 we prefer lea for 64-bit code and non-lea on 32-bit code. */
17409 if (dist_use < 0 && split_cost == 0)
17410 return TARGET_64BIT || IX86_LEA_PRIORITY;
17415 /* With a longer definition distance, lea is preferable.
17416 Here we adjust it to take into account the splitting cost and lea priority. */
17418 dist_define += split_cost + IX86_LEA_PRIORITY;
17420 /* If there is no use in memory address then we just check
17421 that split cost exceeds AGU stall. */
17423 return dist_define > LEA_MAX_STALL;
17425 /* If this insn has both backward non-agu dependence and forward
17426 agu dependence, the one with short distance takes effect. */
17427 return dist_define >= dist_use;
17430 /* Return true if it is legal for INSN to clobber the flags register,
17431 and false otherwise. */
17434 ix86_ok_to_clobber_flags (rtx insn)
17436 basic_block bb = BLOCK_FOR_INSN (insn);
17442 if (NONDEBUG_INSN_P (insn))
17444 for (use = DF_INSN_USES (insn); *use; use++)
17445 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
17448 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
17452 if (insn == BB_END (bb))
17455 insn = NEXT_INSN (insn);
17458 live = df_get_live_out(bb);
17459 return !REGNO_REG_SET_P (live, FLAGS_REG);
17462 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17463 move and add to avoid AGU stalls. */
17466 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
17468 unsigned int regno0, regno1, regno2;
17470 /* Check if we need to optimize. */
17471 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17474 /* Check it is correct to split here. */
17475 if (!ix86_ok_to_clobber_flags(insn))
17478 regno0 = true_regnum (operands[0]);
17479 regno1 = true_regnum (operands[1]);
17480 regno2 = true_regnum (operands[2]);
17482 /* We need to split only adds with a non-destructive
17483 destination operand. */
17484 if (regno0 == regno1 || regno0 == regno2)
17487 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
17490 /* Return true if we should emit lea instruction instead of mov instruction. */
17494 ix86_use_lea_for_mov (rtx insn, rtx operands[])
17496 unsigned int regno0, regno1;
17498 /* Check if we need to optimize. */
17499 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17502 /* Use lea for reg to reg moves only. */
17503 if (!REG_P (operands[0]) || !REG_P (operands[1]))
17506 regno0 = true_regnum (operands[0]);
17507 regno1 = true_regnum (operands[1]);
17509 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
17512 /* Return true if we need to split lea into a sequence of
17513 instructions to avoid AGU stalls. */
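/* For a concrete feel of the cost model below (registers illustrative):
   splitting  lea 0x4(%rbx,%rcx,2), %rax  needs a mov (the destination
   matches neither source), a shift or extra adds for the scale, an add
   for the base and an add-immediate for the displacement; the cost of
   the lea itself is subtracted before asking ix86_lea_outperforms.  */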
17516 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
17518 unsigned int regno0, regno1, regno2;
17520 struct ix86_address parts;
17523 /* Check if we need to optimize. */
17524 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17527 /* Check it is correct to split here. */
17528 if (!ix86_ok_to_clobber_flags(insn))
17531 ok = ix86_decompose_address (operands[1], &parts);
17534 /* There should be at least two components in the address. */
17535 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
17536 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
17539 /* We should not split into add if a non-legitimate PIC
17540 operand is used as the displacement. */
17541 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
17544 regno0 = true_regnum (operands[0]) ;
17545 regno1 = INVALID_REGNUM;
17546 regno2 = INVALID_REGNUM;
17549 regno1 = true_regnum (parts.base);
17551 regno2 = true_regnum (parts.index);
17555 /* Compute how many cycles we will add to execution time
17556 if we split the lea into a sequence of instructions. */
17557 if (parts.base || parts.index)
17559 /* Have to use mov instruction if non-destructive
17560 destination form is used. */
17561 if (regno1 != regno0 && regno2 != regno0)
17564 /* Have to add index to base if both exist. */
17565 if (parts.base && parts.index)
17568 /* Have to use shift and adds if scale is 2 or greater. */
17569 if (parts.scale > 1)
17571 if (regno0 != regno1)
17573 else if (regno2 == regno0)
17576 split_cost += parts.scale;
17579 /* Have to use add instruction with immediate if
17580 disp is nonzero. */
17581 if (parts.disp && parts.disp != const0_rtx)
17584 /* Subtract the price of lea. */
17588 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
17591 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17592 matches destination. RTX includes clobber of FLAGS_REG. */
17595 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
17600 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
17601 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17603 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17606 /* Return true if regno1 def is nearest to the insn. */
17609 find_nearest_reg_def (rtx insn, int regno1, int regno2)
17612 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
17616 while (prev && prev != start)
17618 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
17620 prev = PREV_INSN (prev);
17623 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
17625 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
17627 prev = PREV_INSN (prev);
17630 /* None of the regs is defined in the bb. */
17634 /* Split lea instructions into a sequence of instructions
17635 which are executed on the ALU to avoid AGU stalls.
17636 It is assumed that it is allowed to clobber the flags register
17637 at the lea position. */
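/* For example (registers illustrative), with a power-of-two scale
     lea 0x4(%rbx,%rcx,2), %rax
   is split roughly into
     mov %rcx, %rax
     shl $1, %rax
     add %rbx, %rax
     add $0x4, %rax
   while the destination-equals-base case (r1 = r1 + C*r2) is handled
   with repeated adds of the index instead of a shift.  */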
17640 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
17642 unsigned int regno0, regno1, regno2;
17643 struct ix86_address parts;
17647 ok = ix86_decompose_address (operands[1], &parts);
17650 target = gen_lowpart (mode, operands[0]);
17652 regno0 = true_regnum (target);
17653 regno1 = INVALID_REGNUM;
17654 regno2 = INVALID_REGNUM;
17658 parts.base = gen_lowpart (mode, parts.base);
17659 regno1 = true_regnum (parts.base);
17664 parts.index = gen_lowpart (mode, parts.index);
17665 regno2 = true_regnum (parts.index);
17669 parts.disp = gen_lowpart (mode, parts.disp);
17671 if (parts.scale > 1)
17673 /* Case r1 = r1 + ... */
17674 if (regno1 == regno0)
17676 /* If we have a case r1 = r1 + C * r1 then we
17677 should use multiplication which is very
17678 expensive. Assume cost model is wrong if we
17679 have such case here. */
17680 gcc_assert (regno2 != regno0);
17682 for (adds = parts.scale; adds > 0; adds--)
17683 ix86_emit_binop (PLUS, mode, target, parts.index);
17687 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17688 if (regno0 != regno2)
17689 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
17691 /* Use shift for scaling. */
17692 ix86_emit_binop (ASHIFT, mode, target,
17693 GEN_INT (exact_log2 (parts.scale)));
17696 ix86_emit_binop (PLUS, mode, target, parts.base);
17698 if (parts.disp && parts.disp != const0_rtx)
17699 ix86_emit_binop (PLUS, mode, target, parts.disp);
17702 else if (!parts.base && !parts.index)
17704 gcc_assert(parts.disp);
17705 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
17711 if (regno0 != regno2)
17712 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
17714 else if (!parts.index)
17716 if (regno0 != regno1)
17717 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
17721 if (regno0 == regno1)
17723 else if (regno0 == regno2)
17729 /* Find better operand for SET instruction, depending
17730 on which definition is farther from the insn. */
17731 if (find_nearest_reg_def (insn, regno1, regno2))
17732 tmp = parts.index, tmp1 = parts.base;
17734 tmp = parts.base, tmp1 = parts.index;
17736 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17738 if (parts.disp && parts.disp != const0_rtx)
17739 ix86_emit_binop (PLUS, mode, target, parts.disp);
17741 ix86_emit_binop (PLUS, mode, target, tmp1);
17745 ix86_emit_binop (PLUS, mode, target, tmp);
17748 if (parts.disp && parts.disp != const0_rtx)
17749 ix86_emit_binop (PLUS, mode, target, parts.disp);
17753 /* Return true if it is ok to optimize an ADD operation to LEA
17754 operation to avoid flag register consumption. For most processors,
17755 ADD is faster than LEA. For processors like ATOM, if the
17756 destination register of LEA holds an actual address which will be
17757 used soon, LEA is better and otherwise ADD is better. */
17760 ix86_lea_for_add_ok (rtx insn, rtx operands[])
17762 unsigned int regno0 = true_regnum (operands[0]);
17763 unsigned int regno1 = true_regnum (operands[1]);
17764 unsigned int regno2 = true_regnum (operands[2]);
17766 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17767 if (regno0 != regno1 && regno0 != regno2)
17770 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
17773 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
17776 /* Return true if destination reg of SET_BODY is shift count of USE_BODY. */
17780 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
17786 /* Retrieve destination of SET_BODY. */
17787 switch (GET_CODE (set_body))
17790 set_dest = SET_DEST (set_body);
17791 if (!set_dest || !REG_P (set_dest))
17795 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
17796 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
17804 /* Retrieve shift count of USE_BODY. */
17805 switch (GET_CODE (use_body))
17808 shift_rtx = XEXP (use_body, 1);
17811 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
17812 if (ix86_dep_by_shift_count_body (set_body,
17813 XVECEXP (use_body, 0, i)))
17821 && (GET_CODE (shift_rtx) == ASHIFT
17822 || GET_CODE (shift_rtx) == LSHIFTRT
17823 || GET_CODE (shift_rtx) == ASHIFTRT
17824 || GET_CODE (shift_rtx) == ROTATE
17825 || GET_CODE (shift_rtx) == ROTATERT))
17827 rtx shift_count = XEXP (shift_rtx, 1);
17829 /* Return true if shift count is dest of SET_BODY. */
17830 if (REG_P (shift_count))
17832 /* Add check since it can be invoked before register
17833 allocation in pre-reload schedule. */
17834 if (reload_completed
17835 && true_regnum (set_dest) == true_regnum (shift_count))
17837 else if (REGNO(set_dest) == REGNO(shift_count))
17845 /* Return true if destination reg of SET_INSN is shift count of USE_INSN. */
17849 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
17851 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
17852 PATTERN (use_insn));
17855 /* Return TRUE or FALSE depending on whether the unary operator meets the
17856 appropriate constraints. */
17859 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
17860 enum machine_mode mode ATTRIBUTE_UNUSED,
17861 rtx operands[2] ATTRIBUTE_UNUSED)
17863 /* If one of operands is memory, source and destination must match. */
17864 if ((MEM_P (operands[0])
17865 || MEM_P (operands[1]))
17866 && ! rtx_equal_p (operands[0], operands[1]))
17871 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17872 are ok, keeping in mind the possible movddup alternative. */
17875 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
17877 if (MEM_P (operands[0]))
17878 return rtx_equal_p (operands[0], operands[1 + high]);
17879 if (MEM_P (operands[1]) && MEM_P (operands[2]))
17880 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
17884 /* Post-reload splitter for converting an SF or DFmode value in an
17885 SSE register into an unsigned SImode. */
17888 ix86_split_convert_uns_si_sse (rtx operands[])
17890 enum machine_mode vecmode;
17891 rtx value, large, zero_or_two31, input, two31, x;
17893 large = operands[1];
17894 zero_or_two31 = operands[2];
17895 input = operands[3];
17896 two31 = operands[4];
17897 vecmode = GET_MODE (large);
17898 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
17900 /* Load up the value into the low element. We must ensure that the other
17901 elements are valid floats -- zero is the easiest such value. */
17904 if (vecmode == V4SFmode)
17905 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
17907 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
17911 input = gen_rtx_REG (vecmode, REGNO (input));
17912 emit_move_insn (value, CONST0_RTX (vecmode));
17913 if (vecmode == V4SFmode)
17914 emit_insn (gen_sse_movss (value, value, input));
17916 emit_insn (gen_sse2_movsd (value, value, input));
17919 emit_move_insn (large, two31);
17920 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
17922 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
17923 emit_insn (gen_rtx_SET (VOIDmode, large, x));
17925 x = gen_rtx_AND (vecmode, zero_or_two31, large);
17926 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
17928 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
17929 emit_insn (gen_rtx_SET (VOIDmode, value, x));
17931 large = gen_rtx_REG (V4SImode, REGNO (large));
17932 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
17934 x = gen_rtx_REG (V4SImode, REGNO (value));
17935 if (vecmode == V4SFmode)
17936 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
17938 emit_insn (gen_sse2_cvttpd2dq (x, value));
17941 emit_insn (gen_xorv4si3 (value, value, large));
17944 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17945 Expects the 64-bit DImode to be supplied in a pair of integral
17946 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17947 -mfpmath=sse, !optimize_size only. */
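/* The constants 0x43300000 and 0x45300000 used below are the high
   words of the doubles 2^52 and 2^84 (biased exponents 0x433 and
   0x453); gluing them above the 32-bit input halves produces the
   doubles 2^52 + lo and 2^84 + hi * 2^32, from which those biases are
   then subtracted again.  */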
17950 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
17952 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
17953 rtx int_xmm, fp_xmm;
17954 rtx biases, exponents;
17957 int_xmm = gen_reg_rtx (V4SImode);
17958 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
17959 emit_insn (gen_movdi_to_sse (int_xmm, input));
17960 else if (TARGET_SSE_SPLIT_REGS)
17962 emit_clobber (int_xmm);
17963 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
17967 x = gen_reg_rtx (V2DImode);
17968 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
17969 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
17972 x = gen_rtx_CONST_VECTOR (V4SImode,
17973 gen_rtvec (4, GEN_INT (0x43300000UL),
17974 GEN_INT (0x45300000UL),
17975 const0_rtx, const0_rtx));
17976 exponents = validize_mem (force_const_mem (V4SImode, x));
17978 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17979 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
17981 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17982 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17983 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17984 (0x1.0p84 + double(fp_value_hi_xmm)).
17985 Note these exponents differ by 32. */
17987 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
17989 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17990 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17991 real_ldexp (&bias_lo_rvt, &dconst1, 52);
17992 real_ldexp (&bias_hi_rvt, &dconst1, 84);
17993 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
17994 x = const_double_from_real_value (bias_hi_rvt, DFmode);
17995 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
17996 biases = validize_mem (force_const_mem (V2DFmode, biases));
17997 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
17999 /* Add the upper and lower DFmode values together. */
18001 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18004 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18005 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18006 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18009 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18012 /* Not used, but eases macroization of patterns. */
18014 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18015 rtx input ATTRIBUTE_UNUSED)
18017 gcc_unreachable ();
18020 /* Convert an unsigned SImode value into a DFmode. Only currently used
18021 for SSE, but applicable anywhere. */
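/* The idea: adding INT_MIN flips the sign bit, so the value
   reinterpreted as signed equals input - 2^31; it is then converted
   with the ordinary signed int->double conversion and 2^31.0 is added
   back.  */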
18024 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18026 REAL_VALUE_TYPE TWO31r;
18029 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18030 NULL, 1, OPTAB_DIRECT);
18032 fp = gen_reg_rtx (DFmode);
18033 emit_insn (gen_floatsidf2 (fp, x));
18035 real_ldexp (&TWO31r, &dconst1, 31);
18036 x = const_double_from_real_value (TWO31r, DFmode);
18038 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18040 emit_move_insn (target, x);
18043 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18044 32-bit mode; otherwise we have a direct convert instruction. */
18047 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18049 REAL_VALUE_TYPE TWO32r;
18050 rtx fp_lo, fp_hi, x;
18052 fp_lo = gen_reg_rtx (DFmode);
18053 fp_hi = gen_reg_rtx (DFmode);
18055 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18057 real_ldexp (&TWO32r, &dconst1, 32);
18058 x = const_double_from_real_value (TWO32r, DFmode);
18059 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18061 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18063 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18066 emit_move_insn (target, x);
18069 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18070 For x86_32, -mfpmath=sse, !optimize_size only. */
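/* The input is split as hi * 2^16 + lo; both 16-bit halves are exactly
   representable in SFmode, so each is converted separately and the
   result is recombined as fp_hi * 2^16 + fp_lo.  */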
18072 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18074 REAL_VALUE_TYPE ONE16r;
18075 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18077 real_ldexp (&ONE16r, &dconst1, 16);
18078 x = const_double_from_real_value (ONE16r, SFmode);
18079 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18080 NULL, 0, OPTAB_DIRECT);
18081 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18082 NULL, 0, OPTAB_DIRECT);
18083 fp_hi = gen_reg_rtx (SFmode);
18084 fp_lo = gen_reg_rtx (SFmode);
18085 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18086 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18087 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18089 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18091 if (!rtx_equal_p (target, fp_hi))
18092 emit_move_insn (target, fp_hi);
18095 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18096 a vector of unsigned ints VAL to vector of floats TARGET. */
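/* Same 16-bit split trick as ix86_expand_convert_uns_sisf_sse above,
   done lane-wise with vector AND/LSHIFTRT and a multiply by 2^16.  */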
18099 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18102 REAL_VALUE_TYPE TWO16r;
18103 enum machine_mode intmode = GET_MODE (val);
18104 enum machine_mode fltmode = GET_MODE (target);
18105 rtx (*cvt) (rtx, rtx);
18107 if (intmode == V4SImode)
18108 cvt = gen_floatv4siv4sf2;
18110 cvt = gen_floatv8siv8sf2;
18111 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18112 tmp[0] = force_reg (intmode, tmp[0]);
18113 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18115 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18116 NULL_RTX, 1, OPTAB_DIRECT);
18117 tmp[3] = gen_reg_rtx (fltmode);
18118 emit_insn (cvt (tmp[3], tmp[1]));
18119 tmp[4] = gen_reg_rtx (fltmode);
18120 emit_insn (cvt (tmp[4], tmp[2]));
18121 real_ldexp (&TWO16r, &dconst1, 16);
18122 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18123 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18124 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18126 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18128 if (tmp[7] != target)
18129 emit_move_insn (target, tmp[7]);
18132 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18133 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18134 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18135 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
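/* This is done branchlessly: a vector compare yields an all-ones mask
   for lanes >= 2^31, ANDing that mask with 2^31 gives the per-lane bias
   to subtract, and the same mask supplies the 0x80000000 bits returned
   through *XORP.  */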
18138 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18140 REAL_VALUE_TYPE TWO31r;
18141 rtx two31r, tmp[4];
18142 enum machine_mode mode = GET_MODE (val);
18143 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18144 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18145 rtx (*cmp) (rtx, rtx, rtx, rtx);
18148 for (i = 0; i < 3; i++)
18149 tmp[i] = gen_reg_rtx (mode);
18150 real_ldexp (&TWO31r, &dconst1, 31);
18151 two31r = const_double_from_real_value (TWO31r, scalarmode);
18152 two31r = ix86_build_const_vector (mode, 1, two31r);
18153 two31r = force_reg (mode, two31r);
18156 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18157 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18158 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18159 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18160 default: gcc_unreachable ();
18162 tmp[3] = gen_rtx_LE (mode, two31r, val);
18163 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18164 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18166 if (intmode == V4SImode || TARGET_AVX2)
18167 *xorp = expand_simple_binop (intmode, ASHIFT,
18168 gen_lowpart (intmode, tmp[0]),
18169 GEN_INT (31), NULL_RTX, 0,
18173 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18174 two31 = ix86_build_const_vector (intmode, 1, two31);
18175 *xorp = expand_simple_binop (intmode, AND,
18176 gen_lowpart (intmode, tmp[0]),
18177 two31, NULL_RTX, 0,
18180 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18184 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18185 then replicate the value for all elements of the vector register. */
18189 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18193 enum machine_mode scalar_mode;
18210 n_elt = GET_MODE_NUNITS (mode);
18211 v = rtvec_alloc (n_elt);
18212 scalar_mode = GET_MODE_INNER (mode);
18214 RTVEC_ELT (v, 0) = value;
18216 for (i = 1; i < n_elt; ++i)
18217 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18219 return gen_rtx_CONST_VECTOR (mode, v);
18222 gcc_unreachable ();
18226 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18227 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18228 for an SSE register. If VECT is true, then replicate the mask for
18229 all elements of the vector register. If INVERT is true, then create
18230 a mask excluding the sign bit. */
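/* For example, the DFmode mask has only bit 63 of each element set
   (0x8000000000000000); with INVERT it becomes 0x7fffffffffffffff,
   selecting everything but the sign bit.  The SFmode values are
   0x80000000 and 0x7fffffff respectively.  */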
18233 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18235 enum machine_mode vec_mode, imode;
18236 HOST_WIDE_INT hi, lo;
18241 /* Find the sign bit, sign extended to 2*HWI. */
18249 mode = GET_MODE_INNER (mode);
18251 lo = 0x80000000, hi = lo < 0;
18259 mode = GET_MODE_INNER (mode);
18261 if (HOST_BITS_PER_WIDE_INT >= 64)
18262 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18264 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18269 vec_mode = VOIDmode;
18270 if (HOST_BITS_PER_WIDE_INT >= 64)
18273 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18280 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18284 lo = ~lo, hi = ~hi;
18290 mask = immed_double_const (lo, hi, imode);
18292 vec = gen_rtvec (2, v, mask);
18293 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
18294 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
18301 gcc_unreachable ();
18305 lo = ~lo, hi = ~hi;
18307 /* Force this value into the low part of a fp vector constant. */
18308 mask = immed_double_const (lo, hi, imode);
18309 mask = gen_lowpart (mode, mask);
18311 if (vec_mode == VOIDmode)
18312 return force_reg (mode, mask);
18314 v = ix86_build_const_vector (vec_mode, vect, mask);
18315 return force_reg (vec_mode, v);
18318 /* Generate code for floating point ABS or NEG. */
18321 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
18324 rtx mask, set, dst, src;
18325 bool use_sse = false;
18326 bool vector_mode = VECTOR_MODE_P (mode);
18327 enum machine_mode vmode = mode;
18331 else if (mode == TFmode)
18333 else if (TARGET_SSE_MATH)
18335 use_sse = SSE_FLOAT_MODE_P (mode);
18336 if (mode == SFmode)
18338 else if (mode == DFmode)
18342 /* NEG and ABS performed with SSE use bitwise mask operations.
18343 Create the appropriate mask now. */
18345 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
18352 set = gen_rtx_fmt_e (code, mode, src);
18353 set = gen_rtx_SET (VOIDmode, dst, set);
18360 use = gen_rtx_USE (VOIDmode, mask);
18362 par = gen_rtvec (2, set, use);
18365 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18366 par = gen_rtvec (3, set, use, clob);
18368 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
18374 /* Expand a copysign operation. Special case operand 0 being a constant. */
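/* The expansions below reduce copysign to bit operations on a sign-bit
   mask, roughly dest = (op0 & ~SIGNMASK) | (op1 & SIGNMASK); see
   ix86_split_copysign_const and ix86_split_copysign_var further down.  */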
18377 ix86_expand_copysign (rtx operands[])
18379 enum machine_mode mode, vmode;
18380 rtx dest, op0, op1, mask, nmask;
18382 dest = operands[0];
18386 mode = GET_MODE (dest);
18388 if (mode == SFmode)
18390 else if (mode == DFmode)
18395 if (GET_CODE (op0) == CONST_DOUBLE)
18397 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
18399 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
18400 op0 = simplify_unary_operation (ABS, mode, op0, mode);
18402 if (mode == SFmode || mode == DFmode)
18404 if (op0 == CONST0_RTX (mode))
18405 op0 = CONST0_RTX (vmode);
18408 rtx v = ix86_build_const_vector (vmode, false, op0);
18410 op0 = force_reg (vmode, v);
18413 else if (op0 != CONST0_RTX (mode))
18414 op0 = force_reg (mode, op0);
18416 mask = ix86_build_signbit_mask (vmode, 0, 0);
18418 if (mode == SFmode)
18419 copysign_insn = gen_copysignsf3_const;
18420 else if (mode == DFmode)
18421 copysign_insn = gen_copysigndf3_const;
18423 copysign_insn = gen_copysigntf3_const;
18425 emit_insn (copysign_insn (dest, op0, op1, mask));
18429 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
18431 nmask = ix86_build_signbit_mask (vmode, 0, 1);
18432 mask = ix86_build_signbit_mask (vmode, 0, 0);
18434 if (mode == SFmode)
18435 copysign_insn = gen_copysignsf3_var;
18436 else if (mode == DFmode)
18437 copysign_insn = gen_copysigndf3_var;
18439 copysign_insn = gen_copysigntf3_var;
18441 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
18445 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18446 be a constant, and so has already been expanded into a vector constant. */
18449 ix86_split_copysign_const (rtx operands[])
18451 enum machine_mode mode, vmode;
18452 rtx dest, op0, mask, x;
18454 dest = operands[0];
18456 mask = operands[3];
18458 mode = GET_MODE (dest);
18459 vmode = GET_MODE (mask);
18461 dest = simplify_gen_subreg (vmode, dest, mode, 0);
18462 x = gen_rtx_AND (vmode, dest, mask);
18463 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18465 if (op0 != CONST0_RTX (vmode))
18467 x = gen_rtx_IOR (vmode, dest, op0);
18468 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18472 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18473 so we have to do two masks. */
18476 ix86_split_copysign_var (rtx operands[])
18478 enum machine_mode mode, vmode;
18479 rtx dest, scratch, op0, op1, mask, nmask, x;
18481 dest = operands[0];
18482 scratch = operands[1];
18485 nmask = operands[4];
18486 mask = operands[5];
18488 mode = GET_MODE (dest);
18489 vmode = GET_MODE (mask);
18491 if (rtx_equal_p (op0, op1))
18493 /* Shouldn't happen often (it's useless, obviously), but when it does
18494 we'd generate incorrect code if we continue below. */
18495 emit_move_insn (dest, op0);
18499 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
18501 gcc_assert (REGNO (op1) == REGNO (scratch));
18503 x = gen_rtx_AND (vmode, scratch, mask);
18504 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
18507 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
18508 x = gen_rtx_NOT (vmode, dest);
18509 x = gen_rtx_AND (vmode, x, op0);
18510 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18514 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
18516 x = gen_rtx_AND (vmode, scratch, mask);
18518 else /* alternative 2,4 */
18520 gcc_assert (REGNO (mask) == REGNO (scratch));
18521 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
18522 x = gen_rtx_AND (vmode, scratch, op1);
18524 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
18526 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
18528 dest = simplify_gen_subreg (vmode, op0, mode, 0);
18529 x = gen_rtx_AND (vmode, dest, nmask);
18531 else /* alternative 3,4 */
18533 gcc_assert (REGNO (nmask) == REGNO (dest));
18535 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
18536 x = gen_rtx_AND (vmode, dest, op0);
18538 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18541 x = gen_rtx_IOR (vmode, dest, scratch);
18542 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
18545 /* Return TRUE or FALSE depending on whether the first SET in INSN
18546 has source and destination with matching CC modes, and that the
18547 CC mode is at least as constrained as REQ_MODE. */
18550 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
18553 enum machine_mode set_mode;
18555 set = PATTERN (insn);
18556 if (GET_CODE (set) == PARALLEL)
18557 set = XVECEXP (set, 0, 0);
18558 gcc_assert (GET_CODE (set) == SET);
18559 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
18561 set_mode = GET_MODE (SET_DEST (set));
18565 if (req_mode != CCNOmode
18566 && (req_mode != CCmode
18567 || XEXP (SET_SRC (set), 1) != const0_rtx))
18571 if (req_mode == CCGCmode)
18575 if (req_mode == CCGOCmode || req_mode == CCNOmode)
18579 if (req_mode == CCZmode)
18589 if (set_mode != req_mode)
18594 gcc_unreachable ();
18597 return GET_MODE (SET_SRC (set)) == set_mode;
18600 /* Generate insn patterns to do an integer compare of OPERANDS. */
18603 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
18605 enum machine_mode cmpmode;
18608 cmpmode = SELECT_CC_MODE (code, op0, op1);
18609 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
18611 /* This is very simple, but making the interface the same as in the
18612 FP case makes the rest of the code easier. */
18613 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
18614 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
18616 /* Return the test that should be put into the flags user, i.e.
18617 the bcc, scc, or cmov instruction. */
18618 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
18621 /* Figure out whether to use ordered or unordered fp comparisons.
18622 Return the appropriate mode to use. */
18625 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
18627 /* ??? In order to make all comparisons reversible, we do all comparisons
18628 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18629 all forms of trapping and nontrapping comparisons, we can make inequality
18630 comparisons trapping again, since it results in better code when using
18631 FCOM based compares. */
18632 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
18636 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
18638 enum machine_mode mode = GET_MODE (op0);
18640 if (SCALAR_FLOAT_MODE_P (mode))
18642 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
18643 return ix86_fp_compare_mode (code);
18648 /* Only zero flag is needed. */
18649 case EQ: /* ZF=0 */
18650 case NE: /* ZF!=0 */
18652 /* Codes needing carry flag. */
18653 case GEU: /* CF=0 */
18654 case LTU: /* CF=1 */
18655 /* Detect overflow checks. They need just the carry flag. */
18656 if (GET_CODE (op0) == PLUS
18657 && rtx_equal_p (op1, XEXP (op0, 0)))
18661 case GTU: /* CF=0 & ZF=0 */
18662 case LEU: /* CF=1 | ZF=1 */
18663 /* Detect overflow checks. They need just the carry flag. */
18664 if (GET_CODE (op0) == MINUS
18665 && rtx_equal_p (op1, XEXP (op0, 0)))
18669 /* Codes possibly doable only with sign flag when
18670 comparing against zero. */
18671 case GE: /* SF=OF or SF=0 */
18672 case LT: /* SF<>OF or SF=1 */
18673 if (op1 == const0_rtx)
18676 /* For other cases Carry flag is not required. */
18678 /* Codes doable only with sign flag when comparing
18679 against zero, but we lack a jump instruction for it,
18680 so we need to use relational tests against overflow,
18681 which thus needs to be zero.
18682 case GT: /* ZF=0 & SF=OF */
18683 case LE: /* ZF=1 | SF<>OF */
18684 if (op1 == const0_rtx)
18688 /* strcmp patterns do (use flags) and combine may ask us for a proper mode. */
18693 gcc_unreachable ();
18697 /* Return the fixed registers used for condition codes. */
18700 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
18707 /* If two condition code modes are compatible, return a condition code
18708 mode which is compatible with both. Otherwise, return VOIDmode. */
18711 static enum machine_mode
18712 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
18717 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
18720 if ((m1 == CCGCmode && m2 == CCGOCmode)
18721 || (m1 == CCGOCmode && m2 == CCGCmode))
18724 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
18726 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
18732 gcc_unreachable ();
18762 /* These are only compatible with themselves, which we already checked above. */
18769 /* Return a comparison we can do that is equivalent to
18770 swap_condition (code), apart possibly from orderedness.
18771 But, never change orderedness if TARGET_IEEE_FP, returning
18772 UNKNOWN in that case if necessary. */
18774 static enum rtx_code
18775 ix86_fp_swap_condition (enum rtx_code code)
18779 case GT: /* GTU - CF=0 & ZF=0 */
18780 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
18781 case GE: /* GEU - CF=0 */
18782 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
18783 case UNLT: /* LTU - CF=1 */
18784 return TARGET_IEEE_FP ? UNKNOWN : GT;
18785 case UNLE: /* LEU - CF=1 | ZF=1 */
18786 return TARGET_IEEE_FP ? UNKNOWN : GE;
18788 return swap_condition (code);
18792 /* Return cost of comparison CODE using the best strategy for performance.
18793 All the following functions use the number of instructions as the cost metric.
18794 In the future this should be tweaked to compute bytes for optimize_size and
18795 take into account performance of various instructions on various CPUs. */
18798 ix86_fp_comparison_cost (enum rtx_code code)
18802 /* The cost of code using bit-twiddling on %ah. */
18819 arith_cost = TARGET_IEEE_FP ? 5 : 4;
18823 arith_cost = TARGET_IEEE_FP ? 6 : 4;
18826 gcc_unreachable ();
18829 switch (ix86_fp_comparison_strategy (code))
18831 case IX86_FPCMP_COMI:
18832 return arith_cost > 4 ? 3 : 2;
18833 case IX86_FPCMP_SAHF:
18834 return arith_cost > 4 ? 4 : 3;
18840 /* Return strategy to use for floating-point. We assume that fcomi is always
18841 preferable where available, since that is also true when looking at size
18842 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18844 enum ix86_fpcmp_strategy
18845 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
18847 /* Do fcomi/sahf based test when profitable. */
18850 return IX86_FPCMP_COMI;
18852 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
18853 return IX86_FPCMP_SAHF;
18855 return IX86_FPCMP_ARITH;
18858 /* Swap, force into registers, or otherwise massage the two operands
18859 to a fp comparison. The operands are updated in place; the new
18860 comparison code is returned. */
18862 static enum rtx_code
18863 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
18865 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
18866 rtx op0 = *pop0, op1 = *pop1;
18867 enum machine_mode op_mode = GET_MODE (op0);
18868 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
18870 /* All of the unordered compare instructions only work on registers.
18871 The same is true of the fcomi compare instructions. The XFmode
18872 compare instructions require registers except when comparing
18873 against zero or when converting operand 1 from fixed point to floating point. */
18877 && (fpcmp_mode == CCFPUmode
18878 || (op_mode == XFmode
18879 && ! (standard_80387_constant_p (op0) == 1
18880 || standard_80387_constant_p (op1) == 1)
18881 && GET_CODE (op1) != FLOAT)
18882 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
18884 op0 = force_reg (op_mode, op0);
18885 op1 = force_reg (op_mode, op1);
18889 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18890 things around if they appear profitable, otherwise force op0
18891 into a register. */
18893 if (standard_80387_constant_p (op0) == 0
18895 && ! (standard_80387_constant_p (op1) == 0
18898 enum rtx_code new_code = ix86_fp_swap_condition (code);
18899 if (new_code != UNKNOWN)
18902 tmp = op0, op0 = op1, op1 = tmp;
18908 op0 = force_reg (op_mode, op0);
18910 if (CONSTANT_P (op1))
18912 int tmp = standard_80387_constant_p (op1);
18914 op1 = validize_mem (force_const_mem (op_mode, op1));
18918 op1 = force_reg (op_mode, op1);
18921 op1 = force_reg (op_mode, op1);
18925 /* Try to rearrange the comparison to make it cheaper. */
18926 if (ix86_fp_comparison_cost (code)
18927 > ix86_fp_comparison_cost (swap_condition (code))
18928 && (REG_P (op1) || can_create_pseudo_p ()))
18931 tmp = op0, op0 = op1, op1 = tmp;
18932 code = swap_condition (code);
18934 op0 = force_reg (op_mode, op0);
18942 /* Convert comparison codes we use to represent FP comparison to integer
18943 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
18947 ix86_fp_compare_code_to_integer (enum rtx_code code)
18976 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18979 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
18981 enum machine_mode fpcmp_mode, intcmp_mode;
18984 fpcmp_mode = ix86_fp_compare_mode (code);
18985 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
18987 /* Do fcomi/sahf based test when profitable. */
18988 switch (ix86_fp_comparison_strategy (code))
18990 case IX86_FPCMP_COMI:
18991 intcmp_mode = fpcmp_mode;
18992 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
18993 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
18998 case IX86_FPCMP_SAHF:
18999 intcmp_mode = fpcmp_mode;
19000 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19001 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19005 scratch = gen_reg_rtx (HImode);
19006 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19007 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19010 case IX86_FPCMP_ARITH:
19011 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19012 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19013 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19015 scratch = gen_reg_rtx (HImode);
19016 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19018 /* In the unordered case, we have to check C2 for NaN's, which
19019 doesn't happen to work out to anything nice combination-wise.
19020 So do some bit twiddling on the value we've got in AH to come
19021 up with an appropriate set of condition codes. */
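/* After fnstsw, AH holds the x87 condition bits as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40 (C1 = 0x02 is not used here), so the masks
   below test combinations of those: 0x45 = C3|C2|C0, 0x05 = C2|C0,
   0x40 = C3 and 0x04 = C2, the "unordered" indicator.  */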
19023 intcmp_mode = CCNOmode;
19028 if (code == GT || !TARGET_IEEE_FP)
19030 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19035 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19036 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19037 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19038 intcmp_mode = CCmode;
19044 if (code == LT && TARGET_IEEE_FP)
19046 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19047 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19048 intcmp_mode = CCmode;
19053 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19059 if (code == GE || !TARGET_IEEE_FP)
19061 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19066 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19067 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19073 if (code == LE && TARGET_IEEE_FP)
19075 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19076 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19077 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19078 intcmp_mode = CCmode;
19083 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19089 if (code == EQ && TARGET_IEEE_FP)
19091 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19092 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19093 intcmp_mode = CCmode;
19098 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19104 if (code == NE && TARGET_IEEE_FP)
19106 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19107 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19113 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19119 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19123 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19128 gcc_unreachable ();
19136 /* Return the test that should be put into the flags user, i.e.
19137 the bcc, scc, or cmov instruction. */
19138 return gen_rtx_fmt_ee (code, VOIDmode,
19139 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19144 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19148 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19149 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19151 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19153 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19154 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19157 ret = ix86_expand_int_compare (code, op0, op1);
19163 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19165 enum machine_mode mode = GET_MODE (op0);
19177 tmp = ix86_expand_compare (code, op0, op1);
19178 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19179 gen_rtx_LABEL_REF (VOIDmode, label),
19181 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19188 /* Expand DImode branch into multiple compare+branch. */
19190 rtx lo[2], hi[2], label2;
19191 enum rtx_code code1, code2, code3;
19192 enum machine_mode submode;
19194 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19196 tmp = op0, op0 = op1, op1 = tmp;
19197 code = swap_condition (code);
19200 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19201 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19203 submode = mode == DImode ? SImode : DImode;
19205 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19206 avoid two branches. This costs one extra insn, so disable when
19207 optimizing for size. */
19209 if ((code == EQ || code == NE)
19210 && (!optimize_insn_for_size_p ()
19211 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19216 if (hi[1] != const0_rtx)
19217 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19218 NULL_RTX, 0, OPTAB_WIDEN);
19221 if (lo[1] != const0_rtx)
19222 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19223 NULL_RTX, 0, OPTAB_WIDEN);
19225 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19226 NULL_RTX, 0, OPTAB_WIDEN);
19228 ix86_expand_branch (code, tmp, const0_rtx, label);
19232 /* Otherwise, if we are doing less-than or greater-or-equal-than,
19233 op1 is a constant and the low word is zero, then we can just
19234 examine the high word. Similarly for low word -1 and
19235 less-or-equal-than or greater-than. */
19237 if (CONST_INT_P (hi[1]))
19240 case LT: case LTU: case GE: case GEU:
19241 if (lo[1] == const0_rtx)
19243 ix86_expand_branch (code, hi[0], hi[1], label);
19247 case LE: case LEU: case GT: case GTU:
19248 if (lo[1] == constm1_rtx)
19250 ix86_expand_branch (code, hi[0], hi[1], label);
19258 /* Otherwise, we need two or three jumps. */
19260 label2 = gen_label_rtx ();
19263 code2 = swap_condition (code);
19264 code3 = unsigned_condition (code);
19268 case LT: case GT: case LTU: case GTU:
19271 case LE: code1 = LT; code2 = GT; break;
19272 case GE: code1 = GT; code2 = LT; break;
19273 case LEU: code1 = LTU; code2 = GTU; break;
19274 case GEU: code1 = GTU; code2 = LTU; break;
19276 case EQ: code1 = UNKNOWN; code2 = NE; break;
19277 case NE: code2 = UNKNOWN; break;
19280 gcc_unreachable ();
19285 * if (hi(a) < hi(b)) goto true;
19286 * if (hi(a) > hi(b)) goto false;
19287 * if (lo(a) < lo(b)) goto true;
19291 if (code1 != UNKNOWN)
19292 ix86_expand_branch (code1, hi[0], hi[1], label);
19293 if (code2 != UNKNOWN)
19294 ix86_expand_branch (code2, hi[0], hi[1], label2);
19296 ix86_expand_branch (code3, lo[0], lo[1], label);
19298 if (code2 != UNKNOWN)
19299 emit_label (label2);
19304 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
19309 /* Split branch based on floating point condition. */
19311 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
19312 rtx target1, rtx target2, rtx tmp, rtx pushed)
19317 if (target2 != pc_rtx)
19320 code = reverse_condition_maybe_unordered (code);
19325 condition = ix86_expand_fp_compare (code, op1, op2,
19328 /* Remove pushed operand from stack. */
19330 ix86_free_from_memory (GET_MODE (pushed));
19332 i = emit_jump_insn (gen_rtx_SET
19334 gen_rtx_IF_THEN_ELSE (VOIDmode,
19335 condition, target1, target2)));
19336 if (split_branch_probability >= 0)
19337 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
19341 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
19345 gcc_assert (GET_MODE (dest) == QImode);
19347 ret = ix86_expand_compare (code, op0, op1);
19348 PUT_MODE (ret, QImode);
19349 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
19352 /* Expand comparison setting or clearing carry flag. Return true when
19353 successful and set pop for the operation. */
19355 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
19357 enum machine_mode mode =
19358 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
19360 /* Do not handle double-mode compares that go through a special path. */
19361 if (mode == (TARGET_64BIT ? TImode : DImode))
19364 if (SCALAR_FLOAT_MODE_P (mode))
19366 rtx compare_op, compare_seq;
19368 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19370 /* Shortcut: following common codes never translate
19371 into carry flag compares. */
19372 if (code == EQ || code == NE || code == UNEQ || code == LTGT
19373 || code == ORDERED || code == UNORDERED)
19376 /* These comparisons require zero flag; swap operands so they won't. */
19377 if ((code == GT || code == UNLE || code == LE || code == UNGT)
19378 && !TARGET_IEEE_FP)
19383 code = swap_condition (code);
19386 /* Try to expand the comparison and verify that we end up with a
19387 carry-flag-based comparison. This fails to be true only when
19388 we decide to expand the comparison using arithmetic, which is not
19389 a common scenario. */
19391 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19392 compare_seq = get_insns ();
19395 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
19396 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
19397 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
19399 code = GET_CODE (compare_op);
19401 if (code != LTU && code != GEU)
19404 emit_insn (compare_seq);
19409 if (!INTEGRAL_MODE_P (mode))
19418 /* Convert a==0 into (unsigned)a<1. */
19421 if (op1 != const0_rtx)
19424 code = (code == EQ ? LTU : GEU);
19427 /* Convert a>b into b<a or a>=b-1. */
19430 if (CONST_INT_P (op1))
19432 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
19433 /* Bail out on overflow. We could still swap the operands, but that
19434 would force loading the constant into a register. */
19435 if (op1 == const0_rtx
19436 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
19438 code = (code == GTU ? GEU : LTU);
19445 code = (code == GTU ? LTU : GEU);
19449 /* Convert a>=0 into (unsigned)a<0x80000000. */
19452 if (mode == DImode || op1 != const0_rtx)
19454 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
19455 code = (code == LT ? GEU : LTU);
19459 if (mode == DImode || op1 != constm1_rtx)
19461 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
19462 code = (code == LE ? GEU : LTU);
19468 /* Swapping operands may cause constant to appear as first operand. */
19469 if (!nonimmediate_operand (op0, VOIDmode))
19471 if (!can_create_pseudo_p ())
19473 op0 = force_reg (mode, op0);
19475 *pop = ix86_expand_compare (code, op0, op1);
19476 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
19481 ix86_expand_int_movcc (rtx operands[])
19483 enum rtx_code code = GET_CODE (operands[1]), compare_code;
19484 rtx compare_seq, compare_op;
19485 enum machine_mode mode = GET_MODE (operands[0]);
19486 bool sign_bit_compare_p = false;
19487 rtx op0 = XEXP (operands[1], 0);
19488 rtx op1 = XEXP (operands[1], 1);
19490 if (GET_MODE (op0) == TImode
19491 || (GET_MODE (op0) == DImode
19496 compare_op = ix86_expand_compare (code, op0, op1);
19497 compare_seq = get_insns ();
19500 compare_code = GET_CODE (compare_op);
19502 if ((op1 == const0_rtx && (code == GE || code == LT))
19503 || (op1 == constm1_rtx && (code == GT || code == LE)))
19504 sign_bit_compare_p = true;
19506 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19507 HImode insns, we'd be swallowed in word prefix ops. */
19509 if ((mode != HImode || TARGET_FAST_PREFIX)
19510 && (mode != (TARGET_64BIT ? TImode : DImode))
19511 && CONST_INT_P (operands[2])
19512 && CONST_INT_P (operands[3]))
19514 rtx out = operands[0];
19515 HOST_WIDE_INT ct = INTVAL (operands[2]);
19516 HOST_WIDE_INT cf = INTVAL (operands[3]);
19517 HOST_WIDE_INT diff;
19520 /* Sign bit compares are better done using shifts than by using sbb. */
19522 if (sign_bit_compare_p
19523 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
19525 /* Detect overlap between destination and compare sources. */
19528 if (!sign_bit_compare_p)
19531 bool fpcmp = false;
19533 compare_code = GET_CODE (compare_op);
19535 flags = XEXP (compare_op, 0);
19537 if (GET_MODE (flags) == CCFPmode
19538 || GET_MODE (flags) == CCFPUmode)
19542 = ix86_fp_compare_code_to_integer (compare_code);
19545 /* To simplify rest of code, restrict to the GEU case. */
19546 if (compare_code == LTU)
19548 HOST_WIDE_INT tmp = ct;
19551 compare_code = reverse_condition (compare_code);
19552 code = reverse_condition (code);
19557 PUT_CODE (compare_op,
19558 reverse_condition_maybe_unordered
19559 (GET_CODE (compare_op)));
19561 PUT_CODE (compare_op,
19562 reverse_condition (GET_CODE (compare_op)));
19566 if (reg_overlap_mentioned_p (out, op0)
19567 || reg_overlap_mentioned_p (out, op1))
19568 tmp = gen_reg_rtx (mode);
19570 if (mode == DImode)
19571 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
19573 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
19574 flags, compare_op));
19578 if (code == GT || code == GE)
19579 code = reverse_condition (code);
19582 HOST_WIDE_INT tmp = ct;
19587 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
19600 tmp = expand_simple_binop (mode, PLUS,
19602 copy_rtx (tmp), 1, OPTAB_DIRECT);
19613 tmp = expand_simple_binop (mode, IOR,
19615 copy_rtx (tmp), 1, OPTAB_DIRECT);
19617 else if (diff == -1 && ct)
19627 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
19629 tmp = expand_simple_binop (mode, PLUS,
19630 copy_rtx (tmp), GEN_INT (cf),
19631 copy_rtx (tmp), 1, OPTAB_DIRECT);
19639 * andl cf - ct, dest
19649 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
19652 tmp = expand_simple_binop (mode, AND,
19654 gen_int_mode (cf - ct, mode),
19655 copy_rtx (tmp), 1, OPTAB_DIRECT);
19657 tmp = expand_simple_binop (mode, PLUS,
19658 copy_rtx (tmp), GEN_INT (ct),
19659 copy_rtx (tmp), 1, OPTAB_DIRECT);
19662 if (!rtx_equal_p (tmp, out))
19663 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
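/* Illustrative sketch (added, not part of the original sources) of the
   branch-free selection the code above aims for, in AT&T syntax with a
   hypothetical destination register:

       cmpl  op1, op0        # CF = (unsigned) op0 < op1
       sbbl  dest, dest      # dest = CF ? -1 : 0
       andl  cf - ct, dest   # dest = CF ? cf - ct : 0
       addl  ct, dest        # dest = CF ? cf : ct                      */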
19670 enum machine_mode cmp_mode = GET_MODE (op0);
19673 tmp = ct, ct = cf, cf = tmp;
19676 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19678 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
19680 /* We may be reversing an unordered compare to a normal compare, which
19681 is not valid in general (we may convert a non-trapping condition
19682 to a trapping one); however, on i386 we currently emit all
19683 comparisons unordered. */
19684 compare_code = reverse_condition_maybe_unordered (compare_code);
19685 code = reverse_condition_maybe_unordered (code);
19689 compare_code = reverse_condition (compare_code);
19690 code = reverse_condition (code);
19694 compare_code = UNKNOWN;
19695 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
19696 && CONST_INT_P (op1))
19698 if (op1 == const0_rtx
19699 && (code == LT || code == GE))
19700 compare_code = code;
19701 else if (op1 == constm1_rtx)
19705 else if (code == GT)
19710 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19711 if (compare_code != UNKNOWN
19712 && GET_MODE (op0) == GET_MODE (out)
19713 && (cf == -1 || ct == -1))
19715 /* If lea code below could be used, only optimize
19716 if it results in a 2 insn sequence. */
19718 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
19719 || diff == 3 || diff == 5 || diff == 9)
19720 || (compare_code == LT && ct == -1)
19721 || (compare_code == GE && cf == -1))
19724 * notl op1 (if necessary)
19732 code = reverse_condition (code);
19735 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19737 out = expand_simple_binop (mode, IOR,
19739 out, 1, OPTAB_DIRECT);
19740 if (out != operands[0])
19741 emit_move_insn (operands[0], out);
19748 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
19749 || diff == 3 || diff == 5 || diff == 9)
19750 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
19752 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
19758 * lea cf(dest*(ct-cf)),dest
19762 * This also catches the degenerate setcc-only case.
19768 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19771 /* On x86_64 the lea instruction operates on Pmode, so we need
19772 to do the arithmetic in the proper mode to match. */
19774 tmp = copy_rtx (out);
19778 out1 = copy_rtx (out);
19779 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
19783 tmp = gen_rtx_PLUS (mode, tmp, out1);
19789 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
19792 if (!rtx_equal_p (tmp, out))
19795 out = force_operand (tmp, copy_rtx (out));
19797 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
19799 if (!rtx_equal_p (out, operands[0]))
19800 emit_move_insn (operands[0], copy_rtx (out));
19806 * General case: Jumpful:
19807 * xorl dest,dest cmpl op1, op2
19808 * cmpl op1, op2 movl ct, dest
19809 * setcc dest jcc 1f
19810 * decl dest movl cf, dest
19811 * andl (cf-ct),dest 1:
19814 * Size 20. Size 14.
19816 * This is reasonably steep, but branch mispredict costs are
19817 * high on modern cpus, so consider failing only if optimizing for size. */
19821 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19822 && BRANCH_COST (optimize_insn_for_speed_p (),
19827 enum machine_mode cmp_mode = GET_MODE (op0);
19832 if (SCALAR_FLOAT_MODE_P (cmp_mode))
19834 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
19836 /* We may be reversing an unordered compare to a normal compare,
19837 which is not valid in general (we may convert a non-trapping
19838 condition to a trapping one); however, on i386 we currently
19839 emit all comparisons unordered. */
19840 code = reverse_condition_maybe_unordered (code);
19844 code = reverse_condition (code);
19845 if (compare_code != UNKNOWN)
19846 compare_code = reverse_condition (compare_code);
19850 if (compare_code != UNKNOWN)
19852 /* notl op1 (if needed)
19857 For x < 0 (resp. x <= -1) there will be no notl,
19858 so if possible swap the constants to get rid of the complement.
19860 True/false will be -1/0 while code below (store flag
19861 followed by decrement) is 0/-1, so the constants need
19862 to be exchanged once more. */
19864 if (compare_code == GE || !cf)
19866 code = reverse_condition (code);
19871 HOST_WIDE_INT tmp = cf;
19876 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
19880 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
19882 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
19884 copy_rtx (out), 1, OPTAB_DIRECT);
19887 out = expand_simple_binop (mode, AND, copy_rtx (out),
19888 gen_int_mode (cf - ct, mode),
19889 copy_rtx (out), 1, OPTAB_DIRECT);
19891 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
19892 copy_rtx (out), 1, OPTAB_DIRECT);
19893 if (!rtx_equal_p (out, operands[0]))
19894 emit_move_insn (operands[0], copy_rtx (out));
19900 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
19902 /* Try a few more things with specific constants and a variable. */
19905 rtx var, orig_out, out, tmp;
19907 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19910 /* If one of the two operands is an interesting constant, load a
19911 constant with the above and mask it in with a logical operation. */
19913 if (CONST_INT_P (operands[2]))
19916 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
19917 operands[3] = constm1_rtx, op = and_optab;
19918 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
19919 operands[3] = const0_rtx, op = ior_optab;
19923 else if (CONST_INT_P (operands[3]))
19926 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
19927 operands[2] = constm1_rtx, op = and_optab;
19928 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
19929 operands[2] = const0_rtx, op = ior_optab;
19936 orig_out = operands[0];
19937 tmp = gen_reg_rtx (mode);
19940 /* Recurse to get the constant loaded. */
19941 if (ix86_expand_int_movcc (operands) == 0)
19944 /* Mask in the interesting variable. */
19945 out = expand_binop (mode, op, var, tmp, orig_out, 0,
19947 if (!rtx_equal_p (out, orig_out))
19948 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
19954 * For comparison with above,
19964 if (! nonimmediate_operand (operands[2], mode))
19965 operands[2] = force_reg (mode, operands[2]);
19966 if (! nonimmediate_operand (operands[3], mode))
19967 operands[3] = force_reg (mode, operands[3]);
19969 if (! register_operand (operands[2], VOIDmode)
19971 || ! register_operand (operands[3], VOIDmode)))
19972 operands[2] = force_reg (mode, operands[2]);
19975 && ! register_operand (operands[3], VOIDmode))
19976 operands[3] = force_reg (mode, operands[3]);
19978 emit_insn (compare_seq);
19979 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
19980 gen_rtx_IF_THEN_ELSE (mode,
19981 compare_op, operands[2],
19986 /* Swap, force into registers, or otherwise massage the two operands
19987 to an sse comparison with a mask result. Thus we differ a bit from
19988 ix86_prepare_fp_compare_args which expects to produce a flags result.
19990 The DEST operand exists to help determine whether to commute commutative
19991 operators. The POP0/POP1 operands are updated in place. The new
19992 comparison code is returned, or UNKNOWN if not implementable. */
19994 static enum rtx_code
19995 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
19996 rtx *pop0, rtx *pop1)
20004 /* AVX supports all the needed comparisons. */
20007 /* We have no LTGT as an operator. We could implement it with
20008 NE & ORDERED, but this requires an extra temporary. It's
20009 not clear that it's worth it. */
20016 /* These are supported directly. */
20023 /* AVX has 3 operand comparisons, no need to swap anything. */
20026 /* For commutative operators, try to canonicalize the destination
20027 operand to be first in the comparison - this helps reload to
20028 avoid extra moves. */
20029 if (!dest || !rtx_equal_p (dest, *pop1))
20037 /* These are not supported directly before AVX, and furthermore
20038 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20039 comparison operands to transform into something that is supported. */
20044 code = swap_condition (code);
20048 gcc_unreachable ();
20054 /* Detect conditional moves that exactly match min/max operational
20055 semantics. Note that this is IEEE safe, as long as we don't
20056 interchange the operands.
20058 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20059 and TRUE if the operation is successful and instructions are emitted. */
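/* Illustrative note (added, not part of the original sources): the SSE
   min/max instructions are not symmetric in their operands -- minps, for
   instance, returns its second source operand when either input is a NaN
   or when comparing +0.0 with -0.0 -- which is why the comment above
   insists that the operands must not be interchanged and why the
   non-finite-math path below uses UNSPEC_IEEE_MIN/UNSPEC_IEEE_MAX
   instead of plain SMIN/SMAX.  */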
20062 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20063 rtx cmp_op1, rtx if_true, rtx if_false)
20065 enum machine_mode mode;
20071 else if (code == UNGE)
20074 if_true = if_false;
20080 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20082 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20087 mode = GET_MODE (dest);
20089 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20090 but MODE may be a vector mode and thus not appropriate. */
20091 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20093 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20096 if_true = force_reg (mode, if_true);
20097 v = gen_rtvec (2, if_true, if_false);
20098 tmp = gen_rtx_UNSPEC (mode, v, u);
20102 code = is_min ? SMIN : SMAX;
20103 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20106 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20110 /* Expand an SSE vector comparison. Return the register with the result. */
20113 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20114 rtx op_true, rtx op_false)
20116 enum machine_mode mode = GET_MODE (dest);
20117 enum machine_mode cmp_mode = GET_MODE (cmp_op0);
20120 cmp_op0 = force_reg (cmp_mode, cmp_op0);
20121 if (!nonimmediate_operand (cmp_op1, cmp_mode))
20122 cmp_op1 = force_reg (cmp_mode, cmp_op1);
20125 || reg_overlap_mentioned_p (dest, op_true)
20126 || reg_overlap_mentioned_p (dest, op_false))
20127 dest = gen_reg_rtx (mode);
20129 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20130 if (cmp_mode != mode)
20132 x = force_reg (cmp_mode, x);
20133 convert_move (dest, x, false);
20136 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20141 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20142 operations. This is used for both scalar and vector conditional moves. */
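/* Illustrative note (added, not part of the original sources): in the
   general fallback at the end of this function the selection is done
   with the classic mask identity

       dest = (cmp & op_true) | (~cmp & op_false)

   where CMP is an all-ones / all-zeros per-element mask; the earlier
   branches merely special-case constant 0 or -1 arms and the
   pblendv/vpcmov instructions that perform the whole selection in one
   step.  */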
20145 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20147 enum machine_mode mode = GET_MODE (dest);
20150 if (vector_all_ones_operand (op_true, mode)
20151 && rtx_equal_p (op_false, CONST0_RTX (mode)))
20153 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20155 else if (op_false == CONST0_RTX (mode))
20157 op_true = force_reg (mode, op_true);
20158 x = gen_rtx_AND (mode, cmp, op_true);
20159 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20161 else if (op_true == CONST0_RTX (mode))
20163 op_false = force_reg (mode, op_false);
20164 x = gen_rtx_NOT (mode, cmp);
20165 x = gen_rtx_AND (mode, x, op_false);
20166 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20168 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
20170 op_false = force_reg (mode, op_false);
20171 x = gen_rtx_IOR (mode, cmp, op_false);
20172 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20174 else if (TARGET_XOP)
20176 op_true = force_reg (mode, op_true);
20178 if (!nonimmediate_operand (op_false, mode))
20179 op_false = force_reg (mode, op_false);
20181 emit_insn (gen_rtx_SET (mode, dest,
20182 gen_rtx_IF_THEN_ELSE (mode, cmp,
20188 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20190 if (!nonimmediate_operand (op_true, mode))
20191 op_true = force_reg (mode, op_true);
20193 op_false = force_reg (mode, op_false);
20199 gen = gen_sse4_1_blendvps;
20203 gen = gen_sse4_1_blendvpd;
20211 gen = gen_sse4_1_pblendvb;
20212 dest = gen_lowpart (V16QImode, dest);
20213 op_false = gen_lowpart (V16QImode, op_false);
20214 op_true = gen_lowpart (V16QImode, op_true);
20215 cmp = gen_lowpart (V16QImode, cmp);
20220 gen = gen_avx_blendvps256;
20224 gen = gen_avx_blendvpd256;
20232 gen = gen_avx2_pblendvb;
20233 dest = gen_lowpart (V32QImode, dest);
20234 op_false = gen_lowpart (V32QImode, op_false);
20235 op_true = gen_lowpart (V32QImode, op_true);
20236 cmp = gen_lowpart (V32QImode, cmp);
20244 emit_insn (gen (dest, op_false, op_true, cmp));
20247 op_true = force_reg (mode, op_true);
20249 t2 = gen_reg_rtx (mode);
20251 t3 = gen_reg_rtx (mode);
20255 x = gen_rtx_AND (mode, op_true, cmp);
20256 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
20258 x = gen_rtx_NOT (mode, cmp);
20259 x = gen_rtx_AND (mode, x, op_false);
20260 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
20262 x = gen_rtx_IOR (mode, t3, t2);
20263 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20268 /* Expand a floating-point conditional move. Return true if successful. */
20271 ix86_expand_fp_movcc (rtx operands[])
20273 enum machine_mode mode = GET_MODE (operands[0]);
20274 enum rtx_code code = GET_CODE (operands[1]);
20275 rtx tmp, compare_op;
20276 rtx op0 = XEXP (operands[1], 0);
20277 rtx op1 = XEXP (operands[1], 1);
20279 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20281 enum machine_mode cmode;
20283 /* Since we've no cmove for sse registers, don't force bad register
20284 allocation just to gain access to it. Deny movcc when the
20285 comparison mode doesn't match the move mode. */
20286 cmode = GET_MODE (op0);
20287 if (cmode == VOIDmode)
20288 cmode = GET_MODE (op1);
20292 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
20293 if (code == UNKNOWN)
20296 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
20297 operands[2], operands[3]))
20300 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
20301 operands[2], operands[3]);
20302 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
20306 if (GET_MODE (op0) == TImode
20307 || (GET_MODE (op0) == DImode
20311 /* The floating point conditional move instructions don't directly
20312 support conditions resulting from a signed integer comparison. */
20314 compare_op = ix86_expand_compare (code, op0, op1);
20315 if (!fcmov_comparison_operator (compare_op, VOIDmode))
20317 tmp = gen_reg_rtx (QImode);
20318 ix86_expand_setcc (tmp, code, op0, op1);
20320 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
20323 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20324 gen_rtx_IF_THEN_ELSE (mode, compare_op,
20325 operands[2], operands[3])));
20330 /* Expand a floating-point vector conditional move; a vcond operation
20331 rather than a movcc operation. */
20334 ix86_expand_fp_vcond (rtx operands[])
20336 enum rtx_code code = GET_CODE (operands[3]);
20339 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
20340 &operands[4], &operands[5]);
20341 if (code == UNKNOWN)
20344 switch (GET_CODE (operands[3]))
20347 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
20348 operands[5], operands[0], operands[0]);
20349 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
20350 operands[5], operands[1], operands[2]);
20354 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
20355 operands[5], operands[0], operands[0]);
20356 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
20357 operands[5], operands[1], operands[2]);
20361 gcc_unreachable ();
20363 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
20365 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
20369 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
20370 operands[5], operands[1], operands[2]))
20373 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
20374 operands[1], operands[2]);
20375 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
20379 /* Expand a signed/unsigned integral vector conditional move. */
20382 ix86_expand_int_vcond (rtx operands[])
20384 enum machine_mode data_mode = GET_MODE (operands[0]);
20385 enum machine_mode mode = GET_MODE (operands[4]);
20386 enum rtx_code code = GET_CODE (operands[3]);
20387 bool negate = false;
20390 cop0 = operands[4];
20391 cop1 = operands[5];
20393 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20394 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20395 if ((code == LT || code == GE)
20396 && data_mode == mode
20397 && cop1 == CONST0_RTX (mode)
20398 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
20399 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
20400 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
20401 && (GET_MODE_SIZE (data_mode) == 16
20402 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
20404 rtx negop = operands[2 - (code == LT)];
20405 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
20406 if (negop == CONST1_RTX (data_mode))
20408 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
20409 operands[0], 1, OPTAB_DIRECT);
20410 if (res != operands[0])
20411 emit_move_insn (operands[0], res);
20414 else if (GET_MODE_INNER (data_mode) != DImode
20415 && vector_all_ones_operand (negop, data_mode))
20417 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
20418 operands[0], 0, OPTAB_DIRECT);
20419 if (res != operands[0])
20420 emit_move_insn (operands[0], res);
20425 if (!nonimmediate_operand (cop1, mode))
20426 cop1 = force_reg (mode, cop1);
20427 if (!general_operand (operands[1], data_mode))
20428 operands[1] = force_reg (data_mode, operands[1]);
20429 if (!general_operand (operands[2], data_mode))
20430 operands[2] = force_reg (data_mode, operands[2]);
20432 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20434 && (mode == V16QImode || mode == V8HImode
20435 || mode == V4SImode || mode == V2DImode))
20439 /* Canonicalize the comparison to EQ, GT, GTU. */
20450 code = reverse_condition (code);
20456 code = reverse_condition (code);
20462 code = swap_condition (code);
20463 x = cop0, cop0 = cop1, cop1 = x;
20467 gcc_unreachable ();
20470 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20471 if (mode == V2DImode)
20476 /* SSE4.1 supports EQ. */
20477 if (!TARGET_SSE4_1)
20483 /* SSE4.2 supports GT/GTU. */
20484 if (!TARGET_SSE4_2)
20489 gcc_unreachable ();
20493 /* Unsigned parallel compare is not supported by the hardware.
20494 Play some tricks to turn this into a signed comparison against 0. */
20498 cop0 = force_reg (mode, cop0);
20508 rtx (*gen_sub3) (rtx, rtx, rtx);
20512 case V8SImode: gen_sub3 = gen_subv8si3; break;
20513 case V4DImode: gen_sub3 = gen_subv4di3; break;
20514 case V4SImode: gen_sub3 = gen_subv4si3; break;
20515 case V2DImode: gen_sub3 = gen_subv2di3; break;
20517 gcc_unreachable ();
20519 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
20521 mask = ix86_build_signbit_mask (mode, true, false);
20522 t1 = gen_reg_rtx (mode);
20523 emit_insn (gen_sub3 (t1, cop0, mask));
20525 t2 = gen_reg_rtx (mode);
20526 emit_insn (gen_sub3 (t2, cop1, mask));
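/* Illustrative note (added, not part of the original sources): subtracting
   the sign-bit constant biases both operands, e.g. for 32-bit elements

       x >u y   <=>   (int) (x - 0x80000000) > (int) (y - 0x80000000),

   so the unsigned comparison can be carried out with the signed
   pcmpgt instructions on the biased values.  */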
20538 /* Perform a parallel unsigned saturating subtraction. */
20539 x = gen_reg_rtx (mode);
20540 emit_insn (gen_rtx_SET (VOIDmode, x,
20541 gen_rtx_US_MINUS (mode, cop0, cop1)));
20544 cop1 = CONST0_RTX (mode);
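/* Illustrative note (added, not part of the original sources): for these
   element sizes the trick is

       cop0 >u cop1   <=>   (cop0 -us cop1) != 0,

   where -us is unsigned saturating subtraction (psubusb/psubusw); the
   code therefore tests the saturated difference for equality with zero
   and flips NEGATE to obtain the != sense.  */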
20550 gcc_unreachable ();
20555 /* Allow the comparison to be done in one mode, but the movcc to
20556 happen in another mode. */
20557 if (data_mode == mode)
20559 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
20560 operands[1+negate], operands[2-negate]);
20564 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
20565 x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
20567 operands[1+negate], operands[2-negate]);
20568 x = gen_lowpart (data_mode, x);
20571 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
20572 operands[2-negate]);
20576 /* Expand a variable vector permutation. */
20579 ix86_expand_vec_perm (rtx operands[])
20581 rtx target = operands[0];
20582 rtx op0 = operands[1];
20583 rtx op1 = operands[2];
20584 rtx mask = operands[3];
20585 rtx t1, t2, t3, t4, vt, vt2, vec[32];
20586 enum machine_mode mode = GET_MODE (op0);
20587 enum machine_mode maskmode = GET_MODE (mask);
20589 bool one_operand_shuffle = rtx_equal_p (op0, op1);
20591 /* Number of elements in the vector. */
20592 w = GET_MODE_NUNITS (mode);
20593 e = GET_MODE_UNIT_SIZE (mode);
20594 gcc_assert (w <= 32);
20598 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
20600 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20601 a constant shuffle operand. With a tiny bit of effort we can
20602 use VPERMD instead. A re-interpretation stall for V4DFmode is
20603 unfortunate but there's no avoiding it.
20604 Similarly for V16HImode we don't have instructions for variable
20605 shuffling, while for V32QImode, after preparing suitable masks,
20606 we can use vpshufb; vpshufb; vpermq; vpor. */
20608 if (mode == V16HImode)
20610 maskmode = mode = V32QImode;
20616 maskmode = mode = V8SImode;
20620 t1 = gen_reg_rtx (maskmode);
20622 /* Replicate the low bits of the V4DImode mask into V8SImode:
20624 t1 = { A A B B C C D D }. */
20625 for (i = 0; i < w / 2; ++i)
20626 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
20627 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20628 vt = force_reg (maskmode, vt);
20629 mask = gen_lowpart (maskmode, mask);
20630 if (maskmode == V8SImode)
20631 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
20633 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
20635 /* Multiply the shuffle indices by two. */
20636 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
20639 /* Add one to the odd shuffle indices:
20640 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20641 for (i = 0; i < w / 2; ++i)
20643 vec[i * 2] = const0_rtx;
20644 vec[i * 2 + 1] = const1_rtx;
20646 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20647 vt = validize_mem (force_const_mem (maskmode, vt));
20648 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
20651 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20652 operands[3] = mask = t1;
20653 target = gen_lowpart (mode, target);
20654 op0 = gen_lowpart (mode, op0);
20655 op1 = gen_lowpart (mode, op1);
20661 /* The VPERMD and VPERMPS instructions already properly ignore
20662 the high bits of the shuffle elements. No need for us to
20663 perform an AND ourselves. */
20664 if (one_operand_shuffle)
20665 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
20668 t1 = gen_reg_rtx (V8SImode);
20669 t2 = gen_reg_rtx (V8SImode);
20670 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
20671 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
20677 mask = gen_lowpart (V8SFmode, mask);
20678 if (one_operand_shuffle)
20679 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
20682 t1 = gen_reg_rtx (V8SFmode);
20683 t2 = gen_reg_rtx (V8SFmode);
20684 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
20685 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
20691 /* By combining the two 128-bit input vectors into one 256-bit
20692 input vector, we can use VPERMD and VPERMPS for the full
20693 two-operand shuffle. */
20694 t1 = gen_reg_rtx (V8SImode);
20695 t2 = gen_reg_rtx (V8SImode);
20696 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
20697 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20698 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
20699 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
20703 t1 = gen_reg_rtx (V8SFmode);
20704 t2 = gen_reg_rtx (V8SImode);
20705 mask = gen_lowpart (V4SImode, mask);
20706 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
20707 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
20708 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
20709 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
20713 t1 = gen_reg_rtx (V32QImode);
20714 t2 = gen_reg_rtx (V32QImode);
20715 t3 = gen_reg_rtx (V32QImode);
20716 vt2 = GEN_INT (128);
20717 for (i = 0; i < 32; i++)
20719 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20720 vt = force_reg (V32QImode, vt);
20721 for (i = 0; i < 32; i++)
20722 vec[i] = i < 16 ? vt2 : const0_rtx;
20723 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
20724 vt2 = force_reg (V32QImode, vt2);
20725 /* From mask create two adjusted masks, which contain the same
20726 bits as mask in the low 7 bits of each vector element.
20727 The first mask will have the most significant bit clear
20728 if it requests an element from the same 128-bit lane
20729 and MSB set if it requests an element from the other 128-bit lane.
20730 The second mask will have the opposite values of the MSB,
20731 and additionally will have its 128-bit lanes swapped.
20732 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20733 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20734 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20735 stands for the other 12 bytes. */
20736 /* The bit that tells whether an element is from the same lane or the other
20737 lane is bit 4, so shift it up by 3 to the MSB position. */
20738 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
20739 gen_lowpart (V4DImode, mask),
20741 /* Clear MSB bits from the mask just in case it had them set. */
20742 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
20743 /* After this t1 will have MSB set for elements from the other lane. */
20744 emit_insn (gen_xorv32qi3 (t1, t1, vt2));
20745 /* Clear bits other than MSB. */
20746 emit_insn (gen_andv32qi3 (t1, t1, vt));
20747 /* Or in the lower bits from mask into t3. */
20748 emit_insn (gen_iorv32qi3 (t3, t1, t2));
20749 /* And invert MSB bits in t1, so MSB is set for elements from the same lane. */
20751 emit_insn (gen_xorv32qi3 (t1, t1, vt));
20752 /* Swap 128-bit lanes in t3. */
20753 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20754 gen_lowpart (V4DImode, t3),
20755 const2_rtx, GEN_INT (3),
20756 const0_rtx, const1_rtx));
20757 /* And or in the lower bits from mask into t1. */
20758 emit_insn (gen_iorv32qi3 (t1, t1, t2));
20759 if (one_operand_shuffle)
20761 /* Each of these shuffles will put 0s in places where an
20762 element from the other 128-bit lane is needed; otherwise
20763 it will shuffle in the requested value. */
20764 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
20765 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
20766 /* For t3 the 128-bit lanes are swapped again. */
20767 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20768 gen_lowpart (V4DImode, t3),
20769 const2_rtx, GEN_INT (3),
20770 const0_rtx, const1_rtx));
20771 /* And ORing both together yields the result. */
20772 emit_insn (gen_iorv32qi3 (target, t1, t3));
20776 t4 = gen_reg_rtx (V32QImode);
20777 /* Similarly to the above one_operand_shuffle code,
20778 just repeated twice, once for each operand. The merge_two:
20779 code below will merge the two results together. */
20780 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
20781 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
20782 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
20783 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
20784 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
20785 gen_lowpart (V4DImode, t4),
20786 const2_rtx, GEN_INT (3),
20787 const0_rtx, const1_rtx));
20788 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
20789 gen_lowpart (V4DImode, t3),
20790 const2_rtx, GEN_INT (3),
20791 const0_rtx, const1_rtx));
20792 emit_insn (gen_iorv32qi3 (t4, t2, t4));
20793 emit_insn (gen_iorv32qi3 (t3, t1, t3));
20799 gcc_assert (GET_MODE_SIZE (mode) <= 16);
20806 /* The XOP VPPERM insn supports three inputs. By ignoring the
20807 one_operand_shuffle special case, we avoid creating another
20808 set of constant vectors in memory. */
20809 one_operand_shuffle = false;
20811 /* mask = mask & {2*w-1, ...} */
20812 vt = GEN_INT (2*w - 1);
20816 /* mask = mask & {w-1, ...} */
20817 vt = GEN_INT (w - 1);
20820 for (i = 0; i < w; i++)
20822 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20823 mask = expand_simple_binop (maskmode, AND, mask, vt,
20824 NULL_RTX, 0, OPTAB_DIRECT);
20826 /* For non-QImode operations, convert the word permutation control
20827 into a byte permutation control. */
20828 if (mode != V16QImode)
20830 mask = expand_simple_binop (maskmode, ASHIFT, mask,
20831 GEN_INT (exact_log2 (e)),
20832 NULL_RTX, 0, OPTAB_DIRECT);
20834 /* Convert mask to vector of chars. */
20835 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
20837 /* Replicate each of the input bytes into byte positions:
20838 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20839 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20840 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20841 for (i = 0; i < 16; ++i)
20842 vec[i] = GEN_INT (i/e * e);
20843 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20844 vt = validize_mem (force_const_mem (V16QImode, vt));
20846 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
20848 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
20850 /* Convert it into the byte positions by doing
20851 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20852 for (i = 0; i < 16; ++i)
20853 vec[i] = GEN_INT (i % e);
20854 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
20855 vt = validize_mem (force_const_mem (V16QImode, vt));
20856 emit_insn (gen_addv16qi3 (mask, mask, vt));
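/* Worked example (added, not part of the original sources): for a V4SI
   shuffle with word control { 2, 0, 3, 1 }, the shift by log2(4) gives
   { 8, 0, 12, 4 }; the pshufb with { 0,0,0,0, 4,4,4,4, 8,8,8,8,
   12,12,12,12 } broadcasts each element's low byte, giving
   { 8,8,8,8, 0,0,0,0, 12,12,12,12, 4,4,4,4 }; and adding
   { 0,1,2,3, 0,1,2,3, ... } yields the final byte control
   { 8,9,10,11, 0,1,2,3, 12,13,14,15, 4,5,6,7 }.  */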
20859 /* The actual shuffle operations all operate on V16QImode. */
20860 op0 = gen_lowpart (V16QImode, op0);
20861 op1 = gen_lowpart (V16QImode, op1);
20862 target = gen_lowpart (V16QImode, target);
20866 emit_insn (gen_xop_pperm (target, op0, op1, mask));
20868 else if (one_operand_shuffle)
20870 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
20877 /* Shuffle the two input vectors independently. */
20878 t1 = gen_reg_rtx (V16QImode);
20879 t2 = gen_reg_rtx (V16QImode);
20880 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
20881 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
20884 /* Then merge them together. The key is whether any given control
20885 element contained a bit set that indicates the second word. */
20886 mask = operands[3];
20888 if (maskmode == V2DImode && !TARGET_SSE4_1)
20890 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20891 more shuffle to convert the V2DI input mask into a V4SI
20892 input mask. At that point the masking fed to
20893 ix86_expand_int_vcond below will work as desired. */
20894 rtx t3 = gen_reg_rtx (V4SImode);
20895 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
20896 const0_rtx, const0_rtx,
20897 const2_rtx, const2_rtx));
20899 maskmode = V4SImode;
20903 for (i = 0; i < w; i++)
20905 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
20906 vt = force_reg (maskmode, vt);
20907 mask = expand_simple_binop (maskmode, AND, mask, vt,
20908 NULL_RTX, 0, OPTAB_DIRECT);
20910 xops[0] = gen_lowpart (mode, operands[0]);
20911 xops[1] = gen_lowpart (mode, t2);
20912 xops[2] = gen_lowpart (mode, t1);
20913 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
20916 ok = ix86_expand_int_vcond (xops);
20921 /* Unpack SRC into the next wider integer vector type. UNSIGNED_P is
20922 true if we should do zero extension, else sign extension. HIGH_P is
20923 true if we want the N/2 high elements, else the low elements. */
20926 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
20928 enum machine_mode imode = GET_MODE (src);
20933 rtx (*unpack)(rtx, rtx);
20934 rtx (*extract)(rtx, rtx) = NULL;
20935 enum machine_mode halfmode = BLKmode;
20941 unpack = gen_avx2_zero_extendv16qiv16hi2;
20943 unpack = gen_avx2_sign_extendv16qiv16hi2;
20944 halfmode = V16QImode;
20946 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
20950 unpack = gen_avx2_zero_extendv8hiv8si2;
20952 unpack = gen_avx2_sign_extendv8hiv8si2;
20953 halfmode = V8HImode;
20955 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
20959 unpack = gen_avx2_zero_extendv4siv4di2;
20961 unpack = gen_avx2_sign_extendv4siv4di2;
20962 halfmode = V4SImode;
20964 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
20968 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
20970 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
20974 unpack = gen_sse4_1_zero_extendv4hiv4si2;
20976 unpack = gen_sse4_1_sign_extendv4hiv4si2;
20980 unpack = gen_sse4_1_zero_extendv2siv2di2;
20982 unpack = gen_sse4_1_sign_extendv2siv2di2;
20985 gcc_unreachable ();
20988 if (GET_MODE_SIZE (imode) == 32)
20990 tmp = gen_reg_rtx (halfmode);
20991 emit_insn (extract (tmp, src));
20995 /* Shift higher 8 bytes to lower 8 bytes. */
20996 tmp = gen_reg_rtx (imode);
20997 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
20998 gen_lowpart (V1TImode, src),
21004 emit_insn (unpack (dest, tmp));
21008 rtx (*unpack)(rtx, rtx, rtx);
21014 unpack = gen_vec_interleave_highv16qi;
21016 unpack = gen_vec_interleave_lowv16qi;
21020 unpack = gen_vec_interleave_highv8hi;
21022 unpack = gen_vec_interleave_lowv8hi;
21026 unpack = gen_vec_interleave_highv4si;
21028 unpack = gen_vec_interleave_lowv4si;
21031 gcc_unreachable ();
21035 tmp = force_reg (imode, CONST0_RTX (imode));
21037 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21038 src, pc_rtx, pc_rtx);
21040 emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
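/* Illustrative note (added, not part of the original sources): without the
   SSE4.1 pmovzx/pmovsx forms, the widening is done by interleaving each
   element with its extension: TMP is a zero vector for zero extension, or
   the per-element sign mask (0 > SRC, i.e. 0 or -1) for sign extension,
   so e.g. a V8HI -> V4SI conversion becomes punpcklwd/punpckhwd of SRC
   with TMP.  */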
21044 /* Expand conditional increment or decrement using adc/sbb instructions.
21045 The default case using setcc followed by the conditional move can be
21046 done by generic code. */
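/* Illustrative example (added, not part of the original sources): for

       dest = (a < b) ? x + 1 : x        with an unsigned comparison,

   the expansion below amounts to

       cmpl  b, a       # CF = (unsigned) a < b
       adcl  $0, x      # x += CF

   and the decrement / reversed-carry cases use sbb or the inverted
   flag condition instead.  */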
21048 ix86_expand_int_addcc (rtx operands[])
21050 enum rtx_code code = GET_CODE (operands[1]);
21052 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21054 rtx val = const0_rtx;
21055 bool fpcmp = false;
21056 enum machine_mode mode;
21057 rtx op0 = XEXP (operands[1], 0);
21058 rtx op1 = XEXP (operands[1], 1);
21060 if (operands[3] != const1_rtx
21061 && operands[3] != constm1_rtx)
21063 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21065 code = GET_CODE (compare_op);
21067 flags = XEXP (compare_op, 0);
21069 if (GET_MODE (flags) == CCFPmode
21070 || GET_MODE (flags) == CCFPUmode)
21073 code = ix86_fp_compare_code_to_integer (code);
21080 PUT_CODE (compare_op,
21081 reverse_condition_maybe_unordered
21082 (GET_CODE (compare_op)));
21084 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21087 mode = GET_MODE (operands[0]);
21089 /* Construct either adc or sbb insn. */
21090 if ((code == LTU) == (operands[3] == constm1_rtx))
21095 insn = gen_subqi3_carry;
21098 insn = gen_subhi3_carry;
21101 insn = gen_subsi3_carry;
21104 insn = gen_subdi3_carry;
21107 gcc_unreachable ();
21115 insn = gen_addqi3_carry;
21118 insn = gen_addhi3_carry;
21121 insn = gen_addsi3_carry;
21124 insn = gen_adddi3_carry;
21127 gcc_unreachable ();
21130 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21136 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
21137 but works for floating point parameters and non-offsettable memories.
21138 For pushes, it returns just stack offsets; the values will be saved
21139 in the right order. At most four parts are generated. */
21142 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21147 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
21149 size = (GET_MODE_SIZE (mode) + 4) / 8;
21151 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
21152 gcc_assert (size >= 2 && size <= 4);
21154 /* Optimize constant pool references to immediates. This is used by fp
21155 moves, which force all constants to memory to allow combining. */
21156 if (MEM_P (operand) && MEM_READONLY_P (operand))
21158 rtx tmp = maybe_get_pool_constant (operand);
21163 if (MEM_P (operand) && !offsettable_memref_p (operand))
21165 /* The only non-offsettable memories we handle are pushes. */
21166 int ok = push_operand (operand, VOIDmode);
21170 operand = copy_rtx (operand);
21171 PUT_MODE (operand, word_mode);
21172 parts[0] = parts[1] = parts[2] = parts[3] = operand;
21176 if (GET_CODE (operand) == CONST_VECTOR)
21178 enum machine_mode imode = int_mode_for_mode (mode);
21179 /* Caution: if we looked through a constant pool memory above,
21180 the operand may actually have a different mode now. That's
21181 ok, since we want to pun this all the way back to an integer. */
21182 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
21183 gcc_assert (operand != NULL);
21189 if (mode == DImode)
21190 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
21195 if (REG_P (operand))
21197 gcc_assert (reload_completed);
21198 for (i = 0; i < size; i++)
21199 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
21201 else if (offsettable_memref_p (operand))
21203 operand = adjust_address (operand, SImode, 0);
21204 parts[0] = operand;
21205 for (i = 1; i < size; i++)
21206 parts[i] = adjust_address (operand, SImode, 4 * i);
21208 else if (GET_CODE (operand) == CONST_DOUBLE)
21213 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
21217 real_to_target (l, &r, mode);
21218 parts[3] = gen_int_mode (l[3], SImode);
21219 parts[2] = gen_int_mode (l[2], SImode);
21222 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
21223 long double may not be 80-bit. */
21224 real_to_target (l, &r, mode);
21225 parts[2] = gen_int_mode (l[2], SImode);
21228 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
21231 gcc_unreachable ();
21233 parts[1] = gen_int_mode (l[1], SImode);
21234 parts[0] = gen_int_mode (l[0], SImode);
21237 gcc_unreachable ();
21242 if (mode == TImode)
21243 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
21244 if (mode == XFmode || mode == TFmode)
21246 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
21247 if (REG_P (operand))
21249 gcc_assert (reload_completed);
21250 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
21251 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
21253 else if (offsettable_memref_p (operand))
21255 operand = adjust_address (operand, DImode, 0);
21256 parts[0] = operand;
21257 parts[1] = adjust_address (operand, upper_mode, 8);
21259 else if (GET_CODE (operand) == CONST_DOUBLE)
21264 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
21265 real_to_target (l, &r, mode);
21267 /* Do not use shift by 32 to avoid warning on 32bit systems. */
21268 if (HOST_BITS_PER_WIDE_INT >= 64)
21271 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
21272 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
21275 parts[0] = immed_double_const (l[0], l[1], DImode);
21277 if (upper_mode == SImode)
21278 parts[1] = gen_int_mode (l[2], SImode);
21279 else if (HOST_BITS_PER_WIDE_INT >= 64)
21282 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
21283 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
21286 parts[1] = immed_double_const (l[2], l[3], DImode);
21289 gcc_unreachable ();
21296 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21297 Return false when normal moves are needed; true when all required
21298 insns have been emitted. Operands 2-4 contain the input values
21299 in the correct order; operands 5-7 contain the output values. */
21302 ix86_split_long_move (rtx operands[])
21307 int collisions = 0;
21308 enum machine_mode mode = GET_MODE (operands[0]);
21309 bool collisionparts[4];
21311 /* The DFmode expanders may ask us to move a double.
21312 For a 64-bit target this is a single move. By hiding the fact
21313 here we simplify the i386.md splitters. */
21314 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
21316 /* Optimize constant pool references to immediates. This is used by
21317 fp moves, which force all constants to memory to allow combining. */
21319 if (MEM_P (operands[1])
21320 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
21321 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
21322 operands[1] = get_pool_constant (XEXP (operands[1], 0));
21323 if (push_operand (operands[0], VOIDmode))
21325 operands[0] = copy_rtx (operands[0]);
21326 PUT_MODE (operands[0], word_mode);
21329 operands[0] = gen_lowpart (DImode, operands[0]);
21330 operands[1] = gen_lowpart (DImode, operands[1]);
21331 emit_move_insn (operands[0], operands[1]);
21335 /* The only non-offsettable memory we handle is push. */
21336 if (push_operand (operands[0], VOIDmode))
21339 gcc_assert (!MEM_P (operands[0])
21340 || offsettable_memref_p (operands[0]));
21342 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
21343 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
21345 /* When emitting a push, take care with source operands on the stack. */
21346 if (push && MEM_P (operands[1])
21347 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
21349 rtx src_base = XEXP (part[1][nparts - 1], 0);
21351 /* Compensate for the stack decrement by 4. */
21352 if (!TARGET_64BIT && nparts == 3
21353 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
21354 src_base = plus_constant (Pmode, src_base, 4);
21356 /* src_base refers to the stack pointer and is
21357 automatically decreased by the emitted pushes. */
21358 for (i = 0; i < nparts; i++)
21359 part[1][i] = change_address (part[1][i],
21360 GET_MODE (part[1][i]), src_base);
21363 /* We need to do the copy in the right order in case an address register
21364 of the source overlaps the destination. */
21365 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
21369 for (i = 0; i < nparts; i++)
21372 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
21373 if (collisionparts[i])
21377 /* A collision in the middle part can be handled by reordering. */
21378 if (collisions == 1 && nparts == 3 && collisionparts [1])
21380 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
21381 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
21383 else if (collisions == 1
21385 && (collisionparts [1] || collisionparts [2]))
21387 if (collisionparts [1])
21389 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
21390 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
21394 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
21395 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
21399 /* If there are more collisions, we can't handle them by reordering.
21400 Do an lea into the last part and use only one colliding move. */
21401 else if (collisions > 1)
21407 base = part[0][nparts - 1];
21409 /* Handle the case when the last part isn't valid for lea.
21410 This happens in 64-bit mode when storing the 12-byte XFmode. */
21411 if (GET_MODE (base) != Pmode)
21412 base = gen_rtx_REG (Pmode, REGNO (base));
21414 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
21415 part[1][0] = replace_equiv_address (part[1][0], base);
21416 for (i = 1; i < nparts; i++)
21418 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
21419 part[1][i] = replace_equiv_address (part[1][i], tmp);
21430 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
21431 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
21432 stack_pointer_rtx, GEN_INT (-4)));
21433 emit_move_insn (part[0][2], part[1][2]);
21435 else if (nparts == 4)
21437 emit_move_insn (part[0][3], part[1][3]);
21438 emit_move_insn (part[0][2], part[1][2]);
21443 /* In 64-bit mode we don't have a 32-bit push available. In case this is
21444 a register, it is OK - we will just use the larger counterpart. We also
21445 retype memory - this comes from an attempt to avoid the REX prefix
21446 when moving the second half of a TFmode value. */
21447 if (GET_MODE (part[1][1]) == SImode)
21449 switch (GET_CODE (part[1][1]))
21452 part[1][1] = adjust_address (part[1][1], DImode, 0);
21456 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
21460 gcc_unreachable ();
21463 if (GET_MODE (part[1][0]) == SImode)
21464 part[1][0] = part[1][1];
21467 emit_move_insn (part[0][1], part[1][1]);
21468 emit_move_insn (part[0][0], part[1][0]);
21472 /* Choose the correct order so as not to overwrite the source before it is copied. */
21473 if ((REG_P (part[0][0])
21474 && REG_P (part[1][1])
21475 && (REGNO (part[0][0]) == REGNO (part[1][1])
21477 && REGNO (part[0][0]) == REGNO (part[1][2]))
21479 && REGNO (part[0][0]) == REGNO (part[1][3]))))
21481 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
21483 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
21485 operands[2 + i] = part[0][j];
21486 operands[6 + i] = part[1][j];
21491 for (i = 0; i < nparts; i++)
21493 operands[2 + i] = part[0][i];
21494 operands[6 + i] = part[1][i];
21498 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21499 if (optimize_insn_for_size_p ())
21501 for (j = 0; j < nparts - 1; j++)
21502 if (CONST_INT_P (operands[6 + j])
21503 && operands[6 + j] != const0_rtx
21504 && REG_P (operands[2 + j]))
21505 for (i = j; i < nparts - 1; i++)
21506 if (CONST_INT_P (operands[7 + i])
21507 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
21508 operands[7 + i] = operands[2 + j];
21511 for (i = 0; i < nparts; i++)
21512 emit_move_insn (operands[2 + i], operands[6 + i]);
21517 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21518 left shift by a constant, either using a single shift or
21519 a sequence of add instructions. */
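/* Illustrative note (added, not part of the original sources): each
   "add reg, reg" doubles the value, so a shift left by a small constant
   COUNT can be replaced by COUNT additions when the cost comparison
   below says so, e.g.

       addl  %eax, %eax
       addl  %eax, %eax

   instead of "shll $2, %eax".  */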
21522 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
21524 rtx (*insn)(rtx, rtx, rtx);
21527 || (count * ix86_cost->add <= ix86_cost->shift_const
21528 && !optimize_insn_for_size_p ()))
21530 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
21531 while (count-- > 0)
21532 emit_insn (insn (operand, operand, operand));
21536 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
21537 emit_insn (insn (operand, operand, GEN_INT (count)));
21542 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
21544 rtx (*gen_ashl3)(rtx, rtx, rtx);
21545 rtx (*gen_shld)(rtx, rtx, rtx);
21546 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21548 rtx low[2], high[2];
21551 if (CONST_INT_P (operands[2]))
21553 split_double_mode (mode, operands, 2, low, high);
21554 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21556 if (count >= half_width)
21558 emit_move_insn (high[0], low[1]);
21559 emit_move_insn (low[0], const0_rtx);
21561 if (count > half_width)
21562 ix86_expand_ashl_const (high[0], count - half_width, mode);
21566 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
21568 if (!rtx_equal_p (operands[0], operands[1]))
21569 emit_move_insn (operands[0], operands[1]);
21571 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
21572 ix86_expand_ashl_const (low[0], count, mode);
21577 split_double_mode (mode, operands, 1, low, high);
21579 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
21581 if (operands[1] == const1_rtx)
21583 /* Assuming we've chosen QImode-capable registers, 1 << N
21584 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21585 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
21587 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
21589 ix86_expand_clear (low[0]);
21590 ix86_expand_clear (high[0]);
21591 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
21593 d = gen_lowpart (QImode, low[0]);
21594 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
21595 s = gen_rtx_EQ (QImode, flags, const0_rtx);
21596 emit_insn (gen_rtx_SET (VOIDmode, d, s));
21598 d = gen_lowpart (QImode, high[0]);
21599 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
21600 s = gen_rtx_NE (QImode, flags, const0_rtx);
21601 emit_insn (gen_rtx_SET (VOIDmode, d, s));
21604 /* Otherwise, we can get the same results by manually performing
21605 a bit extract operation on bit 5/6, and then performing the two
21606 shifts. The two methods of getting 0/1 into low/high are exactly
21607 the same size. Avoiding the shift in the bit extract case helps
21608 pentium4 a bit; no one else seems to care much either way. */
21611 enum machine_mode half_mode;
21612 rtx (*gen_lshr3)(rtx, rtx, rtx);
21613 rtx (*gen_and3)(rtx, rtx, rtx);
21614 rtx (*gen_xor3)(rtx, rtx, rtx);
21615 HOST_WIDE_INT bits;
21618 if (mode == DImode)
21620 half_mode = SImode;
21621 gen_lshr3 = gen_lshrsi3;
21622 gen_and3 = gen_andsi3;
21623 gen_xor3 = gen_xorsi3;
21628 half_mode = DImode;
21629 gen_lshr3 = gen_lshrdi3;
21630 gen_and3 = gen_anddi3;
21631 gen_xor3 = gen_xordi3;
21635 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
21636 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
21638 x = gen_lowpart (half_mode, operands[2]);
21639 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
21641 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
21642 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
21643 emit_move_insn (low[0], high[0]);
21644 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
21647 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
21648 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
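/* Illustrative sketch (added, not part of the original sources) of the
   1 << N expansion above for a DImode shift on a 32-bit target with the
   count in %ecx (register choices are only for illustration):

       xorl   %eax, %eax     # low  half
       xorl   %edx, %edx     # high half
       testb  $32, %cl
       sete   %al            # low  = (count < 32)
       setne  %dl            # high = (count >= 32)
       shll   %cl, %eax      # 32-bit shifts use only count & 31
       shll   %cl, %edx

   so exactly one of the two halves ends up holding 1 << (count & 31).  */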
21652 if (operands[1] == constm1_rtx)
21654 /* For -1 << N, we can avoid the shld instruction, because we
21655 know that we're shifting 0...31/63 ones into a -1. */
21656 emit_move_insn (low[0], constm1_rtx);
21657 if (optimize_insn_for_size_p ())
21658 emit_move_insn (high[0], low[0]);
21660 emit_move_insn (high[0], constm1_rtx);
21664 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
21666 if (!rtx_equal_p (operands[0], operands[1]))
21667 emit_move_insn (operands[0], operands[1]);
21669 split_double_mode (mode, operands, 1, low, high);
21670 emit_insn (gen_shld (high[0], low[0], operands[2]));
21673 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
21675 if (TARGET_CMOVE && scratch)
21677 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21678 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21680 ix86_expand_clear (scratch);
21681 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
21685 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21686 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21688 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
21693 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
21695 rtx (*gen_ashr3)(rtx, rtx, rtx)
21696 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
21697 rtx (*gen_shrd)(rtx, rtx, rtx);
21698 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21700 rtx low[2], high[2];
21703 if (CONST_INT_P (operands[2]))
21705 split_double_mode (mode, operands, 2, low, high);
21706 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21708 if (count == GET_MODE_BITSIZE (mode) - 1)
21710 emit_move_insn (high[0], high[1]);
21711 emit_insn (gen_ashr3 (high[0], high[0],
21712 GEN_INT (half_width - 1)));
21713 emit_move_insn (low[0], high[0]);
21716 else if (count >= half_width)
21718 emit_move_insn (low[0], high[1]);
21719 emit_move_insn (high[0], low[0]);
21720 emit_insn (gen_ashr3 (high[0], high[0],
21721 GEN_INT (half_width - 1)));
21723 if (count > half_width)
21724 emit_insn (gen_ashr3 (low[0], low[0],
21725 GEN_INT (count - half_width)));
21729 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21731 if (!rtx_equal_p (operands[0], operands[1]))
21732 emit_move_insn (operands[0], operands[1]);
21734 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21735 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
21740 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21742 if (!rtx_equal_p (operands[0], operands[1]))
21743 emit_move_insn (operands[0], operands[1]);
21745 split_double_mode (mode, operands, 1, low, high);
21747 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21748 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
21750 if (TARGET_CMOVE && scratch)
21752 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21753 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21755 emit_move_insn (scratch, high[0]);
21756 emit_insn (gen_ashr3 (scratch, scratch,
21757 GEN_INT (half_width - 1)));
21758 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21763 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
21764 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
21766 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
21772 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
21774 rtx (*gen_lshr3)(rtx, rtx, rtx)
21775 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
21776 rtx (*gen_shrd)(rtx, rtx, rtx);
21777 int half_width = GET_MODE_BITSIZE (mode) >> 1;
21779 rtx low[2], high[2];
21782 if (CONST_INT_P (operands[2]))
21784 split_double_mode (mode, operands, 2, low, high);
21785 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
21787 if (count >= half_width)
21789 emit_move_insn (low[0], high[1]);
21790 ix86_expand_clear (high[0]);
21792 if (count > half_width)
21793 emit_insn (gen_lshr3 (low[0], low[0],
21794 GEN_INT (count - half_width)));
21798 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21800 if (!rtx_equal_p (operands[0], operands[1]))
21801 emit_move_insn (operands[0], operands[1]);
21803 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
21804 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
21809 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
21811 if (!rtx_equal_p (operands[0], operands[1]))
21812 emit_move_insn (operands[0], operands[1]);
21814 split_double_mode (mode, operands, 1, low, high);
21816 emit_insn (gen_shrd (low[0], high[0], operands[2]));
21817 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
21819 if (TARGET_CMOVE && scratch)
21821 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
21822 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
21824 ix86_expand_clear (scratch);
21825 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
21830 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
21831 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
21833 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
21838 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
21840 predict_jump (int prob)
21842 rtx insn = get_last_insn ();
21843 gcc_assert (JUMP_P (insn));
21844 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
21847 /* Helper function for the string operations below. Test whether VARIABLE
21848 is aligned to VALUE bytes. If true, jump to the label. */
21850 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
21852 rtx label = gen_label_rtx ();
21853 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
21854 if (GET_MODE (variable) == DImode)
21855 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
21857 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
21858 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
21861 predict_jump (REG_BR_PROB_BASE * 50 / 100);
21863 predict_jump (REG_BR_PROB_BASE * 90 / 100);
21867 /* Adjust COUNTER by the VALUE. */
21869 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
21871 rtx (*gen_add)(rtx, rtx, rtx)
21872 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
21874 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
21877 /* Zero extend possibly SImode EXP to Pmode register. */
21879 ix86_zero_extend_to_Pmode (rtx exp)
21881 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
21884 /* Divide COUNTREG by SCALE. */
21886 scale_counter (rtx countreg, int scale)
21892 if (CONST_INT_P (countreg))
21893 return GEN_INT (INTVAL (countreg) / scale);
21894 gcc_assert (REG_P (countreg));
21896 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
21897 GEN_INT (exact_log2 (scale)),
21898 NULL, 1, OPTAB_DIRECT);
21902 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21903 DImode for constant loop counts. */
21905 static enum machine_mode
21906 counter_mode (rtx count_exp)
21908 if (GET_MODE (count_exp) != VOIDmode)
21909 return GET_MODE (count_exp);
21910 if (!CONST_INT_P (count_exp))
21912 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
/* When SRCPTR is non-NULL, output a simple loop that moves the memory
   pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT bytes.  When SRCPTR is NULL, output the
   equivalent loop that sets memory to VALUE (expected to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide the MEM rtxes that feed proper aliasing info.  */
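/* For illustration only: with MODE == SImode and UNROLL == 4 the loop emitted
   below behaves roughly like this C sketch (identifiers such as load4/store4
   are invented for the example and are not names used elsewhere in this file):

       size = count & ~15U;                       (piece_size_mask)
       for (iter = 0; iter < size; iter += 16)    (piece_size = 4 * 4)
         {
           unsigned int t0, t1, t2, t3;           (UNROLL temporaries)
           t0 = load4 (src + iter + 0);           (all loads first ...)
           t1 = load4 (src + iter + 4);
           t2 = load4 (src + iter + 8);
           t3 = load4 (src + iter + 12);
           store4 (dst + iter + 0, t0);           (... then all stores)
           store4 (dst + iter + 4, t1);
           store4 (dst + iter + 8, t2);
           store4 (dst + iter + 12, t3);
         }
       dst += iter;
       src += iter;

   The memset variant stores VALUE instead of loading from SRC.  */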
21927 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
21928 rtx destptr, rtx srcptr, rtx value,
21929 rtx count, enum machine_mode mode, int unroll,
21932 rtx out_label, top_label, iter, tmp;
21933 enum machine_mode iter_mode = counter_mode (count);
21934 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
21935 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
21941 top_label = gen_label_rtx ();
21942 out_label = gen_label_rtx ();
21943 iter = gen_reg_rtx (iter_mode);
21945 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
21946 NULL, 1, OPTAB_DIRECT);
21947 /* Those two should combine. */
21948 if (piece_size == const1_rtx)
21950 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
21952 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21954 emit_move_insn (iter, const0_rtx);
21956 emit_label (top_label);
21958 tmp = convert_modes (Pmode, iter_mode, iter, true);
21959 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
21960 destmem = change_address (destmem, mode, x_addr);
21964 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
21965 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using 4 temporaries is overkill in 32-bit mode.  */
21970 if (!TARGET_64BIT && 0)
21972 for (i = 0; i < unroll; i++)
21977 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
21979 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21981 emit_move_insn (destmem, srcmem);
21987 gcc_assert (unroll <= 4);
21988 for (i = 0; i < unroll; i++)
21990 tmpreg[i] = gen_reg_rtx (mode);
21994 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
21996 emit_move_insn (tmpreg[i], srcmem);
21998 for (i = 0; i < unroll; i++)
22003 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22005 emit_move_insn (destmem, tmpreg[i]);
22010 for (i = 0; i < unroll; i++)
22014 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22015 emit_move_insn (destmem, value);
22018 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22019 true, OPTAB_LIB_WIDEN);
22021 emit_move_insn (iter, tmp);
22023 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22025 if (expected_size != -1)
22027 expected_size /= GET_MODE_SIZE (mode) * unroll;
22028 if (expected_size == 0)
22030 else if (expected_size > REG_BR_PROB_BASE)
22031 predict_jump (REG_BR_PROB_BASE - 1);
22033 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22036 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22037 iter = ix86_zero_extend_to_Pmode (iter);
22038 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22039 true, OPTAB_LIB_WIDEN);
22040 if (tmp != destptr)
22041 emit_move_insn (destptr, tmp);
22044 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22045 true, OPTAB_LIB_WIDEN);
22047 emit_move_insn (srcptr, tmp);
22049 emit_label (out_label);
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
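/* For illustration only: with mode == SImode the single insn emitted below has
   the effect of this sketch (identifiers invented for the example):

       n = count / 4;                  (scale_counter (count, 4))
       while (n--)
         {
           store4 (dst, load4 (src));
           dst += 4;
           src += 4;
         }

   i.e. a "rep movsl" with ECX = count / 4 and ESI/EDI advancing by 4 each
   iteration; DESTEXP/SRCEXP describe the final pointer values to the RTL.  */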
22055 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
22056 rtx destptr, rtx srcptr,
22058 enum machine_mode mode)
22063 HOST_WIDE_INT rounded_count;
22065 /* If the size is known, it is shorter to use rep movs. */
22066 if (mode == QImode && CONST_INT_P (count)
22067 && !(INTVAL (count) & 3))
22070 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22071 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22072 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22073 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22074 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
22075 if (mode != QImode)
22077 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22078 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22079 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22080 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22081 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22082 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22086 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22087 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22089 if (CONST_INT_P (count))
22091 rounded_count = (INTVAL (count)
22092 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22093 destmem = shallow_copy_rtx (destmem);
22094 srcmem = shallow_copy_rtx (srcmem);
22095 set_mem_size (destmem, rounded_count);
22096 set_mem_size (srcmem, rounded_count);
22100 if (MEM_SIZE_KNOWN_P (destmem))
22101 clear_mem_size (destmem);
22102 if (MEM_SIZE_KNOWN_P (srcmem))
22103 clear_mem_size (srcmem);
22105 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
22112 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
22113 rtx count, enum machine_mode mode,
22118 HOST_WIDE_INT rounded_count;
22120 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22121 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22122 value = force_reg (mode, gen_lowpart (mode, value));
22123 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
22124 if (mode != QImode)
22126 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22127 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22128 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22131 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22132 if (orig_value == const0_rtx && CONST_INT_P (count))
22134 rounded_count = (INTVAL (count)
22135 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22136 destmem = shallow_copy_rtx (destmem);
22137 set_mem_size (destmem, rounded_count);
22139 else if (MEM_SIZE_KNOWN_P (destmem))
22140 clear_mem_size (destmem);
22141 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22145 emit_strmov (rtx destmem, rtx srcmem,
22146 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
22148 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
22149 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
22150 emit_insn (gen_strmov (destptr, dest, srcptr, src));
22153 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
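/* For illustration only: with a constant COUNT and max_size > 16 the code
   emitted below reduces to a fixed sequence of moves keyed on the low bits
   of the count, roughly:

       if (count & 16) copy 16 bytes;        (two DImode moves on 64-bit)
       if (count & 8)  copy 8 bytes;
       if (count & 4)  copy 4 bytes;
       if (count & 2)  copy 2 bytes;
       if (count & 1)  copy 1 byte;

   For a non-constant COUNT the same decisions are made at run time, either
   with single string insns or via ix86_expand_aligntest jump trees.  */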
22155 expand_movmem_epilogue (rtx destmem, rtx srcmem,
22156 rtx destptr, rtx srcptr, rtx count, int max_size)
22159 if (CONST_INT_P (count))
22161 HOST_WIDE_INT countval = INTVAL (count);
22164 if ((countval & 0x10) && max_size > 16)
22168 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
22169 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
22172 gcc_unreachable ();
22175 if ((countval & 0x08) && max_size > 8)
22178 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
22181 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
22182 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
22186 if ((countval & 0x04) && max_size > 4)
22188 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
22191 if ((countval & 0x02) && max_size > 2)
22193 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
22196 if ((countval & 0x01) && max_size > 1)
22198 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
22205 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
22206 count, 1, OPTAB_DIRECT);
22207 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
22208 count, QImode, 1, 4);
/* When single stringop instructions are available, we can cheaply advance the
   dest and src pointers.  Otherwise we save code size by maintaining an offset
   (zero is readily available from the preceding rep operation) and using x86 addressing modes.
22216 if (TARGET_SINGLE_STRINGOP)
22220 rtx label = ix86_expand_aligntest (count, 4, true);
22221 src = change_address (srcmem, SImode, srcptr);
22222 dest = change_address (destmem, SImode, destptr);
22223 emit_insn (gen_strmov (destptr, dest, srcptr, src));
22224 emit_label (label);
22225 LABEL_NUSES (label) = 1;
22229 rtx label = ix86_expand_aligntest (count, 2, true);
22230 src = change_address (srcmem, HImode, srcptr);
22231 dest = change_address (destmem, HImode, destptr);
22232 emit_insn (gen_strmov (destptr, dest, srcptr, src));
22233 emit_label (label);
22234 LABEL_NUSES (label) = 1;
22238 rtx label = ix86_expand_aligntest (count, 1, true);
22239 src = change_address (srcmem, QImode, srcptr);
22240 dest = change_address (destmem, QImode, destptr);
22241 emit_insn (gen_strmov (destptr, dest, srcptr, src));
22242 emit_label (label);
22243 LABEL_NUSES (label) = 1;
22248 rtx offset = force_reg (Pmode, const0_rtx);
22253 rtx label = ix86_expand_aligntest (count, 4, true);
22254 src = change_address (srcmem, SImode, srcptr);
22255 dest = change_address (destmem, SImode, destptr);
22256 emit_move_insn (dest, src);
22257 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
22258 true, OPTAB_LIB_WIDEN);
22260 emit_move_insn (offset, tmp);
22261 emit_label (label);
22262 LABEL_NUSES (label) = 1;
22266 rtx label = ix86_expand_aligntest (count, 2, true);
22267 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
22268 src = change_address (srcmem, HImode, tmp);
22269 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
22270 dest = change_address (destmem, HImode, tmp);
22271 emit_move_insn (dest, src);
22272 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
22273 true, OPTAB_LIB_WIDEN);
22275 emit_move_insn (offset, tmp);
22276 emit_label (label);
22277 LABEL_NUSES (label) = 1;
22281 rtx label = ix86_expand_aligntest (count, 1, true);
22282 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
22283 src = change_address (srcmem, QImode, tmp);
22284 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
22285 dest = change_address (destmem, QImode, tmp);
22286 emit_move_insn (dest, src);
22287 emit_label (label);
22288 LABEL_NUSES (label) = 1;
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
22295 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
22296 rtx count, int max_size)
22299 expand_simple_binop (counter_mode (count), AND, count,
22300 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
22301 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
22302 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
22308 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
22312 if (CONST_INT_P (count))
22314 HOST_WIDE_INT countval = INTVAL (count);
22317 if ((countval & 0x10) && max_size > 16)
22321 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
22322 emit_insn (gen_strset (destptr, dest, value));
22323 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
22324 emit_insn (gen_strset (destptr, dest, value));
22327 gcc_unreachable ();
22330 if ((countval & 0x08) && max_size > 8)
22334 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
22335 emit_insn (gen_strset (destptr, dest, value));
22339 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
22340 emit_insn (gen_strset (destptr, dest, value));
22341 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
22342 emit_insn (gen_strset (destptr, dest, value));
22346 if ((countval & 0x04) && max_size > 4)
22348 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
22349 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
22352 if ((countval & 0x02) && max_size > 2)
22354 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
22355 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
22358 if ((countval & 0x01) && max_size > 1)
22360 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
22361 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
22368 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
22373 rtx label = ix86_expand_aligntest (count, 16, true);
22376 dest = change_address (destmem, DImode, destptr);
22377 emit_insn (gen_strset (destptr, dest, value));
22378 emit_insn (gen_strset (destptr, dest, value));
22382 dest = change_address (destmem, SImode, destptr);
22383 emit_insn (gen_strset (destptr, dest, value));
22384 emit_insn (gen_strset (destptr, dest, value));
22385 emit_insn (gen_strset (destptr, dest, value));
22386 emit_insn (gen_strset (destptr, dest, value));
22388 emit_label (label);
22389 LABEL_NUSES (label) = 1;
22393 rtx label = ix86_expand_aligntest (count, 8, true);
22396 dest = change_address (destmem, DImode, destptr);
22397 emit_insn (gen_strset (destptr, dest, value));
22401 dest = change_address (destmem, SImode, destptr);
22402 emit_insn (gen_strset (destptr, dest, value));
22403 emit_insn (gen_strset (destptr, dest, value));
22405 emit_label (label);
22406 LABEL_NUSES (label) = 1;
22410 rtx label = ix86_expand_aligntest (count, 4, true);
22411 dest = change_address (destmem, SImode, destptr);
22412 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
22413 emit_label (label);
22414 LABEL_NUSES (label) = 1;
22418 rtx label = ix86_expand_aligntest (count, 2, true);
22419 dest = change_address (destmem, HImode, destptr);
22420 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
22421 emit_label (label);
22422 LABEL_NUSES (label) = 1;
22426 rtx label = ix86_expand_aligntest (count, 1, true);
22427 dest = change_address (destmem, QImode, destptr);
22428 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
22429 emit_label (label);
22430 LABEL_NUSES (label) = 1;
/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned by
   ALIGN, to DESIRED_ALIGNMENT.  */
22437 expand_movmem_prologue (rtx destmem, rtx srcmem,
22438 rtx destptr, rtx srcptr, rtx count,
22439 int align, int desired_alignment)
22441 if (align <= 1 && desired_alignment > 1)
22443 rtx label = ix86_expand_aligntest (destptr, 1, false);
22444 srcmem = change_address (srcmem, QImode, srcptr);
22445 destmem = change_address (destmem, QImode, destptr);
22446 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
22447 ix86_adjust_counter (count, 1);
22448 emit_label (label);
22449 LABEL_NUSES (label) = 1;
22451 if (align <= 2 && desired_alignment > 2)
22453 rtx label = ix86_expand_aligntest (destptr, 2, false);
22454 srcmem = change_address (srcmem, HImode, srcptr);
22455 destmem = change_address (destmem, HImode, destptr);
22456 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
22457 ix86_adjust_counter (count, 2);
22458 emit_label (label);
22459 LABEL_NUSES (label) = 1;
22461 if (align <= 4 && desired_alignment > 4)
22463 rtx label = ix86_expand_aligntest (destptr, 4, false);
22464 srcmem = change_address (srcmem, SImode, srcptr);
22465 destmem = change_address (destmem, SImode, destptr);
22466 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
22467 ix86_adjust_counter (count, 4);
22468 emit_label (label);
22469 LABEL_NUSES (label) = 1;
22471 gcc_assert (desired_alignment <= 8);
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
22477 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
22478 int desired_align, int align_bytes)
22481 rtx orig_dst = dst;
22482 rtx orig_src = src;
22484 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
22485 if (src_align_bytes >= 0)
22486 src_align_bytes = desired_align - src_align_bytes;
22487 if (align_bytes & 1)
22489 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
22490 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
22492 emit_insn (gen_strmov (destreg, dst, srcreg, src));
22494 if (align_bytes & 2)
22496 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
22497 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
22498 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
22499 set_mem_align (dst, 2 * BITS_PER_UNIT);
22500 if (src_align_bytes >= 0
22501 && (src_align_bytes & 1) == (align_bytes & 1)
22502 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
22503 set_mem_align (src, 2 * BITS_PER_UNIT);
22505 emit_insn (gen_strmov (destreg, dst, srcreg, src));
22507 if (align_bytes & 4)
22509 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
22510 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
22511 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
22512 set_mem_align (dst, 4 * BITS_PER_UNIT);
22513 if (src_align_bytes >= 0)
22515 unsigned int src_align = 0;
22516 if ((src_align_bytes & 3) == (align_bytes & 3))
22518 else if ((src_align_bytes & 1) == (align_bytes & 1))
22520 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
22521 set_mem_align (src, src_align * BITS_PER_UNIT);
22524 emit_insn (gen_strmov (destreg, dst, srcreg, src));
22526 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
22527 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
22528 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
22529 set_mem_align (dst, desired_align * BITS_PER_UNIT);
22530 if (src_align_bytes >= 0)
22532 unsigned int src_align = 0;
22533 if ((src_align_bytes & 7) == (align_bytes & 7))
22535 else if ((src_align_bytes & 3) == (align_bytes & 3))
22537 else if ((src_align_bytes & 1) == (align_bytes & 1))
22539 if (src_align > (unsigned int) desired_align)
22540 src_align = desired_align;
22541 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
22542 set_mem_align (src, src_align * BITS_PER_UNIT);
22544 if (MEM_SIZE_KNOWN_P (orig_dst))
22545 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
22546 if (MEM_SIZE_KNOWN_P (orig_src))
22547 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
/* Store enough at DEST to align DEST, known to be aligned by ALIGN, to
   DESIRED_ALIGNMENT.  */
22555 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
22556 int align, int desired_alignment)
22558 if (align <= 1 && desired_alignment > 1)
22560 rtx label = ix86_expand_aligntest (destptr, 1, false);
22561 destmem = change_address (destmem, QImode, destptr);
22562 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
22563 ix86_adjust_counter (count, 1);
22564 emit_label (label);
22565 LABEL_NUSES (label) = 1;
22567 if (align <= 2 && desired_alignment > 2)
22569 rtx label = ix86_expand_aligntest (destptr, 2, false);
22570 destmem = change_address (destmem, HImode, destptr);
22571 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
22572 ix86_adjust_counter (count, 2);
22573 emit_label (label);
22574 LABEL_NUSES (label) = 1;
22576 if (align <= 4 && desired_alignment > 4)
22578 rtx label = ix86_expand_aligntest (destptr, 4, false);
22579 destmem = change_address (destmem, SImode, destptr);
22580 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
22581 ix86_adjust_counter (count, 4);
22582 emit_label (label);
22583 LABEL_NUSES (label) = 1;
22585 gcc_assert (desired_alignment <= 8);
/* Store enough at DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
22591 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
22592 int desired_align, int align_bytes)
22595 rtx orig_dst = dst;
22596 if (align_bytes & 1)
22598 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
22600 emit_insn (gen_strset (destreg, dst,
22601 gen_lowpart (QImode, value)));
22603 if (align_bytes & 2)
22605 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
22606 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
22607 set_mem_align (dst, 2 * BITS_PER_UNIT);
22609 emit_insn (gen_strset (destreg, dst,
22610 gen_lowpart (HImode, value)));
22612 if (align_bytes & 4)
22614 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
22615 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
22616 set_mem_align (dst, 4 * BITS_PER_UNIT);
22618 emit_insn (gen_strset (destreg, dst,
22619 gen_lowpart (SImode, value)));
22621 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
22622 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
22623 set_mem_align (dst, desired_align * BITS_PER_UNIT);
22624 if (MEM_SIZE_KNOWN_P (orig_dst))
22625 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
22629 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22630 static enum stringop_alg
22631 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
22632 int *dynamic_check, bool *noalign)
22634 const struct stringop_algs * algs;
22635 bool optimize_for_speed;
22636 /* Algorithms using the rep prefix want at least edi and ecx;
22637 additionally, memset wants eax and memcpy wants esi. Don't
22638 consider such algorithms if the user has appropriated those
22639 registers for their own purposes. */
22640 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
22642 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
22645 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22646 || (alg != rep_prefix_1_byte \
22647 && alg != rep_prefix_4_byte \
22648 && alg != rep_prefix_8_byte))
22649 const struct processor_costs *cost;
22651 /* Even if the string operation call is cold, we still might spend a lot
22652 of time processing large blocks. */
22653 if (optimize_function_for_size_p (cfun)
22654 || (optimize_insn_for_size_p ()
22655 && expected_size != -1 && expected_size < 256))
22656 optimize_for_speed = false;
22658 optimize_for_speed = true;
22660 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
22662 *dynamic_check = -1;
22664 algs = &cost->memset[TARGET_64BIT != 0];
22666 algs = &cost->memcpy[TARGET_64BIT != 0];
22667 if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
22668 return ix86_stringop_alg;
22669 /* rep; movq or rep; movl is the smallest variant. */
22670 else if (!optimize_for_speed)
22672 if (!count || (count & 3))
22673 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
22675 return rep_prefix_usable ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; REP is expensive to set up.  */
22679 else if (expected_size != -1 && expected_size < 4)
22680 return loop_1_byte;
22681 else if (expected_size != -1)
22684 enum stringop_alg alg = libcall;
22685 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
22687 /* We get here if the algorithms that were not libcall-based
22688 were rep-prefix based and we are unable to use rep prefixes
22689 based on global register usage. Break out of the loop and
22690 use the heuristic below. */
22691 if (algs->size[i].max == 0)
22693 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
22695 enum stringop_alg candidate = algs->size[i].alg;
22697 if (candidate != libcall && ALG_USABLE_P (candidate))
22699 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22700 last non-libcall inline algorithm. */
22701 if (TARGET_INLINE_ALL_STRINGOPS)
/* When the current size is best copied by a libcall,
   but we are still forced to inline, run the heuristic below,
   which picks code for medium-sized blocks.  */
22706 if (alg != libcall)
22710 else if (ALG_USABLE_P (candidate))
22712 *noalign = algs->size[i].noalign;
22717 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of a block that is faster to copy by hand and
   take blocks of at most that size, guessing that the average size will
   be roughly half of the block.

   If this turns out to be bad, we might simply specify the preferred
   choice in ix86_costs.  */
22726 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22727 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
22730 enum stringop_alg alg;
22732 bool any_alg_usable_p = true;
22734 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
22736 enum stringop_alg candidate = algs->size[i].alg;
22737 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
22739 if (candidate != libcall && candidate
22740 && ALG_USABLE_P (candidate))
22741 max = algs->size[i].max;
22743 /* If there aren't any usable algorithms, then recursing on
22744 smaller sizes isn't going to find anything. Just return the
22745 simple byte-at-a-time copy loop. */
22746 if (!any_alg_usable_p)
22748 /* Pick something reasonable. */
22749 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22750 *dynamic_check = 128;
22751 return loop_1_byte;
22755 alg = decide_alg (count, max / 2, memset, dynamic_check, noalign);
22756 gcc_assert (*dynamic_check == -1);
22757 gcc_assert (alg != libcall);
22758 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
22759 *dynamic_check = max;
22762 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
22763 #undef ALG_USABLE_P
22766 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22767 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22769 decide_alignment (int align,
22770 enum stringop_alg alg,
22773 int desired_align = 0;
22777 gcc_unreachable ();
22779 case unrolled_loop:
22780 desired_align = GET_MODE_SIZE (Pmode);
22782 case rep_prefix_8_byte:
22785 case rep_prefix_4_byte:
/* PentiumPro has special logic triggered for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
22788 if (TARGET_PENTIUMPRO)
22793 case rep_prefix_1_byte:
/* PentiumPro has special logic triggered for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
22796 if (TARGET_PENTIUMPRO)
22810 if (desired_align < align)
22811 desired_align = align;
22812 if (expected_size != -1 && expected_size < 4)
22813 desired_align = align;
22814 return desired_align;
22817 /* Return the smallest power of 2 greater than VAL. */
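/* For illustration only, an equivalent plain-C sketch of the computation
   (not the actual body of the function below):

       int pow2 = 1;
       while (pow2 <= val)
         pow2 <<= 1;
       return pow2;

   e.g. VAL = 4 yields 8, VAL = 5 yields 8 and VAL = 0 yields 1.  */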
22819 smallest_pow2_greater_than (int val)
22827 /* Expand string move (memcpy) operation. Use i386 string operations
22828 when profitable. expand_setmem contains similar code. The code
22829 depends upon architecture, block size and alignment, but always has
22830 the same overall structure:
   1) Prologue guard: Conditional that jumps up to the epilogue for small
      blocks that can be handled by the epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the block
      is larger than the desired alignment.
22837 Optional dynamic check for size and libcall for large
22838 blocks is emitted here too, with -minline-stringops-dynamically.
   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree (on power-of-two sized
      blocks) or a byte loop.
22846 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22847 with specified algorithm.
   4) Epilogue: code copying the tail of the block that is too small to be
      handled by the main body (or up to the size guarded by the prologue guard).  */
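/* For illustration only, the emitted code has roughly this shape (a hedged
   sketch; the exact form depends on the algorithm chosen in step 0):

       if (count < epilogue_size_needed)
         goto epilogue;                                  (1: prologue guard)
       while (dst % desired_align != 0)
         { copy 1 byte; count--; }                       (2: prologue)
       copy count & -size_needed bytes
         in size_needed-byte chunks;                     (3: main body)
     epilogue:
       copy the remaining count & (epilogue_size_needed - 1)
         bytes;                                          (4: epilogue)
*/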
22853 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
22854 rtx expected_align_exp, rtx expected_size_exp)
22860 rtx jump_around_label = NULL;
22861 HOST_WIDE_INT align = 1;
22862 unsigned HOST_WIDE_INT count = 0;
22863 HOST_WIDE_INT expected_size = -1;
22864 int size_needed = 0, epilogue_size_needed;
22865 int desired_align = 0, align_bytes = 0;
22866 enum stringop_alg alg;
22868 bool need_zero_guard = false;
22871 if (CONST_INT_P (align_exp))
22872 align = INTVAL (align_exp);
/* i386 can do misaligned access at reasonably increased cost.  */
22874 if (CONST_INT_P (expected_align_exp)
22875 && INTVAL (expected_align_exp) > align)
22876 align = INTVAL (expected_align_exp);
22877 /* ALIGN is the minimum of destination and source alignment, but we care here
22878 just about destination alignment. */
22879 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
22880 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
22882 if (CONST_INT_P (count_exp))
22883 count = expected_size = INTVAL (count_exp);
22884 if (CONST_INT_P (expected_size_exp) && count == 0)
22885 expected_size = INTVAL (expected_size_exp);
22887 /* Make sure we don't need to care about overflow later on. */
22888 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
22891 /* Step 0: Decide on preferred algorithm, desired alignment and
22892 size of chunks to be copied by main loop. */
22894 alg = decide_alg (count, expected_size, false, &dynamic_check, &noalign);
22895 desired_align = decide_alignment (align, alg, expected_size);
22897 if (!TARGET_ALIGN_STRINGOPS || noalign)
22898 align = desired_align;
22900 if (alg == libcall)
22902 gcc_assert (alg != no_stringop);
22904 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
22905 destreg = copy_addr_to_reg (XEXP (dst, 0));
22906 srcreg = copy_addr_to_reg (XEXP (src, 0));
22911 gcc_unreachable ();
22913 need_zero_guard = true;
22914 size_needed = GET_MODE_SIZE (word_mode);
22916 case unrolled_loop:
22917 need_zero_guard = true;
22918 size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
22920 case rep_prefix_8_byte:
22923 case rep_prefix_4_byte:
22926 case rep_prefix_1_byte:
22930 need_zero_guard = true;
22935 epilogue_size_needed = size_needed;
22937 /* Step 1: Prologue guard. */
22939 /* Alignment code needs count to be in register. */
22940 if (CONST_INT_P (count_exp) && desired_align > align)
22942 if (INTVAL (count_exp) > desired_align
22943 && INTVAL (count_exp) > size_needed)
22946 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
22947 if (align_bytes <= 0)
22950 align_bytes = desired_align - align_bytes;
22952 if (align_bytes == 0)
22953 count_exp = force_reg (counter_mode (count_exp), count_exp);
22955 gcc_assert (desired_align >= 1 && align >= 1);
22957 /* Ensure that alignment prologue won't copy past end of block. */
22958 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
22960 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is a power of 2.  */
22963 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
22967 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
22969 /* If main algorithm works on QImode, no epilogue is needed.
22970 For small sizes just don't align anything. */
22971 if (size_needed == 1)
22972 desired_align = align;
22979 label = gen_label_rtx ();
22980 emit_cmp_and_jump_insns (count_exp,
22981 GEN_INT (epilogue_size_needed),
22982 LTU, 0, counter_mode (count_exp), 1, label);
22983 if (expected_size == -1 || expected_size < epilogue_size_needed)
22984 predict_jump (REG_BR_PROB_BASE * 60 / 100);
22986 predict_jump (REG_BR_PROB_BASE * 20 / 100);
  /* Emit code to decide at runtime whether a library call or inline code should be used.  */
22992 if (dynamic_check != -1)
22994 if (CONST_INT_P (count_exp))
22996 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
22998 emit_block_move_via_libcall (dst, src, count_exp, false);
22999 count_exp = const0_rtx;
23005 rtx hot_label = gen_label_rtx ();
23006 jump_around_label = gen_label_rtx ();
23007 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
23008 LEU, 0, GET_MODE (count_exp), 1, hot_label);
23009 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23010 emit_block_move_via_libcall (dst, src, count_exp, false);
23011 emit_jump (jump_around_label);
23012 emit_label (hot_label);
23016 /* Step 2: Alignment prologue. */
23018 if (desired_align > align)
23020 if (align_bytes == 0)
	  /* Except for the first move in the epilogue, we no longer know the
	     constant offset in the aliasing info.  It does not seem worth
	     the pain to maintain it for the first move, so throw away the info early.  */
23026 src = change_address (src, BLKmode, srcreg);
23027 dst = change_address (dst, BLKmode, destreg);
23028 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
23033 /* If we know how many bytes need to be stored before dst is
23034 sufficiently aligned, maintain aliasing info accurately. */
23035 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
23036 desired_align, align_bytes);
23037 count_exp = plus_constant (counter_mode (count_exp),
23038 count_exp, -align_bytes);
23039 count -= align_bytes;
23041 if (need_zero_guard
23042 && (count < (unsigned HOST_WIDE_INT) size_needed
23043 || (align_bytes == 0
23044 && count < ((unsigned HOST_WIDE_INT) size_needed
23045 + desired_align - align))))
      /* It is possible that we copied enough so the main loop will not need to run.  */
23049 gcc_assert (size_needed > 1);
23050 if (label == NULL_RTX)
23051 label = gen_label_rtx ();
23052 emit_cmp_and_jump_insns (count_exp,
23053 GEN_INT (size_needed),
23054 LTU, 0, counter_mode (count_exp), 1, label);
23055 if (expected_size == -1
23056 || expected_size < (desired_align - align) / 2 + size_needed)
23057 predict_jump (REG_BR_PROB_BASE * 20 / 100);
23059 predict_jump (REG_BR_PROB_BASE * 60 / 100);
23062 if (label && size_needed == 1)
23064 emit_label (label);
23065 LABEL_NUSES (label) = 1;
23067 epilogue_size_needed = 1;
23069 else if (label == NULL_RTX)
23070 epilogue_size_needed = size_needed;
23072 /* Step 3: Main loop. */
23078 gcc_unreachable ();
23080 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
23081 count_exp, QImode, 1, expected_size);
23084 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
23085 count_exp, word_mode, 1, expected_size);
23087 case unrolled_loop:
      /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
	 registers for 4 temporaries anyway.  */
23090 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
23091 count_exp, word_mode, TARGET_64BIT ? 4 : 2,
23094 case rep_prefix_8_byte:
23095 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
23098 case rep_prefix_4_byte:
23099 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
23102 case rep_prefix_1_byte:
23103 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
  /* Properly adjust the offsets of the src and dest memory for aliasing.  */
23108 if (CONST_INT_P (count_exp))
23110 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
23111 (count / size_needed) * size_needed);
23112 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
23113 (count / size_needed) * size_needed);
23117 src = change_address (src, BLKmode, srcreg);
23118 dst = change_address (dst, BLKmode, destreg);
23121 /* Step 4: Epilogue to copy the remaining bytes. */
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
	 bytes.  Compensate if needed.  */
23130 if (size_needed < epilogue_size_needed)
23133 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
23134 GEN_INT (size_needed - 1), count_exp, 1,
23136 if (tmp != count_exp)
23137 emit_move_insn (count_exp, tmp);
23139 emit_label (label);
23140 LABEL_NUSES (label) = 1;
23143 if (count_exp != const0_rtx && epilogue_size_needed > 1)
23144 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
23145 epilogue_size_needed);
23146 if (jump_around_label)
23147 emit_label (jump_around_label);
/* Helper function for memset.  For the QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with slow multiply.  */
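/* For illustration only, a plain-C sketch of the shift-and-or expansion used
   when the multiply is judged too slow (the helper name is invented for the
   example and is not part of this file):

       static unsigned int
       dup_byte_si (unsigned char b)
       {
         unsigned int v = b;
         v |= v << 8;                  (now 0x0000XYXY)
         v |= v << 16;                 (now 0xXYXYXYXY)
         return v;
       }

   For DImode one further "v |= v << 32" step is emitted; the multiply path
   instead computes b * 0x01010101 (SImode) or b * 0x0101010101010101 (DImode).  */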
23157 promote_duplicated_reg (enum machine_mode mode, rtx val)
23159 enum machine_mode valmode = GET_MODE (val);
23161 int nops = mode == DImode ? 3 : 2;
23163 gcc_assert (mode == SImode || mode == DImode);
23164 if (val == const0_rtx)
23165 return copy_to_mode_reg (mode, const0_rtx);
23166 if (CONST_INT_P (val))
23168 HOST_WIDE_INT v = INTVAL (val) & 255;
23172 if (mode == DImode)
23173 v |= (v << 16) << 16;
23174 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
23177 if (valmode == VOIDmode)
23179 if (valmode != QImode)
23180 val = gen_lowpart (QImode, val);
23181 if (mode == QImode)
23183 if (!TARGET_PARTIAL_REG_STALL)
23185 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
23186 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
23187 <= (ix86_cost->shift_const + ix86_cost->add) * nops
23188 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
23190 rtx reg = convert_modes (mode, QImode, val, true);
23191 tmp = promote_duplicated_reg (mode, const1_rtx);
23192 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
23197 rtx reg = convert_modes (mode, QImode, val, true);
23199 if (!TARGET_PARTIAL_REG_STALL)
23200 if (mode == SImode)
23201 emit_insn (gen_movsi_insv_1 (reg, reg));
23203 emit_insn (gen_movdi_insv_1 (reg, reg));
23206 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
23207 NULL, 1, OPTAB_DIRECT);
23209 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
23211 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
23212 NULL, 1, OPTAB_DIRECT);
23213 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
23214 if (mode == SImode)
23216 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
23217 NULL, 1, OPTAB_DIRECT);
23218 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
23223 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
23224 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
23225 alignment from ALIGN to DESIRED_ALIGN. */
23227 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
23232 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
23233 promoted_val = promote_duplicated_reg (DImode, val);
23234 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
23235 promoted_val = promote_duplicated_reg (SImode, val);
23236 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
23237 promoted_val = promote_duplicated_reg (HImode, val);
23239 promoted_val = val;
23241 return promoted_val;
/* Expand the string set operation (memset).  Use i386 string operations when
   profitable.  See the ix86_expand_movmem comment for an explanation of the
   individual steps performed.  */
23248 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
23249 rtx expected_align_exp, rtx expected_size_exp)
23254 rtx jump_around_label = NULL;
23255 HOST_WIDE_INT align = 1;
23256 unsigned HOST_WIDE_INT count = 0;
23257 HOST_WIDE_INT expected_size = -1;
23258 int size_needed = 0, epilogue_size_needed;
23259 int desired_align = 0, align_bytes = 0;
23260 enum stringop_alg alg;
23261 rtx promoted_val = NULL;
23262 bool force_loopy_epilogue = false;
23264 bool need_zero_guard = false;
23267 if (CONST_INT_P (align_exp))
23268 align = INTVAL (align_exp);
/* i386 can do misaligned access at reasonably increased cost.  */
23270 if (CONST_INT_P (expected_align_exp)
23271 && INTVAL (expected_align_exp) > align)
23272 align = INTVAL (expected_align_exp);
23273 if (CONST_INT_P (count_exp))
23274 count = expected_size = INTVAL (count_exp);
23275 if (CONST_INT_P (expected_size_exp) && count == 0)
23276 expected_size = INTVAL (expected_size_exp);
23278 /* Make sure we don't need to care about overflow later on. */
23279 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
23282 /* Step 0: Decide on preferred algorithm, desired alignment and
23283 size of chunks to be copied by main loop. */
23285 alg = decide_alg (count, expected_size, true, &dynamic_check, &noalign);
23286 desired_align = decide_alignment (align, alg, expected_size);
23288 if (!TARGET_ALIGN_STRINGOPS || noalign)
23289 align = desired_align;
23291 if (alg == libcall)
23293 gcc_assert (alg != no_stringop);
23295 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
23296 destreg = copy_addr_to_reg (XEXP (dst, 0));
23301 gcc_unreachable ();
23303 need_zero_guard = true;
23304 size_needed = GET_MODE_SIZE (word_mode);
23306 case unrolled_loop:
23307 need_zero_guard = true;
23308 size_needed = GET_MODE_SIZE (word_mode) * 4;
23310 case rep_prefix_8_byte:
23313 case rep_prefix_4_byte:
23316 case rep_prefix_1_byte:
23320 need_zero_guard = true;
23324 epilogue_size_needed = size_needed;
23326 /* Step 1: Prologue guard. */
23328 /* Alignment code needs count to be in register. */
23329 if (CONST_INT_P (count_exp) && desired_align > align)
23331 if (INTVAL (count_exp) > desired_align
23332 && INTVAL (count_exp) > size_needed)
23335 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
23336 if (align_bytes <= 0)
23339 align_bytes = desired_align - align_bytes;
23341 if (align_bytes == 0)
23343 enum machine_mode mode = SImode;
23344 if (TARGET_64BIT && (count & ~0xffffffff))
23346 count_exp = force_reg (mode, count_exp);
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).  */
23352 if (CONST_INT_P (val_exp))
23353 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
23354 desired_align, align);
23355 /* Ensure that alignment prologue won't copy past end of block. */
23356 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
23358 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
23359 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is a power of 2.  */
23361 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
      /* To improve the performance of small blocks, we jump around the VAL-
	 promoting code.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use a byte loop variant.  */
23367 if (epilogue_size_needed > 2 && !promoted_val)
23368 force_loopy_epilogue = true;
23371 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
23373 /* If main algorithm works on QImode, no epilogue is needed.
23374 For small sizes just don't align anything. */
23375 if (size_needed == 1)
23376 desired_align = align;
23383 label = gen_label_rtx ();
23384 emit_cmp_and_jump_insns (count_exp,
23385 GEN_INT (epilogue_size_needed),
23386 LTU, 0, counter_mode (count_exp), 1, label);
23387 if (expected_size == -1 || expected_size <= epilogue_size_needed)
23388 predict_jump (REG_BR_PROB_BASE * 60 / 100);
23390 predict_jump (REG_BR_PROB_BASE * 20 / 100);
23393 if (dynamic_check != -1)
23395 rtx hot_label = gen_label_rtx ();
23396 jump_around_label = gen_label_rtx ();
23397 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
23398 LEU, 0, counter_mode (count_exp), 1, hot_label);
23399 predict_jump (REG_BR_PROB_BASE * 90 / 100);
23400 set_storage_via_libcall (dst, count_exp, val_exp, false);
23401 emit_jump (jump_around_label);
23402 emit_label (hot_label);
23405 /* Step 2: Alignment prologue. */
  /* Do the expensive promotion once we have branched off the small blocks.  */
23409 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
23410 desired_align, align);
23411 gcc_assert (desired_align >= 1 && align >= 1);
23413 if (desired_align > align)
23415 if (align_bytes == 0)
	  /* Except for the first move in the epilogue, we no longer know the
	     constant offset in the aliasing info.  It does not seem worth
	     the pain to maintain it for the first move, so throw away the info early.  */
23421 dst = change_address (dst, BLKmode, destreg);
23422 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
23427 /* If we know how many bytes need to be stored before dst is
23428 sufficiently aligned, maintain aliasing info accurately. */
23429 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
23430 desired_align, align_bytes);
23431 count_exp = plus_constant (counter_mode (count_exp),
23432 count_exp, -align_bytes);
23433 count -= align_bytes;
23435 if (need_zero_guard
23436 && (count < (unsigned HOST_WIDE_INT) size_needed
23437 || (align_bytes == 0
23438 && count < ((unsigned HOST_WIDE_INT) size_needed
23439 + desired_align - align))))
      /* It is possible that we copied enough so the main loop will not need to run.  */
23443 gcc_assert (size_needed > 1);
23444 if (label == NULL_RTX)
23445 label = gen_label_rtx ();
23446 emit_cmp_and_jump_insns (count_exp,
23447 GEN_INT (size_needed),
23448 LTU, 0, counter_mode (count_exp), 1, label);
23449 if (expected_size == -1
23450 || expected_size < (desired_align - align) / 2 + size_needed)
23451 predict_jump (REG_BR_PROB_BASE * 20 / 100);
23453 predict_jump (REG_BR_PROB_BASE * 60 / 100);
23456 if (label && size_needed == 1)
23458 emit_label (label);
23459 LABEL_NUSES (label) = 1;
23461 promoted_val = val_exp;
23462 epilogue_size_needed = 1;
23464 else if (label == NULL_RTX)
23465 epilogue_size_needed = size_needed;
23467 /* Step 3: Main loop. */
23473 gcc_unreachable ();
23475 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
23476 count_exp, QImode, 1, expected_size);
23479 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
23480 count_exp, word_mode, 1, expected_size);
23482 case unrolled_loop:
23483 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
23484 count_exp, word_mode, 4, expected_size);
23486 case rep_prefix_8_byte:
23487 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
23490 case rep_prefix_4_byte:
23491 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
23494 case rep_prefix_1_byte:
23495 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
  /* Properly adjust the offset of the dest memory for aliasing.  */
23500 if (CONST_INT_P (count_exp))
23501 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
23502 (count / size_needed) * size_needed);
23504 dst = change_address (dst, BLKmode, destreg);
23506 /* Step 4: Epilogue to copy the remaining bytes. */
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
	 bytes.  Compensate if needed.  */
23515 if (size_needed < epilogue_size_needed)
23518 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
23519 GEN_INT (size_needed - 1), count_exp, 1,
23521 if (tmp != count_exp)
23522 emit_move_insn (count_exp, tmp);
23524 emit_label (label);
23525 LABEL_NUSES (label) = 1;
23528 if (count_exp != const0_rtx && epilogue_size_needed > 1)
23530 if (force_loopy_epilogue)
23531 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
23532 epilogue_size_needed);
23534 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
23535 epilogue_size_needed);
23537 if (jump_around_label)
23538 emit_label (jump_around_label);
/* Expand the appropriate insns for doing strlen if not just doing repnz; scasb:
23545 out = result, initialized with the start address
23546 align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
23548 not aligned, otherwise undefined
23550 This is just the body. It needs the initializations mentioned above and
23551 some address computing at the end. These things are done in i386.md. */
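/* For illustration only, a C sketch of the scan performed by the expansion
   below (identifiers such as load4 and index_of_first_zero_byte are invented
   for the example; the real code locates the zero byte inside the final word
   without a branch, using the flags and sbb):

       p = out;                                      (start address)
       while (((uintptr_t) p & 3) != 0)              (peel 1..3 bytes)
         {
           if (*p == 0)
             goto done;
           p++;
         }
       do
         {
           w = load4 (p);                            (4 bytes per iteration)
           p += 4;
         }
       while (((w - 0x01010101U) & ~w & 0x80808080U) == 0);
       p = p - 4 + index_of_first_zero_byte (w);     (branch-free fixup below)
     done:
       (P is now the address of the terminating zero byte; the caller
        subtracts the start address to obtain the length.)  */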
23554 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
23558 rtx align_2_label = NULL_RTX;
23559 rtx align_3_label = NULL_RTX;
23560 rtx align_4_label = gen_label_rtx ();
23561 rtx end_0_label = gen_label_rtx ();
23563 rtx tmpreg = gen_reg_rtx (SImode);
23564 rtx scratch = gen_reg_rtx (SImode);
23568 if (CONST_INT_P (align_rtx))
23569 align = INTVAL (align_rtx);
23571 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23573 /* Is there a known alignment and is it less than 4? */
23576 rtx scratch1 = gen_reg_rtx (Pmode);
23577 emit_move_insn (scratch1, out);
23578 /* Is there a known alignment and is it not 2? */
23581 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
23582 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
      /* Leave just the two low bits.  */
23585 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
23586 NULL_RTX, 0, OPTAB_WIDEN);
23588 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
23589 Pmode, 1, align_4_label);
23590 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
23591 Pmode, 1, align_2_label);
23592 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
23593 Pmode, 1, align_3_label);
      /* Since the alignment is 2, we have to check 2 or 0 bytes;
	 check whether it is aligned to a 4-byte boundary.  */
23600 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
23601 NULL_RTX, 0, OPTAB_WIDEN);
23603 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
23604 Pmode, 1, align_4_label);
23607 mem = change_address (src, QImode, out);
23609 /* Now compare the bytes. */
  /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
23612 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
23613 QImode, 1, end_0_label);
23615 /* Increment the address. */
23616 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
23618 /* Not needed with an alignment of 2 */
23621 emit_label (align_2_label);
23623 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
23626 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
23628 emit_label (align_3_label);
23631 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
23634 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; that only makes the program bigger and does not help.  */
23640 emit_label (align_4_label);
23642 mem = change_address (src, SImode, out);
23643 emit_move_insn (scratch, mem);
23644 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
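  /* For illustration (example value invented): with scratch = 0x41420043 the
     four insns below compute

         tmpreg  = scratch - 0x01010101   =  0x4040FF42
         scratch = ~scratch               =  0xBEBDFFBC
         tmpreg &= scratch                =  0x0000FF00
         tmpreg &= 0x80808080             =  0x00008000   (nonzero: byte 1 is 0)

     while a word with no zero byte, e.g. 0x41424344, yields 0 and the loop
     continues.  */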
23649 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
23650 emit_insn (gen_one_cmplsi2 (scratch, scratch));
23651 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
23652 emit_insn (gen_andsi3 (tmpreg, tmpreg,
23653 gen_int_mode (0x80808080, SImode)));
23654 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
23659 rtx reg = gen_reg_rtx (SImode);
23660 rtx reg2 = gen_reg_rtx (Pmode);
23661 emit_move_insn (reg, tmpreg);
23662 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
23664 /* If zero is not in the first two bytes, move two bytes forward. */
23665 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
23666 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23667 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
23668 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
23669 gen_rtx_IF_THEN_ELSE (SImode, tmp,
23672 /* Emit lea manually to avoid clobbering of flags. */
23673 emit_insn (gen_rtx_SET (SImode, reg2,
23674 gen_rtx_PLUS (Pmode, out, const2_rtx)));
23676 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23677 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
23678 emit_insn (gen_rtx_SET (VOIDmode, out,
23679 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
23685 rtx end_2_label = gen_label_rtx ();
23686 /* Is zero in the first two bytes? */
23688 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
23689 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
23690 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
23691 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23692 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
23694 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
23695 JUMP_LABEL (tmp) = end_2_label;
23697 /* Not in the first two. Move two bytes forward. */
23698 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
23699 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
23701 emit_label (end_2_label);
23705 /* Avoid branch in fixing the byte. */
23706 tmpreg = gen_lowpart (QImode, tmpreg);
23707 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
23708 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
23709 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
23710 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
23712 emit_label (end_0_label);
23715 /* Expand strlen. */
23718 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
23720 rtx addr, scratch1, scratch2, scratch3, scratch4;
  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
23725 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23726 && !TARGET_INLINE_ALL_STRINGOPS
23727 && !optimize_insn_for_size_p ()
23728 && (!CONST_INT_P (align) || INTVAL (align) < 4))
23731 addr = force_reg (Pmode, XEXP (src, 0));
23732 scratch1 = gen_reg_rtx (Pmode);
23734 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
23735 && !optimize_insn_for_size_p ())
      /* Well, it seems that some optimizer does not combine a call like
	 foo (strlen (bar), strlen (bar));
	 when the move and the subtraction are done here.  It does calculate
	 the length just once when these instructions are done inside
	 output_strlen_unroll ().  But I think that since &bar[strlen (bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll (), this is better.  */
23745 emit_move_insn (out, addr);
23747 ix86_expand_strlensi_unroll_1 (out, src, align);
23749 /* strlensi_unroll_1 returns the address of the zero at the end of
23750 the string, like memchr(), so compute the length by subtracting
23751 the start address. */
23752 emit_insn (ix86_gen_sub3 (out, out, addr));
23758 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23759 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
23762 scratch2 = gen_reg_rtx (Pmode);
23763 scratch3 = gen_reg_rtx (Pmode);
23764 scratch4 = force_reg (Pmode, constm1_rtx);
23766 emit_move_insn (scratch3, addr);
23767 eoschar = force_reg (QImode, eoschar);
23769 src = replace_equiv_address_nv (src, scratch3);
23771 /* If .md starts supporting :P, this can be done in .md. */
23772 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
23773 scratch4), UNSPEC_SCAS);
23774 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
23775 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
23776 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
/* For a given symbol (function), construct code to compute the address of its
   PLT entry in the large x86-64 PIC model.  */
23784 construct_plt_address (rtx symbol)
23788 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
23789 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
23790 gcc_assert (Pmode == DImode);
23792 tmp = gen_reg_rtx (Pmode);
23793 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
23795 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
23796 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
23801 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
23803 rtx pop, bool sibcall)
23805 unsigned int const cregs_size
23806 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
23807 rtx vec[3 + cregs_size];
23808 rtx use = NULL, call;
23809 unsigned int vec_len = 0;
23811 if (pop == const0_rtx)
23813 gcc_assert (!TARGET_64BIT || !pop);
23815 if (TARGET_MACHO && !TARGET_64BIT)
23818 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
23819 fnaddr = machopic_indirect_call_target (fnaddr);
23824 /* Static functions and indirect calls don't need the pic register. */
23827 || (ix86_cmodel == CM_LARGE_PIC
23828 && DEFAULT_ABI != MS_ABI))
23829 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23830 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
23831 use_reg (&use, pic_offset_table_rtx);
23834 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
23836 rtx al = gen_rtx_REG (QImode, AX_REG);
23837 emit_move_insn (al, callarg2);
23838 use_reg (&use, al);
23841 if (ix86_cmodel == CM_LARGE_PIC
23844 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
23845 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
23846 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
23848 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
23849 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
23851 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
23852 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
23855 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
23857 call = gen_rtx_SET (VOIDmode, retval, call);
23858 vec[vec_len++] = call;
23862 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
23863 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
23864 vec[vec_len++] = pop;
23867 if (TARGET_64BIT_MS_ABI
23868 && (!callarg2 || INTVAL (callarg2) != -2))
23872 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
23873 UNSPEC_MS_TO_SYSV_CALL);
23875 for (i = 0; i < cregs_size; i++)
23877 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
23878 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
23881 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
23886 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
23887 call = emit_call_insn (call);
23889 CALL_INSN_FUNCTION_USAGE (call) = use;
23894 /* Output the assembly for a call instruction. */
23897 ix86_output_call_insn (rtx insn, rtx call_op)
23899 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
23900 bool seh_nop_p = false;
23903 if (SIBLING_CALL_P (insn))
23907 /* SEH epilogue detection requires the indirect branch case
23908 to include REX.W. */
23909 else if (TARGET_SEH)
23910 xasm = "rex.W jmp %A0";
23914 output_asm_insn (xasm, &call_op);
23918 /* SEH unwinding can require an extra nop to be emitted in several
23919 circumstances. Determine if we have one of those. */
23924 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
23926 /* If we get to another real insn, we don't need the nop. */
23930 /* If we get to the epilogue note, prevent a catch region from
23931 being adjacent to the standard epilogue sequence.  If non-call
23932 exceptions are enabled, we'll have done this during epilogue emission.  */
23933 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
23934 && !flag_non_call_exceptions
23935 && !can_throw_internal (insn))
23942 /* If we didn't find a real insn following the call, prevent the
23943 unwinder from looking into the next function. */
23949 xasm = "call\t%P0";
23951 xasm = "call\t%A0";
23953 output_asm_insn (xasm, &call_op);
23961 /* Clear stack slot assignments remembered from previous functions.
23962 This is called from INIT_EXPANDERS once before RTL is emitted for each function.  */
23965 static struct machine_function *
23966 ix86_init_machine_status (void)
23968 struct machine_function *f;
23970 f = ggc_alloc_cleared_machine_function ();
23971 f->use_fast_prologue_epilogue_nregs = -1;
23972 f->call_abi = ix86_abi;
23977 /* Return a MEM corresponding to a stack slot with mode MODE.
23978 Allocate a new slot if necessary.
23980 The RTL for a function can have several slots available: N is
23981 which slot to use. */
23984 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
23986 struct stack_local_entry *s;
23988 gcc_assert (n < MAX_386_STACK_LOCALS);
23990 for (s = ix86_stack_locals; s; s = s->next)
23991 if (s->mode == mode && s->n == n)
23992 return validize_mem (copy_rtx (s->rtl));
23994 s = ggc_alloc_stack_local_entry ();
23997 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
23999 s->next = ix86_stack_locals;
24000 ix86_stack_locals = s;
24001 return validize_mem (s->rtl);
24005 ix86_instantiate_decls (void)
24007 struct stack_local_entry *s;
24009 for (s = ix86_stack_locals; s; s = s->next)
24010 if (s->rtl != NULL_RTX)
24011 instantiate_decl_rtl (s->rtl);
24014 /* Calculate the length of the memory address in the instruction encoding.
24015 Includes addr32 prefix, does not include the one-byte modrm, opcode,
24016 or other prefixes.  We never generate an addr32 prefix for the LEA insn.  */
24019 memory_address_length (rtx addr, bool lea)
24021 struct ix86_address parts;
24022 rtx base, index, disp;
24026 if (GET_CODE (addr) == PRE_DEC
24027 || GET_CODE (addr) == POST_INC
24028 || GET_CODE (addr) == PRE_MODIFY
24029 || GET_CODE (addr) == POST_MODIFY)
24032 ok = ix86_decompose_address (addr, &parts);
24035 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
24037 /* If this is not an LEA instruction, add the length of the addr32 prefix.  */
24038 if (TARGET_64BIT && !lea
24039 && (SImode_address_operand (addr, VOIDmode)
24040 || (parts.base && GET_MODE (parts.base) == SImode)
24041 || (parts.index && GET_MODE (parts.index) == SImode)))
24045 index = parts.index;
24048 if (base && GET_CODE (base) == SUBREG)
24049 base = SUBREG_REG (base);
24050 if (index && GET_CODE (index) == SUBREG)
24051 index = SUBREG_REG (index);
24053 gcc_assert (base == NULL_RTX || REG_P (base));
24054 gcc_assert (index == NULL_RTX || REG_P (index));
24057 - esp as the base always wants an index,
24058 - ebp as the base always wants a displacement,
24059 - r12 as the base always wants an index,
24060 - r13 as the base always wants a displacement. */
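/* For example, (%esp) and (%r12) can only be encoded with a SIB byte
   even when no index is used, and (%ebp) and (%r13) can only be encoded
   with an explicit zero displacement, each costing one extra byte.  */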
24062 /* Register Indirect. */
24063 if (base && !index && !disp)
24065 /* esp (for its index) and ebp (for its displacement) need
24066 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit mode.  */
24068 if (base == arg_pointer_rtx
24069 || base == frame_pointer_rtx
24070 || REGNO (base) == SP_REG
24071 || REGNO (base) == BP_REG
24072 || REGNO (base) == R12_REG
24073 || REGNO (base) == R13_REG)
24077 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
24078 is not disp32, but disp32(%rip), so for disp32 a
24079 SIB byte is needed, unless print_operand_address
24080 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC.  */
24082 else if (disp && !base && !index)
24089 if (GET_CODE (disp) == CONST)
24090 symbol = XEXP (disp, 0);
24091 if (GET_CODE (symbol) == PLUS
24092 && CONST_INT_P (XEXP (symbol, 1)))
24093 symbol = XEXP (symbol, 0);
24095 if (GET_CODE (symbol) != LABEL_REF
24096 && (GET_CODE (symbol) != SYMBOL_REF
24097 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
24098 && (GET_CODE (symbol) != UNSPEC
24099 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
24100 && XINT (symbol, 1) != UNSPEC_PCREL
24101 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
24107 /* Find the length of the displacement constant. */
24110 if (base && satisfies_constraint_K (disp))
24115 /* ebp always wants a displacement. Similarly r13. */
24116 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
24119 /* An index requires the two-byte modrm form.... */
24121 /* ...like esp (or r12), which always wants an index. */
24122 || base == arg_pointer_rtx
24123 || base == frame_pointer_rtx
24124 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
24131 /* Compute default value for "length_immediate" attribute. When SHORTFORM
24132 is set, expect that the insn has an 8-bit immediate alternative.  */
24134 ix86_attr_length_immediate_default (rtx insn, bool shortform)
24138 extract_insn_cached (insn);
24139 for (i = recog_data.n_operands - 1; i >= 0; --i)
24140 if (CONSTANT_P (recog_data.operand[i]))
24142 enum attr_mode mode = get_attr_mode (insn);
24145 if (shortform && CONST_INT_P (recog_data.operand[i]))
24147 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
24154 ival = trunc_int_for_mode (ival, HImode);
24157 ival = trunc_int_for_mode (ival, SImode);
24162 if (IN_RANGE (ival, -128, 127))
24179 /* Immediates for DImode instructions are encoded
24180 as 32-bit sign-extended values.  */
24185 fatal_insn ("unknown insn mode", insn);
24191 /* Compute default value for "length_address" attribute. */
24193 ix86_attr_length_address_default (rtx insn)
24197 if (get_attr_type (insn) == TYPE_LEA)
24199 rtx set = PATTERN (insn), addr;
24201 if (GET_CODE (set) == PARALLEL)
24202 set = XVECEXP (set, 0, 0);
24204 gcc_assert (GET_CODE (set) == SET);
24206 addr = SET_SRC (set);
24208 return memory_address_length (addr, true);
24211 extract_insn_cached (insn);
24212 for (i = recog_data.n_operands - 1; i >= 0; --i)
24213 if (MEM_P (recog_data.operand[i]))
24215 constrain_operands_cached (reload_completed);
24216 if (which_alternative != -1)
24218 const char *constraints = recog_data.constraints[i];
24219 int alt = which_alternative;
24221 while (*constraints == '=' || *constraints == '+')
24224 while (*constraints++ != ',')
24226 /* Skip ignored operands. */
24227 if (*constraints == 'X')
24230 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
24235 /* Compute default value for "length_vex" attribute. It includes
24236 the 2- or 3-byte VEX prefix and 1 opcode byte.  */
24239 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
24243 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W bit
24244 requires the 3-byte VEX prefix.  */
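/* The 2-byte form (C5 ..) implies the 0f opcode map and provides no
   W, X or B bits, so insns that need the 0f38/0f3a maps, VEX.W, or the
   extended-register bits X/B must use the 3-byte form (C4 .. ..).  */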
24245 if (!has_0f_opcode || has_vex_w)
24248 /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
24252 extract_insn_cached (insn);
24254 for (i = recog_data.n_operands - 1; i >= 0; --i)
24255 if (REG_P (recog_data.operand[i]))
24257 /* REX.W bit uses 3 byte VEX prefix. */
24258 if (GET_MODE (recog_data.operand[i]) == DImode
24259 && GENERAL_REG_P (recog_data.operand[i]))
24264 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24265 if (MEM_P (recog_data.operand[i])
24266 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
24273 /* Return the maximum number of instructions a cpu can issue. */
24276 ix86_issue_rate (void)
24280 case PROCESSOR_PENTIUM:
24281 case PROCESSOR_ATOM:
24282 case PROCESSOR_SLM:
24284 case PROCESSOR_BTVER2:
24287 case PROCESSOR_PENTIUMPRO:
24288 case PROCESSOR_PENTIUM4:
24289 case PROCESSOR_CORE2:
24290 case PROCESSOR_COREI7:
24291 case PROCESSOR_HASWELL:
24292 case PROCESSOR_ATHLON:
24294 case PROCESSOR_AMDFAM10:
24295 case PROCESSOR_NOCONA:
24296 case PROCESSOR_GENERIC32:
24297 case PROCESSOR_GENERIC64:
24298 case PROCESSOR_BDVER1:
24299 case PROCESSOR_BDVER2:
24300 case PROCESSOR_BDVER3:
24301 case PROCESSOR_BTVER1:
24309 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24310 by DEP_INSN and nothing else set by DEP_INSN.  */
24313 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
24317 /* Simplify the test for uninteresting insns. */
24318 if (insn_type != TYPE_SETCC
24319 && insn_type != TYPE_ICMOV
24320 && insn_type != TYPE_FCMOV
24321 && insn_type != TYPE_IBR)
24324 if ((set = single_set (dep_insn)) != 0)
24326 set = SET_DEST (set);
24329 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
24330 && XVECLEN (PATTERN (dep_insn), 0) == 2
24331 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
24332 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
24334 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
24335 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
24340 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
24343 /* This test is true if the dependent insn reads the flags but
24344 not any other potentially set register. */
24345 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
24348 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
24354 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN.  */
24358 ix86_agi_dependent (rtx set_insn, rtx use_insn)
24361 extract_insn_cached (use_insn);
24362 for (i = recog_data.n_operands - 1; i >= 0; --i)
24363 if (MEM_P (recog_data.operand[i]))
24365 rtx addr = XEXP (recog_data.operand[i], 0);
24366 return modified_in_p (addr, set_insn) != 0;
24372 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
24374 enum attr_type insn_type, dep_insn_type;
24375 enum attr_memory memory;
24377 int dep_insn_code_number;
24379 /* Anti and output dependencies have zero cost on all CPUs. */
24380 if (REG_NOTE_KIND (link) != 0)
24383 dep_insn_code_number = recog_memoized (dep_insn);
24385 /* If we can't recognize the insns, we can't really do anything. */
24386 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
24389 insn_type = get_attr_type (insn);
24390 dep_insn_type = get_attr_type (dep_insn);
24394 case PROCESSOR_PENTIUM:
24395 /* Address Generation Interlock adds a cycle of latency. */
24396 if (insn_type == TYPE_LEA)
24398 rtx addr = PATTERN (insn);
24400 if (GET_CODE (addr) == PARALLEL)
24401 addr = XVECEXP (addr, 0, 0);
24403 gcc_assert (GET_CODE (addr) == SET);
24405 addr = SET_SRC (addr);
24406 if (modified_in_p (addr, dep_insn))
24409 else if (ix86_agi_dependent (dep_insn, insn))
24412 /* ??? Compares pair with jump/setcc. */
24413 if (ix86_flags_dependent (insn, dep_insn, insn_type))
24416 /* Floating point stores require value to be ready one cycle earlier. */
24417 if (insn_type == TYPE_FMOV
24418 && get_attr_memory (insn) == MEMORY_STORE
24419 && !ix86_agi_dependent (dep_insn, insn))
24423 case PROCESSOR_PENTIUMPRO:
24424 memory = get_attr_memory (insn);
24426 /* INT->FP conversion is expensive. */
24427 if (get_attr_fp_int_src (dep_insn))
24430 /* There is one cycle extra latency between an FP op and a store. */
24431 if (insn_type == TYPE_FMOV
24432 && (set = single_set (dep_insn)) != NULL_RTX
24433 && (set2 = single_set (insn)) != NULL_RTX
24434 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
24435 && MEM_P (SET_DEST (set2)))
24438 /* Show the ability of the reorder buffer to hide the latency of a load by
24439 executing it in parallel with the previous instruction when that
24440 instruction is not needed to compute the address.  */
24441 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24442 && !ix86_agi_dependent (dep_insn, insn))
24444 /* Claim moves to take one cycle, as the core can issue one load
24445 at a time and the next load can start a cycle later.  */
24446 if (dep_insn_type == TYPE_IMOV
24447 || dep_insn_type == TYPE_FMOV)
24455 memory = get_attr_memory (insn);
24457 /* The esp dependency is resolved before the instruction is really finished.  */
24459 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
24460 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
24463 /* INT->FP conversion is expensive. */
24464 if (get_attr_fp_int_src (dep_insn))
24467 /* Show the ability of the reorder buffer to hide the latency of a load by
24468 executing it in parallel with the previous instruction when that
24469 instruction is not needed to compute the address.  */
24470 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24471 && !ix86_agi_dependent (dep_insn, insn))
24473 /* Claim moves to take one cycle, as the core can issue one load
24474 at a time and the next load can start a cycle later.  */
24475 if (dep_insn_type == TYPE_IMOV
24476 || dep_insn_type == TYPE_FMOV)
24485 case PROCESSOR_ATHLON:
24487 case PROCESSOR_AMDFAM10:
24488 case PROCESSOR_BDVER1:
24489 case PROCESSOR_BDVER2:
24490 case PROCESSOR_BDVER3:
24491 case PROCESSOR_BTVER1:
24492 case PROCESSOR_BTVER2:
24493 case PROCESSOR_ATOM:
24494 case PROCESSOR_GENERIC32:
24495 case PROCESSOR_GENERIC64:
24496 memory = get_attr_memory (insn);
24498 /* Show the ability of the reorder buffer to hide the latency of a load by
24499 executing it in parallel with the previous instruction when that
24500 instruction is not needed to compute the address.  */
24501 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
24502 && !ix86_agi_dependent (dep_insn, insn))
24504 enum attr_unit unit = get_attr_unit (insn);
24507 /* Because of the difference between the length of integer and
24508 floating unit pipeline preparation stages, the memory operands
24509 for floating point are cheaper.
24511 ??? For Athlon the difference is most probably 2.  */
24512 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
24515 loadcost = TARGET_ATHLON ? 2 : 0;
24517 if (cost >= loadcost)
24530 /* How many alternative schedules to try. This should be as wide as the
24531 scheduling freedom in the DFA, but no wider. Making this value too
24532 large results in extra work for the scheduler.  */
24535 ia32_multipass_dfa_lookahead (void)
24539 case PROCESSOR_PENTIUM:
24542 case PROCESSOR_PENTIUMPRO:
24546 case PROCESSOR_CORE2:
24547 case PROCESSOR_COREI7:
24548 case PROCESSOR_HASWELL:
24549 case PROCESSOR_ATOM:
24550 case PROCESSOR_SLM:
24551 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24552 as many instructions can be executed on a cycle, i.e.,
24553 issue_rate. I wonder why tuning for many CPUs does not do this. */
24554 if (reload_completed)
24555 return ix86_issue_rate ();
24556 /* Don't use lookahead for pre-reload schedule to save compile time. */
24564 /* Try to reorder the ready list to take advantage of Atom's pipelined IMUL
24565 execution.  It is applied if
24566 (1) an IMUL instruction is on the top of the list;
24567 (2) there exists exactly one producer of an independent IMUL instruction in the ready list;
24569 (3) the found producer is put on the top of the ready list.
24570 Returns the issue rate.  */
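/* The idea, following the comment above, is that Atom pipelines
   independent IMULs: scheduling the producer of another IMUL right
   after the one on top of the list lets that second IMUL issue
   back-to-back instead of stalling on its operand.  */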
24573 ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
24574 int clock_var ATTRIBUTE_UNUSED)
24576 static int issue_rate = -1;
24577 int n_ready = *pn_ready;
24578 rtx insn, insn1, insn2;
24580 sd_iterator_def sd_it;
24584 /* Set up issue rate. */
24585 issue_rate = ix86_issue_rate();
24587 /* Do reordering for Atom only.  */
24588 if (ix86_tune != PROCESSOR_ATOM)
24590 /* Do not perform ready list reordering for the pre-reload schedule pass.  */
24591 if (!reload_completed)
24593 /* Nothing to do if ready list contains only 1 instruction. */
24597 /* Check that IMUL instruction is on the top of ready list. */
24598 insn = ready[n_ready - 1];
24599 if (!NONDEBUG_INSN_P (insn))
24601 insn = PATTERN (insn);
24602 if (GET_CODE (insn) == PARALLEL)
24603 insn = XVECEXP (insn, 0, 0);
24604 if (GET_CODE (insn) != SET)
24606 if (!(GET_CODE (SET_SRC (insn)) == MULT
24607 && GET_MODE (SET_SRC (insn)) == SImode))
24610 /* Search for producer of independent IMUL instruction. */
24611 for (i = n_ready - 2; i>= 0; i--)
24614 if (!NONDEBUG_INSN_P (insn))
24616 /* Skip IMUL instruction. */
24617 insn2 = PATTERN (insn);
24618 if (GET_CODE (insn2) == PARALLEL)
24619 insn2 = XVECEXP (insn2, 0, 0);
24620 if (GET_CODE (insn2) == SET
24621 && GET_CODE (SET_SRC (insn2)) == MULT
24622 && GET_MODE (SET_SRC (insn2)) == SImode)
24625 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
24628 con = DEP_CON (dep);
24629 if (!NONDEBUG_INSN_P (con))
24631 insn1 = PATTERN (con);
24632 if (GET_CODE (insn1) == PARALLEL)
24633 insn1 = XVECEXP (insn1, 0, 0);
24635 if (GET_CODE (insn1) == SET
24636 && GET_CODE (SET_SRC (insn1)) == MULT
24637 && GET_MODE (SET_SRC (insn1)) == SImode)
24639 sd_iterator_def sd_it1;
24641 /* Check that there is no other dependee for the IMUL.  */
24643 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
24646 pro = DEP_PRO (dep1);
24647 if (!NONDEBUG_INSN_P (pro))
24660 return issue_rate; /* Didn't find IMUL producer. */
24662 if (sched_verbose > 1)
24663 fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
24664 INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));
24666 /* Put IMUL producer (ready[index]) at the top of ready list. */
24667 insn1= ready[index];
24668 for (i = index; i < n_ready - 1; i++)
24669 ready[i] = ready[i + 1];
24670 ready[n_ready - 1] = insn1;
24676 ix86_class_likely_spilled_p (reg_class_t);
24678 /* Return true if the lhs of INSN is a HW function argument register, and set
24679 IS_SPILLED to true if it is a likely-spilled HW register.  */
24681 insn_is_function_arg (rtx insn, bool* is_spilled)
24685 if (!NONDEBUG_INSN_P (insn))
24687 /* Call instructions are not movable; ignore them.  */
24690 insn = PATTERN (insn);
24691 if (GET_CODE (insn) == PARALLEL)
24692 insn = XVECEXP (insn, 0, 0);
24693 if (GET_CODE (insn) != SET)
24695 dst = SET_DEST (insn);
24696 if (REG_P (dst) && HARD_REGISTER_P (dst)
24697 && ix86_function_arg_regno_p (REGNO (dst)))
24699 /* Is it likely spilled HW register? */
24700 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
24701 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
24702 *is_spilled = true;
24708 /* Add output dependencies for a chain of adjacent function arguments, but only
24709 if there is a move to a likely-spilled HW register.  Return the first argument
24710 if at least one dependence was added, or NULL otherwise.  */
24712 add_parameter_dependencies (rtx call, rtx head)
24716 rtx first_arg = NULL;
24717 bool is_spilled = false;
24719 head = PREV_INSN (head);
24721 /* Find the argument-passing instruction nearest to the call.  */
24724 last = PREV_INSN (last);
24727 if (!NONDEBUG_INSN_P (last))
24729 if (insn_is_function_arg (last, &is_spilled))
24737 insn = PREV_INSN (last);
24738 if (!INSN_P (insn))
24742 if (!NONDEBUG_INSN_P (insn))
24747 if (insn_is_function_arg (insn, &is_spilled))
24749 /* Add an output dependence between two function arguments if the chain
24750 of output arguments contains likely-spilled HW registers.  */
24752 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
24753 first_arg = last = insn;
24763 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its code motion.  */
24766 avoid_func_arg_motion (rtx first_arg, rtx insn)
24771 set = single_set (insn);
24774 tmp = SET_DEST (set);
24777 /* Add output dependency to the first function argument. */
24778 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
24781 /* Add anti dependency. */
24782 add_dependence (first_arg, insn, REG_DEP_ANTI);
24785 /* Avoid cross-block motion of a function argument by adding a dependency
24786 from the first non-jump instruction in BB.  */
24788 add_dependee_for_func_arg (rtx arg, basic_block bb)
24790 rtx insn = BB_END (bb);
24794 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
24796 rtx set = single_set (insn);
24799 avoid_func_arg_motion (arg, insn);
24803 if (insn == BB_HEAD (bb))
24805 insn = PREV_INSN (insn);
24809 /* Hook for the pre-reload scheduler - avoid motion of function arguments
24810 passed in likely-spilled HW registers.  */
24812 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
24815 rtx first_arg = NULL;
24816 if (reload_completed)
24818 while (head != tail && DEBUG_INSN_P (head))
24819 head = NEXT_INSN (head);
24820 for (insn = tail; insn != head; insn = PREV_INSN (insn))
24821 if (INSN_P (insn) && CALL_P (insn))
24823 first_arg = add_parameter_dependencies (insn, head);
24826 /* Add a dependee for the first argument to predecessors, but only if the
24827 region contains more than one block.  */
24828 basic_block bb = BLOCK_FOR_INSN (insn);
24829 int rgn = CONTAINING_RGN (bb->index);
24830 int nr_blks = RGN_NR_BLOCKS (rgn);
24831 /* Skip trivial regions and region head blocks that can have
24832 predecessors outside of region. */
24833 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
24837 /* Assume that region is SCC, i.e. all immediate predecessors
24838 of non-head block are in the same region. */
24839 FOR_EACH_EDGE (e, ei, bb->preds)
24841 /* Avoid creating loop-carried dependencies by using the
24842 topological ordering of the region.  */
24843 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
24844 add_dependee_for_func_arg (first_arg, e->src);
24852 else if (first_arg)
24853 avoid_func_arg_motion (first_arg, insn);
24856 /* Hook for pre-reload schedule - set priority of moves from likely spilled
24857 HW registers to maximum, to schedule them as soon as possible.  These are
24858 moves from function argument registers at the top of the function entry
24859 and moves from function return value registers after call. */
24861 ix86_adjust_priority (rtx insn, int priority)
24865 if (reload_completed)
24868 if (!NONDEBUG_INSN_P (insn))
24871 set = single_set (insn);
24874 rtx tmp = SET_SRC (set);
24876 && HARD_REGISTER_P (tmp)
24877 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
24878 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
24879 return current_sched_info->sched_max_insns_priority;
24885 /* Model decoder of Core 2/i7.
24886 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24887 track the instruction fetch block boundaries and make sure that long
24888 (9+ bytes) instructions are assigned to D0. */
24890 /* Maximum length of an insn that can be handled by
24891 a secondary decoder unit. '8' for Core 2/i7. */
24892 static int core2i7_secondary_decoder_max_insn_size;
24894 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24895 '16' for Core 2/i7. */
24896 static int core2i7_ifetch_block_size;
24898 /* Maximum number of instructions decoder can handle per cycle.
24899 '6' for Core 2/i7. */
24900 static int core2i7_ifetch_block_max_insns;
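/* As a worked example with the Core 2/i7 values set below (8-byte
   secondary-decoder limit, 16-byte fetch block, at most 6 insns per
   block): after three 5-byte insns, 15 of the 16 fetch bytes are used,
   so a following 2-byte insn is filtered out until the next cycle even
   though the 6-insn limit has not been reached.  */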
24902 typedef struct ix86_first_cycle_multipass_data_ *
24903 ix86_first_cycle_multipass_data_t;
24904 typedef const struct ix86_first_cycle_multipass_data_ *
24905 const_ix86_first_cycle_multipass_data_t;
24907 /* A variable to store target state across calls to max_issue within one cycle.  */
24909 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
24910 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
24912 /* Initialize DATA. */
24914 core2i7_first_cycle_multipass_init (void *_data)
24916 ix86_first_cycle_multipass_data_t data
24917 = (ix86_first_cycle_multipass_data_t) _data;
24919 data->ifetch_block_len = 0;
24920 data->ifetch_block_n_insns = 0;
24921 data->ready_try_change = NULL;
24922 data->ready_try_change_size = 0;
24925 /* Advancing the cycle; reset ifetch block counts. */
24927 core2i7_dfa_post_advance_cycle (void)
24929 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
24931 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
24933 data->ifetch_block_len = 0;
24934 data->ifetch_block_n_insns = 0;
24937 static int min_insn_size (rtx);
24939 /* Filter out insns from ready_try that the core will not be able to issue
24940 on the current cycle due to decoder restrictions.  */
24942 core2i7_first_cycle_multipass_filter_ready_try
24943 (const_ix86_first_cycle_multipass_data_t data,
24944 char *ready_try, int n_ready, bool first_cycle_insn_p)
24951 if (ready_try[n_ready])
24954 insn = get_ready_element (n_ready);
24955 insn_size = min_insn_size (insn);
24957 if (/* If this is too long an insn for a secondary decoder ... */
24958 (!first_cycle_insn_p
24959 && insn_size > core2i7_secondary_decoder_max_insn_size)
24960 /* ... or it would not fit into the ifetch block ... */
24961 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
24962 /* ... or the decoder is full already ... */
24963 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
24964 /* ... mask the insn out. */
24966 ready_try[n_ready] = 1;
24968 if (data->ready_try_change)
24969 bitmap_set_bit (data->ready_try_change, n_ready);
24974 /* Prepare for a new round of multipass lookahead scheduling. */
24976 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
24977 bool first_cycle_insn_p)
24979 ix86_first_cycle_multipass_data_t data
24980 = (ix86_first_cycle_multipass_data_t) _data;
24981 const_ix86_first_cycle_multipass_data_t prev_data
24982 = ix86_first_cycle_multipass_data;
24984 /* Restore the state from the end of the previous round. */
24985 data->ifetch_block_len = prev_data->ifetch_block_len;
24986 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
24988 /* Filter instructions that cannot be issued on current cycle due to
24989 decoder restrictions. */
24990 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
24991 first_cycle_insn_p);
24994 /* INSN is being issued in current solution. Account for its impact on
24995 the decoder model. */
24997 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
24998 rtx insn, const void *_prev_data)
25000 ix86_first_cycle_multipass_data_t data
25001 = (ix86_first_cycle_multipass_data_t) _data;
25002 const_ix86_first_cycle_multipass_data_t prev_data
25003 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
25005 int insn_size = min_insn_size (insn);
25007 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
25008 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
25009 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
25010 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
25012 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
25013 if (!data->ready_try_change)
25015 data->ready_try_change = sbitmap_alloc (n_ready);
25016 data->ready_try_change_size = n_ready;
25018 else if (data->ready_try_change_size < n_ready)
25020 data->ready_try_change = sbitmap_resize (data->ready_try_change,
25022 data->ready_try_change_size = n_ready;
25024 bitmap_clear (data->ready_try_change);
25026 /* Filter out insns from ready_try that the core will not be able to issue
25027 on the current cycle due to decoder restrictions.  */
25028 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
25032 /* Revert the effect on ready_try. */
25034 core2i7_first_cycle_multipass_backtrack (const void *_data,
25036 int n_ready ATTRIBUTE_UNUSED)
25038 const_ix86_first_cycle_multipass_data_t data
25039 = (const_ix86_first_cycle_multipass_data_t) _data;
25040 unsigned int i = 0;
25041 sbitmap_iterator sbi;
25043 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
25044 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
25050 /* Save the result of multipass lookahead scheduling for the next round. */
25052 core2i7_first_cycle_multipass_end (const void *_data)
25054 const_ix86_first_cycle_multipass_data_t data
25055 = (const_ix86_first_cycle_multipass_data_t) _data;
25056 ix86_first_cycle_multipass_data_t next_data
25057 = ix86_first_cycle_multipass_data;
25061 next_data->ifetch_block_len = data->ifetch_block_len;
25062 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
25066 /* Deallocate target data. */
25068 core2i7_first_cycle_multipass_fini (void *_data)
25070 ix86_first_cycle_multipass_data_t data
25071 = (ix86_first_cycle_multipass_data_t) _data;
25073 if (data->ready_try_change)
25075 sbitmap_free (data->ready_try_change);
25076 data->ready_try_change = NULL;
25077 data->ready_try_change_size = 0;
25081 /* Prepare for scheduling pass. */
25083 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
25084 int verbose ATTRIBUTE_UNUSED,
25085 int max_uid ATTRIBUTE_UNUSED)
25087 /* Install scheduling hooks for current CPU. Some of these hooks are used
25088 in time-critical parts of the scheduler, so we only set them up when
25089 they are actually used. */
25092 case PROCESSOR_CORE2:
25093 case PROCESSOR_COREI7:
25094 case PROCESSOR_HASWELL:
25095 /* Do not perform multipass scheduling for pre-reload schedule
25096 to save compile time. */
25097 if (reload_completed)
25099 targetm.sched.dfa_post_advance_cycle
25100 = core2i7_dfa_post_advance_cycle;
25101 targetm.sched.first_cycle_multipass_init
25102 = core2i7_first_cycle_multipass_init;
25103 targetm.sched.first_cycle_multipass_begin
25104 = core2i7_first_cycle_multipass_begin;
25105 targetm.sched.first_cycle_multipass_issue
25106 = core2i7_first_cycle_multipass_issue;
25107 targetm.sched.first_cycle_multipass_backtrack
25108 = core2i7_first_cycle_multipass_backtrack;
25109 targetm.sched.first_cycle_multipass_end
25110 = core2i7_first_cycle_multipass_end;
25111 targetm.sched.first_cycle_multipass_fini
25112 = core2i7_first_cycle_multipass_fini;
25114 /* Set decoder parameters. */
25115 core2i7_secondary_decoder_max_insn_size = 8;
25116 core2i7_ifetch_block_size = 16;
25117 core2i7_ifetch_block_max_insns = 6;
25120 /* ... Fall through ... */
25122 targetm.sched.dfa_post_advance_cycle = NULL;
25123 targetm.sched.first_cycle_multipass_init = NULL;
25124 targetm.sched.first_cycle_multipass_begin = NULL;
25125 targetm.sched.first_cycle_multipass_issue = NULL;
25126 targetm.sched.first_cycle_multipass_backtrack = NULL;
25127 targetm.sched.first_cycle_multipass_end = NULL;
25128 targetm.sched.first_cycle_multipass_fini = NULL;
25134 /* Compute the alignment given to a constant that is being placed in memory.
25135 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
25137 The value of this function is used instead of that alignment to align the object.  */
25141 ix86_constant_alignment (tree exp, int align)
25143 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
25144 || TREE_CODE (exp) == INTEGER_CST)
25146 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
25148 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
25151 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
25152 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
25153 return BITS_PER_WORD;
25158 /* Compute the alignment for a static variable.
25159 TYPE is the data type, and ALIGN is the alignment that
25160 the object would ordinarily have. The value of this function is used
25161 instead of that alignment to align the object. */
25164 ix86_data_alignment (tree type, int align)
25166 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
25168 if (AGGREGATE_TYPE_P (type)
25169 && TYPE_SIZE (type)
25170 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
25171 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
25172 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
25173 && align < max_align)
25176 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25177 to a 16-byte boundary.  */
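/* For instance, on x86-64 a static char array of 32 bytes is given
   16-byte alignment by the check below even though its natural
   alignment is 1, so it can be accessed with aligned SSE loads.  */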
25180 if (AGGREGATE_TYPE_P (type)
25181 && TYPE_SIZE (type)
25182 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
25183 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
25184 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
25188 if (TREE_CODE (type) == ARRAY_TYPE)
25190 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
25192 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
25195 else if (TREE_CODE (type) == COMPLEX_TYPE)
25198 if (TYPE_MODE (type) == DCmode && align < 64)
25200 if ((TYPE_MODE (type) == XCmode
25201 || TYPE_MODE (type) == TCmode) && align < 128)
25204 else if ((TREE_CODE (type) == RECORD_TYPE
25205 || TREE_CODE (type) == UNION_TYPE
25206 || TREE_CODE (type) == QUAL_UNION_TYPE)
25207 && TYPE_FIELDS (type))
25209 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
25211 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
25214 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
25215 || TREE_CODE (type) == INTEGER_TYPE)
25217 if (TYPE_MODE (type) == DFmode && align < 64)
25219 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
25226 /* Compute the alignment for a local variable or a stack slot. EXP is
25227 the data type or decl itself, MODE is the widest mode available and
25228 ALIGN is the alignment that the object would ordinarily have. The
25229 value of this macro is used instead of that alignment to align the
25233 ix86_local_alignment (tree exp, enum machine_mode mode,
25234 unsigned int align)
25238 if (exp && DECL_P (exp))
25240 type = TREE_TYPE (exp);
25249 /* Don't do dynamic stack realignment for long long objects with
25250 -mpreferred-stack-boundary=2. */
25253 && ix86_preferred_stack_boundary < 64
25254 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
25255 && (!type || !TYPE_USER_ALIGN (type))
25256 && (!decl || !DECL_USER_ALIGN (decl)))
25259 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25260 register in MODE.  We will return the largest alignment of XF and DF.  */
25264 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
25265 align = GET_MODE_ALIGNMENT (DFmode);
25269 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25270 to a 16-byte boundary.  The exact wording is:
25272 An array uses the same alignment as its elements, except that a local or
25273 global array variable of length at least 16 bytes or
25274 a C99 variable-length array variable always has alignment of at least 16 bytes.
25276 This was added to allow the use of aligned SSE instructions on arrays.  This
25277 rule is meant for static storage (where the compiler cannot do the analysis
25278 by itself).  We follow it for automatic variables only when convenient:
25279 we fully control everything in the function being compiled, and functions from
25280 other units cannot rely on the alignment.
25282 Exclude the va_list type.  It is the common case of a local array where
25283 we cannot benefit from the alignment.
25285 TODO: Probably one should optimize for size only when the variable is not escaping.  */
25286 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
25289 if (AGGREGATE_TYPE_P (type)
25290 && (va_list_type_node == NULL_TREE
25291 || (TYPE_MAIN_VARIANT (type)
25292 != TYPE_MAIN_VARIANT (va_list_type_node)))
25293 && TYPE_SIZE (type)
25294 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
25295 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
25296 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
25299 if (TREE_CODE (type) == ARRAY_TYPE)
25301 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
25303 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
25306 else if (TREE_CODE (type) == COMPLEX_TYPE)
25308 if (TYPE_MODE (type) == DCmode && align < 64)
25310 if ((TYPE_MODE (type) == XCmode
25311 || TYPE_MODE (type) == TCmode) && align < 128)
25314 else if ((TREE_CODE (type) == RECORD_TYPE
25315 || TREE_CODE (type) == UNION_TYPE
25316 || TREE_CODE (type) == QUAL_UNION_TYPE)
25317 && TYPE_FIELDS (type))
25319 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
25321 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
25324 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
25325 || TREE_CODE (type) == INTEGER_TYPE)
25328 if (TYPE_MODE (type) == DFmode && align < 64)
25330 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
25336 /* Compute the minimum required alignment for dynamic stack realignment
25337 purposes for a local variable, parameter or a stack slot. EXP is
25338 the data type or decl itself, MODE is its mode and ALIGN is the
25339 alignment that the object would ordinarily have. */
25342 ix86_minimum_alignment (tree exp, enum machine_mode mode,
25343 unsigned int align)
25347 if (exp && DECL_P (exp))
25349 type = TREE_TYPE (exp);
25358 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
25361 /* Don't do dynamic stack realignment for long long objects with
25362 -mpreferred-stack-boundary=2. */
25363 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
25364 && (!type || !TYPE_USER_ALIGN (type))
25365 && (!decl || !DECL_USER_ALIGN (decl)))
25371 /* Find a location for the static chain incoming to a nested function.
25372 This is a register, unless all free registers are used by arguments. */
25375 ix86_static_chain (const_tree fndecl, bool incoming_p)
25379 if (!DECL_STATIC_CHAIN (fndecl))
25384 /* We always use R10 in 64-bit mode. */
25392 /* By default in 32-bit mode we use ECX to pass the static chain. */
25395 fntype = TREE_TYPE (fndecl);
25396 ccvt = ix86_get_callcvt (fntype);
25397 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
25399 /* Fastcall functions use ecx/edx for arguments, which leaves
25400 us with EAX for the static chain.
25401 Thiscall functions use ecx for arguments, which also
25402 leaves us with EAX for the static chain. */
25405 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
25407 /* Thiscall functions use ecx for arguments, which leaves
25408 us with EAX and EDX for the static chain.
25409 For ABI compatibility we use EAX.  */
25412 else if (ix86_function_regparm (fntype, fndecl) == 3)
25414 /* For regparm 3, we have no free call-clobbered registers in
25415 which to store the static chain. In order to implement this,
25416 we have the trampoline push the static chain to the stack.
25417 However, we can't push a value below the return address when
25418 we call the nested function directly, so we have to use an
25419 alternate entry point. For this we use ESI, and have the
25420 alternate entry point push ESI, so that things appear the
25421 same once we're executing the nested function. */
25424 if (fndecl == current_function_decl)
25425 ix86_static_chain_on_stack = true;
25426 return gen_frame_mem (SImode,
25427 plus_constant (Pmode,
25428 arg_pointer_rtx, -8));
25434 return gen_rtx_REG (Pmode, regno);
25437 /* Emit RTL insns to initialize the variable parts of a trampoline.
25438 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25439 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25440 to be passed to the target function. */
25443 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
25449 fnaddr = XEXP (DECL_RTL (fndecl), 0);
25455 /* Load the function address into r11.  Try to load the address using
25456 the shorter movl instead of movabs.  We may want to support
25457 movq for kernel mode, but the kernel does not use trampolines at
25458 the moment.  FNADDR is a 32-bit address and may not be in
25459 DImode when ptr_mode == SImode.  Always use movl in this case.  */
25461 if (ptr_mode == SImode
25462 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
25464 fnaddr = copy_addr_to_reg (fnaddr);
25466 mem = adjust_address (m_tramp, HImode, offset);
25467 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
25469 mem = adjust_address (m_tramp, SImode, offset + 2);
25470 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
25475 mem = adjust_address (m_tramp, HImode, offset);
25476 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
25478 mem = adjust_address (m_tramp, DImode, offset + 2);
25479 emit_move_insn (mem, fnaddr);
25483 /* Load the static chain into r10 using movabs.  Use the shorter movl
25484 instead of movabs when ptr_mode == SImode. */
25485 if (ptr_mode == SImode)
25496 mem = adjust_address (m_tramp, HImode, offset);
25497 emit_move_insn (mem, gen_int_mode (opcode, HImode));
25499 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
25500 emit_move_insn (mem, chain_value);
25503 /* Jump to r11; the last (unused) byte is a nop, only there to
25504 pad the write out to a single 32-bit store. */
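/* The stored bytes are 49 ff e3 90: a REX-prefixed jmp *%r11 followed
   by the padding nop.  */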
25505 mem = adjust_address (m_tramp, SImode, offset);
25506 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
25513 /* Depending on the static chain location, either load a register
25514 with a constant, or push the constant to the stack. All of the
25515 instructions are the same size. */
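/* All of these forms are 5 bytes: mov $imm32 to %eax or %ecx is b8/b9
   plus the 32-bit constant, push $imm32 is 68 plus the constant, and
   the jmp emitted below is e9 plus a 32-bit displacement.  */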
25516 chain = ix86_static_chain (fndecl, true);
25519 switch (REGNO (chain))
25522 opcode = 0xb8; break;
25524 opcode = 0xb9; break;
25526 gcc_unreachable ();
25532 mem = adjust_address (m_tramp, QImode, offset);
25533 emit_move_insn (mem, gen_int_mode (opcode, QImode));
25535 mem = adjust_address (m_tramp, SImode, offset + 1);
25536 emit_move_insn (mem, chain_value);
25539 mem = adjust_address (m_tramp, QImode, offset);
25540 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
25542 mem = adjust_address (m_tramp, SImode, offset + 1);
25544 /* Compute offset from the end of the jmp to the target function.
25545 In the case in which the trampoline stores the static chain on
25546 the stack, we need to skip the first insn which pushes the
25547 (call-saved) register static chain; this push is 1 byte. */
25549 disp = expand_binop (SImode, sub_optab, fnaddr,
25550 plus_constant (Pmode, XEXP (m_tramp, 0),
25551 offset - (MEM_P (chain) ? 1 : 0)),
25552 NULL_RTX, 1, OPTAB_DIRECT);
25553 emit_move_insn (mem, disp);
25556 gcc_assert (offset <= TRAMPOLINE_SIZE);
25558 #ifdef HAVE_ENABLE_EXECUTE_STACK
25559 #ifdef CHECK_EXECUTE_STACK_ENABLED
25560 if (CHECK_EXECUTE_STACK_ENABLED)
25562 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
25563 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
25567 /* The following file contains several enumerations and data structures
25568 built from the definitions in i386-builtin-types.def. */
25570 #include "i386-builtin-types.inc"
25572 /* Table for the ix86 builtin non-function types. */
25573 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
25575 /* Retrieve an element from the above table, building some of
25576 the types lazily. */
25579 ix86_get_builtin_type (enum ix86_builtin_type tcode)
25581 unsigned int index;
25584 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
25586 type = ix86_builtin_type_tab[(int) tcode];
25590 gcc_assert (tcode > IX86_BT_LAST_PRIM);
25591 if (tcode <= IX86_BT_LAST_VECT)
25593 enum machine_mode mode;
25595 index = tcode - IX86_BT_LAST_PRIM - 1;
25596 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
25597 mode = ix86_builtin_type_vect_mode[index];
25599 type = build_vector_type_for_mode (itype, mode);
25605 index = tcode - IX86_BT_LAST_VECT - 1;
25606 if (tcode <= IX86_BT_LAST_PTR)
25607 quals = TYPE_UNQUALIFIED;
25609 quals = TYPE_QUAL_CONST;
25611 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
25612 if (quals != TYPE_UNQUALIFIED)
25613 itype = build_qualified_type (itype, quals);
25615 type = build_pointer_type (itype);
25618 ix86_builtin_type_tab[(int) tcode] = type;
25622 /* Table for the ix86 builtin function types. */
25623 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
25625 /* Retrieve an element from the above table, building some of
25626 the types lazily. */
25629 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
25633 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
25635 type = ix86_builtin_func_type_tab[(int) tcode];
25639 if (tcode <= IX86_BT_LAST_FUNC)
25641 unsigned start = ix86_builtin_func_start[(int) tcode];
25642 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
25643 tree rtype, atype, args = void_list_node;
25646 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
25647 for (i = after - 1; i > start; --i)
25649 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
25650 args = tree_cons (NULL, atype, args);
25653 type = build_function_type (rtype, args);
25657 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
25658 enum ix86_builtin_func_type icode;
25660 icode = ix86_builtin_func_alias_base[index];
25661 type = ix86_get_builtin_func_type (icode);
25664 ix86_builtin_func_type_tab[(int) tcode] = type;
25669 /* Codes for all the SSE/MMX builtins. */
25672 IX86_BUILTIN_ADDPS,
25673 IX86_BUILTIN_ADDSS,
25674 IX86_BUILTIN_DIVPS,
25675 IX86_BUILTIN_DIVSS,
25676 IX86_BUILTIN_MULPS,
25677 IX86_BUILTIN_MULSS,
25678 IX86_BUILTIN_SUBPS,
25679 IX86_BUILTIN_SUBSS,
25681 IX86_BUILTIN_CMPEQPS,
25682 IX86_BUILTIN_CMPLTPS,
25683 IX86_BUILTIN_CMPLEPS,
25684 IX86_BUILTIN_CMPGTPS,
25685 IX86_BUILTIN_CMPGEPS,
25686 IX86_BUILTIN_CMPNEQPS,
25687 IX86_BUILTIN_CMPNLTPS,
25688 IX86_BUILTIN_CMPNLEPS,
25689 IX86_BUILTIN_CMPNGTPS,
25690 IX86_BUILTIN_CMPNGEPS,
25691 IX86_BUILTIN_CMPORDPS,
25692 IX86_BUILTIN_CMPUNORDPS,
25693 IX86_BUILTIN_CMPEQSS,
25694 IX86_BUILTIN_CMPLTSS,
25695 IX86_BUILTIN_CMPLESS,
25696 IX86_BUILTIN_CMPNEQSS,
25697 IX86_BUILTIN_CMPNLTSS,
25698 IX86_BUILTIN_CMPNLESS,
25699 IX86_BUILTIN_CMPNGTSS,
25700 IX86_BUILTIN_CMPNGESS,
25701 IX86_BUILTIN_CMPORDSS,
25702 IX86_BUILTIN_CMPUNORDSS,
25704 IX86_BUILTIN_COMIEQSS,
25705 IX86_BUILTIN_COMILTSS,
25706 IX86_BUILTIN_COMILESS,
25707 IX86_BUILTIN_COMIGTSS,
25708 IX86_BUILTIN_COMIGESS,
25709 IX86_BUILTIN_COMINEQSS,
25710 IX86_BUILTIN_UCOMIEQSS,
25711 IX86_BUILTIN_UCOMILTSS,
25712 IX86_BUILTIN_UCOMILESS,
25713 IX86_BUILTIN_UCOMIGTSS,
25714 IX86_BUILTIN_UCOMIGESS,
25715 IX86_BUILTIN_UCOMINEQSS,
25717 IX86_BUILTIN_CVTPI2PS,
25718 IX86_BUILTIN_CVTPS2PI,
25719 IX86_BUILTIN_CVTSI2SS,
25720 IX86_BUILTIN_CVTSI642SS,
25721 IX86_BUILTIN_CVTSS2SI,
25722 IX86_BUILTIN_CVTSS2SI64,
25723 IX86_BUILTIN_CVTTPS2PI,
25724 IX86_BUILTIN_CVTTSS2SI,
25725 IX86_BUILTIN_CVTTSS2SI64,
25727 IX86_BUILTIN_MAXPS,
25728 IX86_BUILTIN_MAXSS,
25729 IX86_BUILTIN_MINPS,
25730 IX86_BUILTIN_MINSS,
25732 IX86_BUILTIN_LOADUPS,
25733 IX86_BUILTIN_STOREUPS,
25734 IX86_BUILTIN_MOVSS,
25736 IX86_BUILTIN_MOVHLPS,
25737 IX86_BUILTIN_MOVLHPS,
25738 IX86_BUILTIN_LOADHPS,
25739 IX86_BUILTIN_LOADLPS,
25740 IX86_BUILTIN_STOREHPS,
25741 IX86_BUILTIN_STORELPS,
25743 IX86_BUILTIN_MASKMOVQ,
25744 IX86_BUILTIN_MOVMSKPS,
25745 IX86_BUILTIN_PMOVMSKB,
25747 IX86_BUILTIN_MOVNTPS,
25748 IX86_BUILTIN_MOVNTQ,
25750 IX86_BUILTIN_LOADDQU,
25751 IX86_BUILTIN_STOREDQU,
25753 IX86_BUILTIN_PACKSSWB,
25754 IX86_BUILTIN_PACKSSDW,
25755 IX86_BUILTIN_PACKUSWB,
25757 IX86_BUILTIN_PADDB,
25758 IX86_BUILTIN_PADDW,
25759 IX86_BUILTIN_PADDD,
25760 IX86_BUILTIN_PADDQ,
25761 IX86_BUILTIN_PADDSB,
25762 IX86_BUILTIN_PADDSW,
25763 IX86_BUILTIN_PADDUSB,
25764 IX86_BUILTIN_PADDUSW,
25765 IX86_BUILTIN_PSUBB,
25766 IX86_BUILTIN_PSUBW,
25767 IX86_BUILTIN_PSUBD,
25768 IX86_BUILTIN_PSUBQ,
25769 IX86_BUILTIN_PSUBSB,
25770 IX86_BUILTIN_PSUBSW,
25771 IX86_BUILTIN_PSUBUSB,
25772 IX86_BUILTIN_PSUBUSW,
25775 IX86_BUILTIN_PANDN,
25779 IX86_BUILTIN_PAVGB,
25780 IX86_BUILTIN_PAVGW,
25782 IX86_BUILTIN_PCMPEQB,
25783 IX86_BUILTIN_PCMPEQW,
25784 IX86_BUILTIN_PCMPEQD,
25785 IX86_BUILTIN_PCMPGTB,
25786 IX86_BUILTIN_PCMPGTW,
25787 IX86_BUILTIN_PCMPGTD,
25789 IX86_BUILTIN_PMADDWD,
25791 IX86_BUILTIN_PMAXSW,
25792 IX86_BUILTIN_PMAXUB,
25793 IX86_BUILTIN_PMINSW,
25794 IX86_BUILTIN_PMINUB,
25796 IX86_BUILTIN_PMULHUW,
25797 IX86_BUILTIN_PMULHW,
25798 IX86_BUILTIN_PMULLW,
25800 IX86_BUILTIN_PSADBW,
25801 IX86_BUILTIN_PSHUFW,
25803 IX86_BUILTIN_PSLLW,
25804 IX86_BUILTIN_PSLLD,
25805 IX86_BUILTIN_PSLLQ,
25806 IX86_BUILTIN_PSRAW,
25807 IX86_BUILTIN_PSRAD,
25808 IX86_BUILTIN_PSRLW,
25809 IX86_BUILTIN_PSRLD,
25810 IX86_BUILTIN_PSRLQ,
25811 IX86_BUILTIN_PSLLWI,
25812 IX86_BUILTIN_PSLLDI,
25813 IX86_BUILTIN_PSLLQI,
25814 IX86_BUILTIN_PSRAWI,
25815 IX86_BUILTIN_PSRADI,
25816 IX86_BUILTIN_PSRLWI,
25817 IX86_BUILTIN_PSRLDI,
25818 IX86_BUILTIN_PSRLQI,
25820 IX86_BUILTIN_PUNPCKHBW,
25821 IX86_BUILTIN_PUNPCKHWD,
25822 IX86_BUILTIN_PUNPCKHDQ,
25823 IX86_BUILTIN_PUNPCKLBW,
25824 IX86_BUILTIN_PUNPCKLWD,
25825 IX86_BUILTIN_PUNPCKLDQ,
25827 IX86_BUILTIN_SHUFPS,
25829 IX86_BUILTIN_RCPPS,
25830 IX86_BUILTIN_RCPSS,
25831 IX86_BUILTIN_RSQRTPS,
25832 IX86_BUILTIN_RSQRTPS_NR,
25833 IX86_BUILTIN_RSQRTSS,
25834 IX86_BUILTIN_RSQRTF,
25835 IX86_BUILTIN_SQRTPS,
25836 IX86_BUILTIN_SQRTPS_NR,
25837 IX86_BUILTIN_SQRTSS,
25839 IX86_BUILTIN_UNPCKHPS,
25840 IX86_BUILTIN_UNPCKLPS,
25842 IX86_BUILTIN_ANDPS,
25843 IX86_BUILTIN_ANDNPS,
25845 IX86_BUILTIN_XORPS,
25848 IX86_BUILTIN_LDMXCSR,
25849 IX86_BUILTIN_STMXCSR,
25850 IX86_BUILTIN_SFENCE,
25852 IX86_BUILTIN_FXSAVE,
25853 IX86_BUILTIN_FXRSTOR,
25854 IX86_BUILTIN_FXSAVE64,
25855 IX86_BUILTIN_FXRSTOR64,
25857 IX86_BUILTIN_XSAVE,
25858 IX86_BUILTIN_XRSTOR,
25859 IX86_BUILTIN_XSAVE64,
25860 IX86_BUILTIN_XRSTOR64,
25862 IX86_BUILTIN_XSAVEOPT,
25863 IX86_BUILTIN_XSAVEOPT64,
25865 /* 3DNow! Original */
25866 IX86_BUILTIN_FEMMS,
25867 IX86_BUILTIN_PAVGUSB,
25868 IX86_BUILTIN_PF2ID,
25869 IX86_BUILTIN_PFACC,
25870 IX86_BUILTIN_PFADD,
25871 IX86_BUILTIN_PFCMPEQ,
25872 IX86_BUILTIN_PFCMPGE,
25873 IX86_BUILTIN_PFCMPGT,
25874 IX86_BUILTIN_PFMAX,
25875 IX86_BUILTIN_PFMIN,
25876 IX86_BUILTIN_PFMUL,
25877 IX86_BUILTIN_PFRCP,
25878 IX86_BUILTIN_PFRCPIT1,
25879 IX86_BUILTIN_PFRCPIT2,
25880 IX86_BUILTIN_PFRSQIT1,
25881 IX86_BUILTIN_PFRSQRT,
25882 IX86_BUILTIN_PFSUB,
25883 IX86_BUILTIN_PFSUBR,
25884 IX86_BUILTIN_PI2FD,
25885 IX86_BUILTIN_PMULHRW,
25887 /* 3DNow! Athlon Extensions */
25888 IX86_BUILTIN_PF2IW,
25889 IX86_BUILTIN_PFNACC,
25890 IX86_BUILTIN_PFPNACC,
25891 IX86_BUILTIN_PI2FW,
25892 IX86_BUILTIN_PSWAPDSI,
25893 IX86_BUILTIN_PSWAPDSF,
25896 IX86_BUILTIN_ADDPD,
25897 IX86_BUILTIN_ADDSD,
25898 IX86_BUILTIN_DIVPD,
25899 IX86_BUILTIN_DIVSD,
25900 IX86_BUILTIN_MULPD,
25901 IX86_BUILTIN_MULSD,
25902 IX86_BUILTIN_SUBPD,
25903 IX86_BUILTIN_SUBSD,
25905 IX86_BUILTIN_CMPEQPD,
25906 IX86_BUILTIN_CMPLTPD,
25907 IX86_BUILTIN_CMPLEPD,
25908 IX86_BUILTIN_CMPGTPD,
25909 IX86_BUILTIN_CMPGEPD,
25910 IX86_BUILTIN_CMPNEQPD,
25911 IX86_BUILTIN_CMPNLTPD,
25912 IX86_BUILTIN_CMPNLEPD,
25913 IX86_BUILTIN_CMPNGTPD,
25914 IX86_BUILTIN_CMPNGEPD,
25915 IX86_BUILTIN_CMPORDPD,
25916 IX86_BUILTIN_CMPUNORDPD,
25917 IX86_BUILTIN_CMPEQSD,
25918 IX86_BUILTIN_CMPLTSD,
25919 IX86_BUILTIN_CMPLESD,
25920 IX86_BUILTIN_CMPNEQSD,
25921 IX86_BUILTIN_CMPNLTSD,
25922 IX86_BUILTIN_CMPNLESD,
25923 IX86_BUILTIN_CMPORDSD,
25924 IX86_BUILTIN_CMPUNORDSD,
25926 IX86_BUILTIN_COMIEQSD,
25927 IX86_BUILTIN_COMILTSD,
25928 IX86_BUILTIN_COMILESD,
25929 IX86_BUILTIN_COMIGTSD,
25930 IX86_BUILTIN_COMIGESD,
25931 IX86_BUILTIN_COMINEQSD,
25932 IX86_BUILTIN_UCOMIEQSD,
25933 IX86_BUILTIN_UCOMILTSD,
25934 IX86_BUILTIN_UCOMILESD,
25935 IX86_BUILTIN_UCOMIGTSD,
25936 IX86_BUILTIN_UCOMIGESD,
25937 IX86_BUILTIN_UCOMINEQSD,
25939 IX86_BUILTIN_MAXPD,
25940 IX86_BUILTIN_MAXSD,
25941 IX86_BUILTIN_MINPD,
25942 IX86_BUILTIN_MINSD,
25944 IX86_BUILTIN_ANDPD,
25945 IX86_BUILTIN_ANDNPD,
25947 IX86_BUILTIN_XORPD,
25949 IX86_BUILTIN_SQRTPD,
25950 IX86_BUILTIN_SQRTSD,
25952 IX86_BUILTIN_UNPCKHPD,
25953 IX86_BUILTIN_UNPCKLPD,
25955 IX86_BUILTIN_SHUFPD,
25957 IX86_BUILTIN_LOADUPD,
25958 IX86_BUILTIN_STOREUPD,
25959 IX86_BUILTIN_MOVSD,
25961 IX86_BUILTIN_LOADHPD,
25962 IX86_BUILTIN_LOADLPD,
25964 IX86_BUILTIN_CVTDQ2PD,
25965 IX86_BUILTIN_CVTDQ2PS,
25967 IX86_BUILTIN_CVTPD2DQ,
25968 IX86_BUILTIN_CVTPD2PI,
25969 IX86_BUILTIN_CVTPD2PS,
25970 IX86_BUILTIN_CVTTPD2DQ,
25971 IX86_BUILTIN_CVTTPD2PI,
25973 IX86_BUILTIN_CVTPI2PD,
25974 IX86_BUILTIN_CVTSI2SD,
25975 IX86_BUILTIN_CVTSI642SD,
25977 IX86_BUILTIN_CVTSD2SI,
25978 IX86_BUILTIN_CVTSD2SI64,
25979 IX86_BUILTIN_CVTSD2SS,
25980 IX86_BUILTIN_CVTSS2SD,
25981 IX86_BUILTIN_CVTTSD2SI,
25982 IX86_BUILTIN_CVTTSD2SI64,
25984 IX86_BUILTIN_CVTPS2DQ,
25985 IX86_BUILTIN_CVTPS2PD,
25986 IX86_BUILTIN_CVTTPS2DQ,
25988 IX86_BUILTIN_MOVNTI,
25989 IX86_BUILTIN_MOVNTI64,
25990 IX86_BUILTIN_MOVNTPD,
25991 IX86_BUILTIN_MOVNTDQ,
25993 IX86_BUILTIN_MOVQ128,
25996 IX86_BUILTIN_MASKMOVDQU,
25997 IX86_BUILTIN_MOVMSKPD,
25998 IX86_BUILTIN_PMOVMSKB128,
26000 IX86_BUILTIN_PACKSSWB128,
26001 IX86_BUILTIN_PACKSSDW128,
26002 IX86_BUILTIN_PACKUSWB128,
26004 IX86_BUILTIN_PADDB128,
26005 IX86_BUILTIN_PADDW128,
26006 IX86_BUILTIN_PADDD128,
26007 IX86_BUILTIN_PADDQ128,
26008 IX86_BUILTIN_PADDSB128,
26009 IX86_BUILTIN_PADDSW128,
26010 IX86_BUILTIN_PADDUSB128,
26011 IX86_BUILTIN_PADDUSW128,
26012 IX86_BUILTIN_PSUBB128,
26013 IX86_BUILTIN_PSUBW128,
26014 IX86_BUILTIN_PSUBD128,
26015 IX86_BUILTIN_PSUBQ128,
26016 IX86_BUILTIN_PSUBSB128,
26017 IX86_BUILTIN_PSUBSW128,
26018 IX86_BUILTIN_PSUBUSB128,
26019 IX86_BUILTIN_PSUBUSW128,
26021 IX86_BUILTIN_PAND128,
26022 IX86_BUILTIN_PANDN128,
26023 IX86_BUILTIN_POR128,
26024 IX86_BUILTIN_PXOR128,
26026 IX86_BUILTIN_PAVGB128,
26027 IX86_BUILTIN_PAVGW128,
26029 IX86_BUILTIN_PCMPEQB128,
26030 IX86_BUILTIN_PCMPEQW128,
26031 IX86_BUILTIN_PCMPEQD128,
26032 IX86_BUILTIN_PCMPGTB128,
26033 IX86_BUILTIN_PCMPGTW128,
26034 IX86_BUILTIN_PCMPGTD128,
26036 IX86_BUILTIN_PMADDWD128,
26038 IX86_BUILTIN_PMAXSW128,
26039 IX86_BUILTIN_PMAXUB128,
26040 IX86_BUILTIN_PMINSW128,
26041 IX86_BUILTIN_PMINUB128,
26043 IX86_BUILTIN_PMULUDQ,
26044 IX86_BUILTIN_PMULUDQ128,
26045 IX86_BUILTIN_PMULHUW128,
26046 IX86_BUILTIN_PMULHW128,
26047 IX86_BUILTIN_PMULLW128,
26049 IX86_BUILTIN_PSADBW128,
26050 IX86_BUILTIN_PSHUFHW,
26051 IX86_BUILTIN_PSHUFLW,
26052 IX86_BUILTIN_PSHUFD,
26054 IX86_BUILTIN_PSLLDQI128,
26055 IX86_BUILTIN_PSLLWI128,
26056 IX86_BUILTIN_PSLLDI128,
26057 IX86_BUILTIN_PSLLQI128,
26058 IX86_BUILTIN_PSRAWI128,
26059 IX86_BUILTIN_PSRADI128,
26060 IX86_BUILTIN_PSRLDQI128,
26061 IX86_BUILTIN_PSRLWI128,
26062 IX86_BUILTIN_PSRLDI128,
26063 IX86_BUILTIN_PSRLQI128,
26065 IX86_BUILTIN_PSLLDQ128,
26066 IX86_BUILTIN_PSLLW128,
26067 IX86_BUILTIN_PSLLD128,
26068 IX86_BUILTIN_PSLLQ128,
26069 IX86_BUILTIN_PSRAW128,
26070 IX86_BUILTIN_PSRAD128,
26071 IX86_BUILTIN_PSRLW128,
26072 IX86_BUILTIN_PSRLD128,
26073 IX86_BUILTIN_PSRLQ128,
26075 IX86_BUILTIN_PUNPCKHBW128,
26076 IX86_BUILTIN_PUNPCKHWD128,
26077 IX86_BUILTIN_PUNPCKHDQ128,
26078 IX86_BUILTIN_PUNPCKHQDQ128,
26079 IX86_BUILTIN_PUNPCKLBW128,
26080 IX86_BUILTIN_PUNPCKLWD128,
26081 IX86_BUILTIN_PUNPCKLDQ128,
26082 IX86_BUILTIN_PUNPCKLQDQ128,
26084 IX86_BUILTIN_CLFLUSH,
26085 IX86_BUILTIN_MFENCE,
26086 IX86_BUILTIN_LFENCE,
26087 IX86_BUILTIN_PAUSE,
26089 IX86_BUILTIN_BSRSI,
26090 IX86_BUILTIN_BSRDI,
26091 IX86_BUILTIN_RDPMC,
26092 IX86_BUILTIN_RDTSC,
26093 IX86_BUILTIN_RDTSCP,
26094 IX86_BUILTIN_ROLQI,
26095 IX86_BUILTIN_ROLHI,
26096 IX86_BUILTIN_RORQI,
26097 IX86_BUILTIN_RORHI,
26100 IX86_BUILTIN_ADDSUBPS,
26101 IX86_BUILTIN_HADDPS,
26102 IX86_BUILTIN_HSUBPS,
26103 IX86_BUILTIN_MOVSHDUP,
26104 IX86_BUILTIN_MOVSLDUP,
26105 IX86_BUILTIN_ADDSUBPD,
26106 IX86_BUILTIN_HADDPD,
26107 IX86_BUILTIN_HSUBPD,
26108 IX86_BUILTIN_LDDQU,
26110 IX86_BUILTIN_MONITOR,
26111 IX86_BUILTIN_MWAIT,
26114 IX86_BUILTIN_PHADDW,
26115 IX86_BUILTIN_PHADDD,
26116 IX86_BUILTIN_PHADDSW,
26117 IX86_BUILTIN_PHSUBW,
26118 IX86_BUILTIN_PHSUBD,
26119 IX86_BUILTIN_PHSUBSW,
26120 IX86_BUILTIN_PMADDUBSW,
26121 IX86_BUILTIN_PMULHRSW,
26122 IX86_BUILTIN_PSHUFB,
26123 IX86_BUILTIN_PSIGNB,
26124 IX86_BUILTIN_PSIGNW,
26125 IX86_BUILTIN_PSIGND,
26126 IX86_BUILTIN_PALIGNR,
26127 IX86_BUILTIN_PABSB,
26128 IX86_BUILTIN_PABSW,
26129 IX86_BUILTIN_PABSD,
26131 IX86_BUILTIN_PHADDW128,
26132 IX86_BUILTIN_PHADDD128,
26133 IX86_BUILTIN_PHADDSW128,
26134 IX86_BUILTIN_PHSUBW128,
26135 IX86_BUILTIN_PHSUBD128,
26136 IX86_BUILTIN_PHSUBSW128,
26137 IX86_BUILTIN_PMADDUBSW128,
26138 IX86_BUILTIN_PMULHRSW128,
26139 IX86_BUILTIN_PSHUFB128,
26140 IX86_BUILTIN_PSIGNB128,
26141 IX86_BUILTIN_PSIGNW128,
26142 IX86_BUILTIN_PSIGND128,
26143 IX86_BUILTIN_PALIGNR128,
26144 IX86_BUILTIN_PABSB128,
26145 IX86_BUILTIN_PABSW128,
26146 IX86_BUILTIN_PABSD128,
26148 /* AMDFAM10 - SSE4A New Instructions. */
26149 IX86_BUILTIN_MOVNTSD,
26150 IX86_BUILTIN_MOVNTSS,
26151 IX86_BUILTIN_EXTRQI,
26152 IX86_BUILTIN_EXTRQ,
26153 IX86_BUILTIN_INSERTQI,
26154 IX86_BUILTIN_INSERTQ,
26157 IX86_BUILTIN_BLENDPD,
26158 IX86_BUILTIN_BLENDPS,
26159 IX86_BUILTIN_BLENDVPD,
26160 IX86_BUILTIN_BLENDVPS,
26161 IX86_BUILTIN_PBLENDVB128,
26162 IX86_BUILTIN_PBLENDW128,
26167 IX86_BUILTIN_INSERTPS128,
26169 IX86_BUILTIN_MOVNTDQA,
26170 IX86_BUILTIN_MPSADBW128,
26171 IX86_BUILTIN_PACKUSDW128,
26172 IX86_BUILTIN_PCMPEQQ,
26173 IX86_BUILTIN_PHMINPOSUW128,
26175 IX86_BUILTIN_PMAXSB128,
26176 IX86_BUILTIN_PMAXSD128,
26177 IX86_BUILTIN_PMAXUD128,
26178 IX86_BUILTIN_PMAXUW128,
26180 IX86_BUILTIN_PMINSB128,
26181 IX86_BUILTIN_PMINSD128,
26182 IX86_BUILTIN_PMINUD128,
26183 IX86_BUILTIN_PMINUW128,
26185 IX86_BUILTIN_PMOVSXBW128,
26186 IX86_BUILTIN_PMOVSXBD128,
26187 IX86_BUILTIN_PMOVSXBQ128,
26188 IX86_BUILTIN_PMOVSXWD128,
26189 IX86_BUILTIN_PMOVSXWQ128,
26190 IX86_BUILTIN_PMOVSXDQ128,
26192 IX86_BUILTIN_PMOVZXBW128,
26193 IX86_BUILTIN_PMOVZXBD128,
26194 IX86_BUILTIN_PMOVZXBQ128,
26195 IX86_BUILTIN_PMOVZXWD128,
26196 IX86_BUILTIN_PMOVZXWQ128,
26197 IX86_BUILTIN_PMOVZXDQ128,
26199 IX86_BUILTIN_PMULDQ128,
26200 IX86_BUILTIN_PMULLD128,
26202 IX86_BUILTIN_ROUNDSD,
26203 IX86_BUILTIN_ROUNDSS,
26205 IX86_BUILTIN_ROUNDPD,
26206 IX86_BUILTIN_ROUNDPS,
26208 IX86_BUILTIN_FLOORPD,
26209 IX86_BUILTIN_CEILPD,
26210 IX86_BUILTIN_TRUNCPD,
26211 IX86_BUILTIN_RINTPD,
26212 IX86_BUILTIN_ROUNDPD_AZ,
26214 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
26215 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
26216 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
26218 IX86_BUILTIN_FLOORPS,
26219 IX86_BUILTIN_CEILPS,
26220 IX86_BUILTIN_TRUNCPS,
26221 IX86_BUILTIN_RINTPS,
26222 IX86_BUILTIN_ROUNDPS_AZ,
26224 IX86_BUILTIN_FLOORPS_SFIX,
26225 IX86_BUILTIN_CEILPS_SFIX,
26226 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
26228 IX86_BUILTIN_PTESTZ,
26229 IX86_BUILTIN_PTESTC,
26230 IX86_BUILTIN_PTESTNZC,
26232 IX86_BUILTIN_VEC_INIT_V2SI,
26233 IX86_BUILTIN_VEC_INIT_V4HI,
26234 IX86_BUILTIN_VEC_INIT_V8QI,
26235 IX86_BUILTIN_VEC_EXT_V2DF,
26236 IX86_BUILTIN_VEC_EXT_V2DI,
26237 IX86_BUILTIN_VEC_EXT_V4SF,
26238 IX86_BUILTIN_VEC_EXT_V4SI,
26239 IX86_BUILTIN_VEC_EXT_V8HI,
26240 IX86_BUILTIN_VEC_EXT_V2SI,
26241 IX86_BUILTIN_VEC_EXT_V4HI,
26242 IX86_BUILTIN_VEC_EXT_V16QI,
26243 IX86_BUILTIN_VEC_SET_V2DI,
26244 IX86_BUILTIN_VEC_SET_V4SF,
26245 IX86_BUILTIN_VEC_SET_V4SI,
26246 IX86_BUILTIN_VEC_SET_V8HI,
26247 IX86_BUILTIN_VEC_SET_V4HI,
26248 IX86_BUILTIN_VEC_SET_V16QI,
26250 IX86_BUILTIN_VEC_PACK_SFIX,
26251 IX86_BUILTIN_VEC_PACK_SFIX256,
26254 IX86_BUILTIN_CRC32QI,
26255 IX86_BUILTIN_CRC32HI,
26256 IX86_BUILTIN_CRC32SI,
26257 IX86_BUILTIN_CRC32DI,
26259 IX86_BUILTIN_PCMPESTRI128,
26260 IX86_BUILTIN_PCMPESTRM128,
26261 IX86_BUILTIN_PCMPESTRA128,
26262 IX86_BUILTIN_PCMPESTRC128,
26263 IX86_BUILTIN_PCMPESTRO128,
26264 IX86_BUILTIN_PCMPESTRS128,
26265 IX86_BUILTIN_PCMPESTRZ128,
26266 IX86_BUILTIN_PCMPISTRI128,
26267 IX86_BUILTIN_PCMPISTRM128,
26268 IX86_BUILTIN_PCMPISTRA128,
26269 IX86_BUILTIN_PCMPISTRC128,
26270 IX86_BUILTIN_PCMPISTRO128,
26271 IX86_BUILTIN_PCMPISTRS128,
26272 IX86_BUILTIN_PCMPISTRZ128,
26274 IX86_BUILTIN_PCMPGTQ,
26276 /* AES instructions */
26277 IX86_BUILTIN_AESENC128,
26278 IX86_BUILTIN_AESENCLAST128,
26279 IX86_BUILTIN_AESDEC128,
26280 IX86_BUILTIN_AESDECLAST128,
26281 IX86_BUILTIN_AESIMC128,
26282 IX86_BUILTIN_AESKEYGENASSIST128,
26284 /* PCLMUL instruction */
26285 IX86_BUILTIN_PCLMULQDQ128,
26288 IX86_BUILTIN_ADDPD256,
26289 IX86_BUILTIN_ADDPS256,
26290 IX86_BUILTIN_ADDSUBPD256,
26291 IX86_BUILTIN_ADDSUBPS256,
26292 IX86_BUILTIN_ANDPD256,
26293 IX86_BUILTIN_ANDPS256,
26294 IX86_BUILTIN_ANDNPD256,
26295 IX86_BUILTIN_ANDNPS256,
26296 IX86_BUILTIN_BLENDPD256,
26297 IX86_BUILTIN_BLENDPS256,
26298 IX86_BUILTIN_BLENDVPD256,
26299 IX86_BUILTIN_BLENDVPS256,
26300 IX86_BUILTIN_DIVPD256,
26301 IX86_BUILTIN_DIVPS256,
26302 IX86_BUILTIN_DPPS256,
26303 IX86_BUILTIN_HADDPD256,
26304 IX86_BUILTIN_HADDPS256,
26305 IX86_BUILTIN_HSUBPD256,
26306 IX86_BUILTIN_HSUBPS256,
26307 IX86_BUILTIN_MAXPD256,
26308 IX86_BUILTIN_MAXPS256,
26309 IX86_BUILTIN_MINPD256,
26310 IX86_BUILTIN_MINPS256,
26311 IX86_BUILTIN_MULPD256,
26312 IX86_BUILTIN_MULPS256,
26313 IX86_BUILTIN_ORPD256,
26314 IX86_BUILTIN_ORPS256,
26315 IX86_BUILTIN_SHUFPD256,
26316 IX86_BUILTIN_SHUFPS256,
26317 IX86_BUILTIN_SUBPD256,
26318 IX86_BUILTIN_SUBPS256,
26319 IX86_BUILTIN_XORPD256,
26320 IX86_BUILTIN_XORPS256,
26321 IX86_BUILTIN_CMPSD,
26322 IX86_BUILTIN_CMPSS,
26323 IX86_BUILTIN_CMPPD,
26324 IX86_BUILTIN_CMPPS,
26325 IX86_BUILTIN_CMPPD256,
26326 IX86_BUILTIN_CMPPS256,
26327 IX86_BUILTIN_CVTDQ2PD256,
26328 IX86_BUILTIN_CVTDQ2PS256,
26329 IX86_BUILTIN_CVTPD2PS256,
26330 IX86_BUILTIN_CVTPS2DQ256,
26331 IX86_BUILTIN_CVTPS2PD256,
26332 IX86_BUILTIN_CVTTPD2DQ256,
26333 IX86_BUILTIN_CVTPD2DQ256,
26334 IX86_BUILTIN_CVTTPS2DQ256,
26335 IX86_BUILTIN_EXTRACTF128PD256,
26336 IX86_BUILTIN_EXTRACTF128PS256,
26337 IX86_BUILTIN_EXTRACTF128SI256,
26338 IX86_BUILTIN_VZEROALL,
26339 IX86_BUILTIN_VZEROUPPER,
26340 IX86_BUILTIN_VPERMILVARPD,
26341 IX86_BUILTIN_VPERMILVARPS,
26342 IX86_BUILTIN_VPERMILVARPD256,
26343 IX86_BUILTIN_VPERMILVARPS256,
26344 IX86_BUILTIN_VPERMILPD,
26345 IX86_BUILTIN_VPERMILPS,
26346 IX86_BUILTIN_VPERMILPD256,
26347 IX86_BUILTIN_VPERMILPS256,
26348 IX86_BUILTIN_VPERMIL2PD,
26349 IX86_BUILTIN_VPERMIL2PS,
26350 IX86_BUILTIN_VPERMIL2PD256,
26351 IX86_BUILTIN_VPERMIL2PS256,
26352 IX86_BUILTIN_VPERM2F128PD256,
26353 IX86_BUILTIN_VPERM2F128PS256,
26354 IX86_BUILTIN_VPERM2F128SI256,
26355 IX86_BUILTIN_VBROADCASTSS,
26356 IX86_BUILTIN_VBROADCASTSD256,
26357 IX86_BUILTIN_VBROADCASTSS256,
26358 IX86_BUILTIN_VBROADCASTPD256,
26359 IX86_BUILTIN_VBROADCASTPS256,
26360 IX86_BUILTIN_VINSERTF128PD256,
26361 IX86_BUILTIN_VINSERTF128PS256,
26362 IX86_BUILTIN_VINSERTF128SI256,
26363 IX86_BUILTIN_LOADUPD256,
26364 IX86_BUILTIN_LOADUPS256,
26365 IX86_BUILTIN_STOREUPD256,
26366 IX86_BUILTIN_STOREUPS256,
26367 IX86_BUILTIN_LDDQU256,
26368 IX86_BUILTIN_MOVNTDQ256,
26369 IX86_BUILTIN_MOVNTPD256,
26370 IX86_BUILTIN_MOVNTPS256,
26371 IX86_BUILTIN_LOADDQU256,
26372 IX86_BUILTIN_STOREDQU256,
26373 IX86_BUILTIN_MASKLOADPD,
26374 IX86_BUILTIN_MASKLOADPS,
26375 IX86_BUILTIN_MASKSTOREPD,
26376 IX86_BUILTIN_MASKSTOREPS,
26377 IX86_BUILTIN_MASKLOADPD256,
26378 IX86_BUILTIN_MASKLOADPS256,
26379 IX86_BUILTIN_MASKSTOREPD256,
26380 IX86_BUILTIN_MASKSTOREPS256,
26381 IX86_BUILTIN_MOVSHDUP256,
26382 IX86_BUILTIN_MOVSLDUP256,
26383 IX86_BUILTIN_MOVDDUP256,
26385 IX86_BUILTIN_SQRTPD256,
26386 IX86_BUILTIN_SQRTPS256,
26387 IX86_BUILTIN_SQRTPS_NR256,
26388 IX86_BUILTIN_RSQRTPS256,
26389 IX86_BUILTIN_RSQRTPS_NR256,
26391 IX86_BUILTIN_RCPPS256,
26393 IX86_BUILTIN_ROUNDPD256,
26394 IX86_BUILTIN_ROUNDPS256,
26396 IX86_BUILTIN_FLOORPD256,
26397 IX86_BUILTIN_CEILPD256,
26398 IX86_BUILTIN_TRUNCPD256,
26399 IX86_BUILTIN_RINTPD256,
26400 IX86_BUILTIN_ROUNDPD_AZ256,
26402 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
26403 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
26404 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
26406 IX86_BUILTIN_FLOORPS256,
26407 IX86_BUILTIN_CEILPS256,
26408 IX86_BUILTIN_TRUNCPS256,
26409 IX86_BUILTIN_RINTPS256,
26410 IX86_BUILTIN_ROUNDPS_AZ256,
26412 IX86_BUILTIN_FLOORPS_SFIX256,
26413 IX86_BUILTIN_CEILPS_SFIX256,
26414 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
26416 IX86_BUILTIN_UNPCKHPD256,
26417 IX86_BUILTIN_UNPCKLPD256,
26418 IX86_BUILTIN_UNPCKHPS256,
26419 IX86_BUILTIN_UNPCKLPS256,
26421 IX86_BUILTIN_SI256_SI,
26422 IX86_BUILTIN_PS256_PS,
26423 IX86_BUILTIN_PD256_PD,
26424 IX86_BUILTIN_SI_SI256,
26425 IX86_BUILTIN_PS_PS256,
26426 IX86_BUILTIN_PD_PD256,
26428 IX86_BUILTIN_VTESTZPD,
26429 IX86_BUILTIN_VTESTCPD,
26430 IX86_BUILTIN_VTESTNZCPD,
26431 IX86_BUILTIN_VTESTZPS,
26432 IX86_BUILTIN_VTESTCPS,
26433 IX86_BUILTIN_VTESTNZCPS,
26434 IX86_BUILTIN_VTESTZPD256,
26435 IX86_BUILTIN_VTESTCPD256,
26436 IX86_BUILTIN_VTESTNZCPD256,
26437 IX86_BUILTIN_VTESTZPS256,
26438 IX86_BUILTIN_VTESTCPS256,
26439 IX86_BUILTIN_VTESTNZCPS256,
26440 IX86_BUILTIN_PTESTZ256,
26441 IX86_BUILTIN_PTESTC256,
26442 IX86_BUILTIN_PTESTNZC256,
26444 IX86_BUILTIN_MOVMSKPD256,
26445 IX86_BUILTIN_MOVMSKPS256,
26448 IX86_BUILTIN_MPSADBW256,
26449 IX86_BUILTIN_PABSB256,
26450 IX86_BUILTIN_PABSW256,
26451 IX86_BUILTIN_PABSD256,
26452 IX86_BUILTIN_PACKSSDW256,
26453 IX86_BUILTIN_PACKSSWB256,
26454 IX86_BUILTIN_PACKUSDW256,
26455 IX86_BUILTIN_PACKUSWB256,
26456 IX86_BUILTIN_PADDB256,
26457 IX86_BUILTIN_PADDW256,
26458 IX86_BUILTIN_PADDD256,
26459 IX86_BUILTIN_PADDQ256,
26460 IX86_BUILTIN_PADDSB256,
26461 IX86_BUILTIN_PADDSW256,
26462 IX86_BUILTIN_PADDUSB256,
26463 IX86_BUILTIN_PADDUSW256,
26464 IX86_BUILTIN_PALIGNR256,
26465 IX86_BUILTIN_AND256I,
26466 IX86_BUILTIN_ANDNOT256I,
26467 IX86_BUILTIN_PAVGB256,
26468 IX86_BUILTIN_PAVGW256,
26469 IX86_BUILTIN_PBLENDVB256,
26470 IX86_BUILTIN_PBLENDVW256,
26471 IX86_BUILTIN_PCMPEQB256,
26472 IX86_BUILTIN_PCMPEQW256,
26473 IX86_BUILTIN_PCMPEQD256,
26474 IX86_BUILTIN_PCMPEQQ256,
26475 IX86_BUILTIN_PCMPGTB256,
26476 IX86_BUILTIN_PCMPGTW256,
26477 IX86_BUILTIN_PCMPGTD256,
26478 IX86_BUILTIN_PCMPGTQ256,
26479 IX86_BUILTIN_PHADDW256,
26480 IX86_BUILTIN_PHADDD256,
26481 IX86_BUILTIN_PHADDSW256,
26482 IX86_BUILTIN_PHSUBW256,
26483 IX86_BUILTIN_PHSUBD256,
26484 IX86_BUILTIN_PHSUBSW256,
26485 IX86_BUILTIN_PMADDUBSW256,
26486 IX86_BUILTIN_PMADDWD256,
26487 IX86_BUILTIN_PMAXSB256,
26488 IX86_BUILTIN_PMAXSW256,
26489 IX86_BUILTIN_PMAXSD256,
26490 IX86_BUILTIN_PMAXUB256,
26491 IX86_BUILTIN_PMAXUW256,
26492 IX86_BUILTIN_PMAXUD256,
26493 IX86_BUILTIN_PMINSB256,
26494 IX86_BUILTIN_PMINSW256,
26495 IX86_BUILTIN_PMINSD256,
26496 IX86_BUILTIN_PMINUB256,
26497 IX86_BUILTIN_PMINUW256,
26498 IX86_BUILTIN_PMINUD256,
26499 IX86_BUILTIN_PMOVMSKB256,
26500 IX86_BUILTIN_PMOVSXBW256,
26501 IX86_BUILTIN_PMOVSXBD256,
26502 IX86_BUILTIN_PMOVSXBQ256,
26503 IX86_BUILTIN_PMOVSXWD256,
26504 IX86_BUILTIN_PMOVSXWQ256,
26505 IX86_BUILTIN_PMOVSXDQ256,
26506 IX86_BUILTIN_PMOVZXBW256,
26507 IX86_BUILTIN_PMOVZXBD256,
26508 IX86_BUILTIN_PMOVZXBQ256,
26509 IX86_BUILTIN_PMOVZXWD256,
26510 IX86_BUILTIN_PMOVZXWQ256,
26511 IX86_BUILTIN_PMOVZXDQ256,
26512 IX86_BUILTIN_PMULDQ256,
26513 IX86_BUILTIN_PMULHRSW256,
26514 IX86_BUILTIN_PMULHUW256,
26515 IX86_BUILTIN_PMULHW256,
26516 IX86_BUILTIN_PMULLW256,
26517 IX86_BUILTIN_PMULLD256,
26518 IX86_BUILTIN_PMULUDQ256,
26519 IX86_BUILTIN_POR256,
26520 IX86_BUILTIN_PSADBW256,
26521 IX86_BUILTIN_PSHUFB256,
26522 IX86_BUILTIN_PSHUFD256,
26523 IX86_BUILTIN_PSHUFHW256,
26524 IX86_BUILTIN_PSHUFLW256,
26525 IX86_BUILTIN_PSIGNB256,
26526 IX86_BUILTIN_PSIGNW256,
26527 IX86_BUILTIN_PSIGND256,
26528 IX86_BUILTIN_PSLLDQI256,
26529 IX86_BUILTIN_PSLLWI256,
26530 IX86_BUILTIN_PSLLW256,
26531 IX86_BUILTIN_PSLLDI256,
26532 IX86_BUILTIN_PSLLD256,
26533 IX86_BUILTIN_PSLLQI256,
26534 IX86_BUILTIN_PSLLQ256,
26535 IX86_BUILTIN_PSRAWI256,
26536 IX86_BUILTIN_PSRAW256,
26537 IX86_BUILTIN_PSRADI256,
26538 IX86_BUILTIN_PSRAD256,
26539 IX86_BUILTIN_PSRLDQI256,
26540 IX86_BUILTIN_PSRLWI256,
26541 IX86_BUILTIN_PSRLW256,
26542 IX86_BUILTIN_PSRLDI256,
26543 IX86_BUILTIN_PSRLD256,
26544 IX86_BUILTIN_PSRLQI256,
26545 IX86_BUILTIN_PSRLQ256,
26546 IX86_BUILTIN_PSUBB256,
26547 IX86_BUILTIN_PSUBW256,
26548 IX86_BUILTIN_PSUBD256,
26549 IX86_BUILTIN_PSUBQ256,
26550 IX86_BUILTIN_PSUBSB256,
26551 IX86_BUILTIN_PSUBSW256,
26552 IX86_BUILTIN_PSUBUSB256,
26553 IX86_BUILTIN_PSUBUSW256,
26554 IX86_BUILTIN_PUNPCKHBW256,
26555 IX86_BUILTIN_PUNPCKHWD256,
26556 IX86_BUILTIN_PUNPCKHDQ256,
26557 IX86_BUILTIN_PUNPCKHQDQ256,
26558 IX86_BUILTIN_PUNPCKLBW256,
26559 IX86_BUILTIN_PUNPCKLWD256,
26560 IX86_BUILTIN_PUNPCKLDQ256,
26561 IX86_BUILTIN_PUNPCKLQDQ256,
26562 IX86_BUILTIN_PXOR256,
26563 IX86_BUILTIN_MOVNTDQA256,
26564 IX86_BUILTIN_VBROADCASTSS_PS,
26565 IX86_BUILTIN_VBROADCASTSS_PS256,
26566 IX86_BUILTIN_VBROADCASTSD_PD256,
26567 IX86_BUILTIN_VBROADCASTSI256,
26568 IX86_BUILTIN_PBLENDD256,
26569 IX86_BUILTIN_PBLENDD128,
26570 IX86_BUILTIN_PBROADCASTB256,
26571 IX86_BUILTIN_PBROADCASTW256,
26572 IX86_BUILTIN_PBROADCASTD256,
26573 IX86_BUILTIN_PBROADCASTQ256,
26574 IX86_BUILTIN_PBROADCASTB128,
26575 IX86_BUILTIN_PBROADCASTW128,
26576 IX86_BUILTIN_PBROADCASTD128,
26577 IX86_BUILTIN_PBROADCASTQ128,
26578 IX86_BUILTIN_VPERMVARSI256,
26579 IX86_BUILTIN_VPERMDF256,
26580 IX86_BUILTIN_VPERMVARSF256,
26581 IX86_BUILTIN_VPERMDI256,
26582 IX86_BUILTIN_VPERMTI256,
26583 IX86_BUILTIN_VEXTRACT128I256,
26584 IX86_BUILTIN_VINSERT128I256,
26585 IX86_BUILTIN_MASKLOADD,
26586 IX86_BUILTIN_MASKLOADQ,
26587 IX86_BUILTIN_MASKLOADD256,
26588 IX86_BUILTIN_MASKLOADQ256,
26589 IX86_BUILTIN_MASKSTORED,
26590 IX86_BUILTIN_MASKSTOREQ,
26591 IX86_BUILTIN_MASKSTORED256,
26592 IX86_BUILTIN_MASKSTOREQ256,
26593 IX86_BUILTIN_PSLLVV4DI,
26594 IX86_BUILTIN_PSLLVV2DI,
26595 IX86_BUILTIN_PSLLVV8SI,
26596 IX86_BUILTIN_PSLLVV4SI,
26597 IX86_BUILTIN_PSRAVV8SI,
26598 IX86_BUILTIN_PSRAVV4SI,
26599 IX86_BUILTIN_PSRLVV4DI,
26600 IX86_BUILTIN_PSRLVV2DI,
26601 IX86_BUILTIN_PSRLVV8SI,
26602 IX86_BUILTIN_PSRLVV4SI,
26604 IX86_BUILTIN_GATHERSIV2DF,
26605 IX86_BUILTIN_GATHERSIV4DF,
26606 IX86_BUILTIN_GATHERDIV2DF,
26607 IX86_BUILTIN_GATHERDIV4DF,
26608 IX86_BUILTIN_GATHERSIV4SF,
26609 IX86_BUILTIN_GATHERSIV8SF,
26610 IX86_BUILTIN_GATHERDIV4SF,
26611 IX86_BUILTIN_GATHERDIV8SF,
26612 IX86_BUILTIN_GATHERSIV2DI,
26613 IX86_BUILTIN_GATHERSIV4DI,
26614 IX86_BUILTIN_GATHERDIV2DI,
26615 IX86_BUILTIN_GATHERDIV4DI,
26616 IX86_BUILTIN_GATHERSIV4SI,
26617 IX86_BUILTIN_GATHERSIV8SI,
26618 IX86_BUILTIN_GATHERDIV4SI,
26619 IX86_BUILTIN_GATHERDIV8SI,
26621 /* Alternate 4 element gather for the vectorizer where
26622 all operands are 32-byte wide. */
26623 IX86_BUILTIN_GATHERALTSIV4DF,
26624 IX86_BUILTIN_GATHERALTDIV8SF,
26625 IX86_BUILTIN_GATHERALTSIV4DI,
26626 IX86_BUILTIN_GATHERALTDIV8SI,
26628 /* TFmode support builtins. */
26630 IX86_BUILTIN_HUGE_VALQ,
26631 IX86_BUILTIN_FABSQ,
26632 IX86_BUILTIN_COPYSIGNQ,
26634 /* Vectorizer support builtins. */
26635 IX86_BUILTIN_CPYSGNPS,
26636 IX86_BUILTIN_CPYSGNPD,
26637 IX86_BUILTIN_CPYSGNPS256,
26638 IX86_BUILTIN_CPYSGNPD256,
26640 /* FMA4 instructions. */
26641 IX86_BUILTIN_VFMADDSS,
26642 IX86_BUILTIN_VFMADDSD,
26643 IX86_BUILTIN_VFMADDPS,
26644 IX86_BUILTIN_VFMADDPD,
26645 IX86_BUILTIN_VFMADDPS256,
26646 IX86_BUILTIN_VFMADDPD256,
26647 IX86_BUILTIN_VFMADDSUBPS,
26648 IX86_BUILTIN_VFMADDSUBPD,
26649 IX86_BUILTIN_VFMADDSUBPS256,
26650 IX86_BUILTIN_VFMADDSUBPD256,
26652 /* FMA3 instructions. */
26653 IX86_BUILTIN_VFMADDSS3,
26654 IX86_BUILTIN_VFMADDSD3,
26656 /* XOP instructions. */
26657 IX86_BUILTIN_VPCMOV,
26658 IX86_BUILTIN_VPCMOV_V2DI,
26659 IX86_BUILTIN_VPCMOV_V4SI,
26660 IX86_BUILTIN_VPCMOV_V8HI,
26661 IX86_BUILTIN_VPCMOV_V16QI,
26662 IX86_BUILTIN_VPCMOV_V4SF,
26663 IX86_BUILTIN_VPCMOV_V2DF,
26664 IX86_BUILTIN_VPCMOV256,
26665 IX86_BUILTIN_VPCMOV_V4DI256,
26666 IX86_BUILTIN_VPCMOV_V8SI256,
26667 IX86_BUILTIN_VPCMOV_V16HI256,
26668 IX86_BUILTIN_VPCMOV_V32QI256,
26669 IX86_BUILTIN_VPCMOV_V8SF256,
26670 IX86_BUILTIN_VPCMOV_V4DF256,
26672 IX86_BUILTIN_VPPERM,
26674 IX86_BUILTIN_VPMACSSWW,
26675 IX86_BUILTIN_VPMACSWW,
26676 IX86_BUILTIN_VPMACSSWD,
26677 IX86_BUILTIN_VPMACSWD,
26678 IX86_BUILTIN_VPMACSSDD,
26679 IX86_BUILTIN_VPMACSDD,
26680 IX86_BUILTIN_VPMACSSDQL,
26681 IX86_BUILTIN_VPMACSSDQH,
26682 IX86_BUILTIN_VPMACSDQL,
26683 IX86_BUILTIN_VPMACSDQH,
26684 IX86_BUILTIN_VPMADCSSWD,
26685 IX86_BUILTIN_VPMADCSWD,
26687 IX86_BUILTIN_VPHADDBW,
26688 IX86_BUILTIN_VPHADDBD,
26689 IX86_BUILTIN_VPHADDBQ,
26690 IX86_BUILTIN_VPHADDWD,
26691 IX86_BUILTIN_VPHADDWQ,
26692 IX86_BUILTIN_VPHADDDQ,
26693 IX86_BUILTIN_VPHADDUBW,
26694 IX86_BUILTIN_VPHADDUBD,
26695 IX86_BUILTIN_VPHADDUBQ,
26696 IX86_BUILTIN_VPHADDUWD,
26697 IX86_BUILTIN_VPHADDUWQ,
26698 IX86_BUILTIN_VPHADDUDQ,
26699 IX86_BUILTIN_VPHSUBBW,
26700 IX86_BUILTIN_VPHSUBWD,
26701 IX86_BUILTIN_VPHSUBDQ,
26703 IX86_BUILTIN_VPROTB,
26704 IX86_BUILTIN_VPROTW,
26705 IX86_BUILTIN_VPROTD,
26706 IX86_BUILTIN_VPROTQ,
26707 IX86_BUILTIN_VPROTB_IMM,
26708 IX86_BUILTIN_VPROTW_IMM,
26709 IX86_BUILTIN_VPROTD_IMM,
26710 IX86_BUILTIN_VPROTQ_IMM,
26712 IX86_BUILTIN_VPSHLB,
26713 IX86_BUILTIN_VPSHLW,
26714 IX86_BUILTIN_VPSHLD,
26715 IX86_BUILTIN_VPSHLQ,
26716 IX86_BUILTIN_VPSHAB,
26717 IX86_BUILTIN_VPSHAW,
26718 IX86_BUILTIN_VPSHAD,
26719 IX86_BUILTIN_VPSHAQ,
26721 IX86_BUILTIN_VFRCZSS,
26722 IX86_BUILTIN_VFRCZSD,
26723 IX86_BUILTIN_VFRCZPS,
26724 IX86_BUILTIN_VFRCZPD,
26725 IX86_BUILTIN_VFRCZPS256,
26726 IX86_BUILTIN_VFRCZPD256,
26728 IX86_BUILTIN_VPCOMEQUB,
26729 IX86_BUILTIN_VPCOMNEUB,
26730 IX86_BUILTIN_VPCOMLTUB,
26731 IX86_BUILTIN_VPCOMLEUB,
26732 IX86_BUILTIN_VPCOMGTUB,
26733 IX86_BUILTIN_VPCOMGEUB,
26734 IX86_BUILTIN_VPCOMFALSEUB,
26735 IX86_BUILTIN_VPCOMTRUEUB,
26737 IX86_BUILTIN_VPCOMEQUW,
26738 IX86_BUILTIN_VPCOMNEUW,
26739 IX86_BUILTIN_VPCOMLTUW,
26740 IX86_BUILTIN_VPCOMLEUW,
26741 IX86_BUILTIN_VPCOMGTUW,
26742 IX86_BUILTIN_VPCOMGEUW,
26743 IX86_BUILTIN_VPCOMFALSEUW,
26744 IX86_BUILTIN_VPCOMTRUEUW,
26746 IX86_BUILTIN_VPCOMEQUD,
26747 IX86_BUILTIN_VPCOMNEUD,
26748 IX86_BUILTIN_VPCOMLTUD,
26749 IX86_BUILTIN_VPCOMLEUD,
26750 IX86_BUILTIN_VPCOMGTUD,
26751 IX86_BUILTIN_VPCOMGEUD,
26752 IX86_BUILTIN_VPCOMFALSEUD,
26753 IX86_BUILTIN_VPCOMTRUEUD,
26755 IX86_BUILTIN_VPCOMEQUQ,
26756 IX86_BUILTIN_VPCOMNEUQ,
26757 IX86_BUILTIN_VPCOMLTUQ,
26758 IX86_BUILTIN_VPCOMLEUQ,
26759 IX86_BUILTIN_VPCOMGTUQ,
26760 IX86_BUILTIN_VPCOMGEUQ,
26761 IX86_BUILTIN_VPCOMFALSEUQ,
26762 IX86_BUILTIN_VPCOMTRUEUQ,
26764 IX86_BUILTIN_VPCOMEQB,
26765 IX86_BUILTIN_VPCOMNEB,
26766 IX86_BUILTIN_VPCOMLTB,
26767 IX86_BUILTIN_VPCOMLEB,
26768 IX86_BUILTIN_VPCOMGTB,
26769 IX86_BUILTIN_VPCOMGEB,
26770 IX86_BUILTIN_VPCOMFALSEB,
26771 IX86_BUILTIN_VPCOMTRUEB,
26773 IX86_BUILTIN_VPCOMEQW,
26774 IX86_BUILTIN_VPCOMNEW,
26775 IX86_BUILTIN_VPCOMLTW,
26776 IX86_BUILTIN_VPCOMLEW,
26777 IX86_BUILTIN_VPCOMGTW,
26778 IX86_BUILTIN_VPCOMGEW,
26779 IX86_BUILTIN_VPCOMFALSEW,
26780 IX86_BUILTIN_VPCOMTRUEW,
26782 IX86_BUILTIN_VPCOMEQD,
26783 IX86_BUILTIN_VPCOMNED,
26784 IX86_BUILTIN_VPCOMLTD,
26785 IX86_BUILTIN_VPCOMLED,
26786 IX86_BUILTIN_VPCOMGTD,
26787 IX86_BUILTIN_VPCOMGED,
26788 IX86_BUILTIN_VPCOMFALSED,
26789 IX86_BUILTIN_VPCOMTRUED,
26791 IX86_BUILTIN_VPCOMEQQ,
26792 IX86_BUILTIN_VPCOMNEQ,
26793 IX86_BUILTIN_VPCOMLTQ,
26794 IX86_BUILTIN_VPCOMLEQ,
26795 IX86_BUILTIN_VPCOMGTQ,
26796 IX86_BUILTIN_VPCOMGEQ,
26797 IX86_BUILTIN_VPCOMFALSEQ,
26798 IX86_BUILTIN_VPCOMTRUEQ,
26800 /* LWP instructions. */
26801 IX86_BUILTIN_LLWPCB,
26802 IX86_BUILTIN_SLWPCB,
26803 IX86_BUILTIN_LWPVAL32,
26804 IX86_BUILTIN_LWPVAL64,
26805 IX86_BUILTIN_LWPINS32,
26806 IX86_BUILTIN_LWPINS64,
26811 IX86_BUILTIN_XBEGIN,
26813 IX86_BUILTIN_XABORT,
26814 IX86_BUILTIN_XTEST,
26816 /* BMI instructions. */
26817 IX86_BUILTIN_BEXTR32,
26818 IX86_BUILTIN_BEXTR64,
26821 /* TBM instructions. */
26822 IX86_BUILTIN_BEXTRI32,
26823 IX86_BUILTIN_BEXTRI64,
26825 /* BMI2 instructions. */
26826 IX86_BUILTIN_BZHI32,
26827 IX86_BUILTIN_BZHI64,
26828 IX86_BUILTIN_PDEP32,
26829 IX86_BUILTIN_PDEP64,
26830 IX86_BUILTIN_PEXT32,
26831 IX86_BUILTIN_PEXT64,
26833 /* ADX instructions. */
26834 IX86_BUILTIN_ADDCARRYX32,
26835 IX86_BUILTIN_ADDCARRYX64,
26837 /* FSGSBASE instructions. */
26838 IX86_BUILTIN_RDFSBASE32,
26839 IX86_BUILTIN_RDFSBASE64,
26840 IX86_BUILTIN_RDGSBASE32,
26841 IX86_BUILTIN_RDGSBASE64,
26842 IX86_BUILTIN_WRFSBASE32,
26843 IX86_BUILTIN_WRFSBASE64,
26844 IX86_BUILTIN_WRGSBASE32,
26845 IX86_BUILTIN_WRGSBASE64,
26847 /* RDRND instructions. */
26848 IX86_BUILTIN_RDRAND16_STEP,
26849 IX86_BUILTIN_RDRAND32_STEP,
26850 IX86_BUILTIN_RDRAND64_STEP,
26852 /* RDSEED instructions. */
26853 IX86_BUILTIN_RDSEED16_STEP,
26854 IX86_BUILTIN_RDSEED32_STEP,
26855 IX86_BUILTIN_RDSEED64_STEP,
26857 /* F16C instructions. */
26858 IX86_BUILTIN_CVTPH2PS,
26859 IX86_BUILTIN_CVTPH2PS256,
26860 IX86_BUILTIN_CVTPS2PH,
26861 IX86_BUILTIN_CVTPS2PH256,
26863 /* CFString built-in for darwin */
26864 IX86_BUILTIN_CFSTRING,
26866 /* Builtins to get CPU type and supported features. */
26867 IX86_BUILTIN_CPU_INIT,
26868 IX86_BUILTIN_CPU_IS,
26869 IX86_BUILTIN_CPU_SUPPORTS,
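/* Illustrative user-level sketch (not part of this file) showing how the
   CPU-detection builtins enumerated just above are typically used.
   __builtin_cpu_init, __builtin_cpu_is and __builtin_cpu_supports are the
   documented GCC builtins backed by IX86_BUILTIN_CPU_INIT/_IS/_SUPPORTS.  */
#include <stdio.h>

int
main (void)
{
  __builtin_cpu_init ();                    /* populate CPU model/feature data */
  if (__builtin_cpu_supports ("avx2"))
    puts ("AVX2 is available");
  if (__builtin_cpu_is ("corei7"))
    puts ("running on a Core i7 family CPU");
  return 0;
}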
26874 /* Table for the ix86 builtin decls. */
26875 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
26877 /* Table of all of the builtin functions that are possible with different ISA's
26878 but are waiting to be built until a function is declared to use that
26879 ISA. */
26880 struct builtin_isa {
26881 const char *name; /* function name */
26882 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
26883 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
26884 bool const_p; /* true if the declaration is constant */
26885 bool set_and_not_built_p;
26888 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
26891 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26892 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26893 function decl in the ix86_builtins array. Returns the function decl or
26894 NULL_TREE, if the builtin was not added.
26896 If the front end has a special hook for builtin functions, delay adding
26897 builtin functions that aren't in the current ISA until the ISA is changed
26898 with function specific optimization. Doing so can save about 300K for the
26899 default compiler. When the builtin is expanded, check at that time whether
26900 it is valid.
26902 If the front end doesn't have a special hook, record all builtins, even if
26903 they aren't part of the current ISA, in case the user uses
26904 function specific options for a different ISA, so that we don't get scope
26905 errors if a builtin is added in the middle of a function scope. */
26908 def_builtin (HOST_WIDE_INT mask, const char *name,
26909 enum ix86_builtin_func_type tcode,
26910 enum ix86_builtins code)
26912 tree decl = NULL_TREE;
26914 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
26916 ix86_builtins_isa[(int) code].isa = mask;
26918 mask &= ~OPTION_MASK_ISA_64BIT;
26919 if (mask == 0
26920 || (mask & ix86_isa_flags) != 0
26921 || (lang_hooks.builtin_function
26922 == lang_hooks.builtin_function_ext_scope))
26925 tree type = ix86_get_builtin_func_type (tcode);
26926 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
26927 NULL, NULL_TREE);
26928 ix86_builtins[(int) code] = decl;
26929 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
26933 ix86_builtins[(int) code] = NULL_TREE;
26934 ix86_builtins_isa[(int) code].tcode = tcode;
26935 ix86_builtins_isa[(int) code].name = name;
26936 ix86_builtins_isa[(int) code].const_p = false;
26937 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
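/* Illustrative sketch (not from this file; IX86_BUILTIN_FOO and the builtin
   name are hypothetical) of how def_builtin defers a declaration: when the
   requested ISA is not enabled and the front end lacks an ext-scope hook,
   NULL_TREE is returned and the request is parked in ix86_builtins_isa until
   ix86_add_new_builtins later materializes it.  */
static void
example_register_foo (void)
{
  tree decl = def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_foo",
                           V4SF_FTYPE_V4SF, IX86_BUILTIN_FOO);
  if (decl == NULL_TREE)
    {
      /* Deferred: ix86_builtins_isa[IX86_BUILTIN_FOO].set_and_not_built_p is
         now true, and the decl is created once AVX is enabled, e.g. by a
         function-specific target attribute.  */
    }
}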
26944 /* Like def_builtin, but also marks the function decl "const". */
26947 def_builtin_const (HOST_WIDE_INT mask, const char *name,
26948 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
26950 tree decl = def_builtin (mask, name, tcode, code);
26951 if (decl)
26952 TREE_READONLY (decl) = 1;
26953 else
26954 ix86_builtins_isa[(int) code].const_p = true;
26959 /* Add any new builtin functions for a given ISA that may not have been
26960 declared. This saves a bit of space compared to adding all of the
26961 declarations to the tree, even if we didn't use them. */
26964 ix86_add_new_builtins (HOST_WIDE_INT isa)
26968 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
26970 if ((ix86_builtins_isa[i].isa & isa) != 0
26971 && ix86_builtins_isa[i].set_and_not_built_p)
26975 /* Don't define the builtin again. */
26976 ix86_builtins_isa[i].set_and_not_built_p = false;
26978 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
26979 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
26980 type, i, BUILT_IN_MD, NULL,
26981 NULL_TREE);
26983 ix86_builtins[i] = decl;
26984 if (ix86_builtins_isa[i].const_p)
26985 TREE_READONLY (decl) = 1;
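/* Illustrative sketch of the assumed call pattern (not verbatim from the
   option-handling code): once a function-specific target attribute has turned
   on additional ISA bits, the declarations deferred by def_builtin for those
   ISAs are created by handing the updated flags to ix86_add_new_builtins.  */
static void
example_enable_avx2_builtins (void)
{
  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
  ix86_add_new_builtins (ix86_isa_flags);
}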
26990 /* Bits for builtin_description.flag. */
26992 /* Set when we don't support the comparison natively, and should
26993 swap the comparison operands in order to support it. */
26994 #define BUILTIN_DESC_SWAP_OPERANDS 1
26996 struct builtin_description
26998 const HOST_WIDE_INT mask;
26999 const enum insn_code icode;
27000 const char *const name;
27001 const enum ix86_builtins code;
27002 const enum rtx_code comparison;
27003 const int flag;
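/* Illustrative fragment of how an expander can honour
   BUILTIN_DESC_SWAP_OPERANDS; d, op0, op1 and comparison are assumed locals
   of such an expander, not names taken from this file.  When the comparison
   is not supported natively, exchange the two operands and use the
   reversed-operand condition obtained from swap_condition.  */
if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
  {
    rtx tmp = op0;
    op0 = op1;
    op1 = tmp;
    comparison = swap_condition (comparison);
  }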
27006 static const struct builtin_description bdesc_comi[] =
27008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
27009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
27010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
27011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
27012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
27013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
27014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
27015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
27016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
27017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
27018 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
27019 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
27020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
27021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
27022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
27023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
27024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
27025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
27026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
27027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
27028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
27029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
27030 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
27031 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
27034 static const struct builtin_description bdesc_pcmpestr[] =
27037 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
27038 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
27039 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
27040 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
27041 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
27042 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
27043 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
27046 static const struct builtin_description bdesc_pcmpistr[] =
27049 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
27050 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
27051 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
27052 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
27053 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
27054 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
27055 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
27058 /* Special builtins with a variable number of arguments. */
27059 static const struct builtin_description bdesc_special_args[] =
27061 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
27062 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
27063 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
27066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
27069 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
27071 /* FXSR, XSAVE and XSAVEOPT */
27072 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
27073 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
27074 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27075 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27076 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27078 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
27079 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
27080 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27081 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27082 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
27085 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
27086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
27087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
27089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
27090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
27091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
27092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
27094 /* SSE or 3DNow!A */
27095 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
27096 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
27099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
27100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
27101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
27102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
27103 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
27104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
27105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
27106 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
27107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
27108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
27110 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
27111 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
27114 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
27117 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
27120 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
27121 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
27124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
27125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
27127 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
27128 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
27129 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
27130 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
27131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
27133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
27134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
27135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
27136 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
27137 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
27138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
27139 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
27141 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
27142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
27143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
27145 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
27146 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
27147 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
27148 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
27149 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
27150 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
27151 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
27152 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
27155 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
27156 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
27157 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
27158 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
27159 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
27160 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
27161 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
27162 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
27163 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
27165 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
27166 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
27167 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
27168 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
27169 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
27170 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
27173 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
27174 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
27175 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
27176 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
27177 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
27178 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
27179 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
27180 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
27183 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
27184 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
27185 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
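/* Illustrative user-level sketch (not part of this file) exercising the RTM
   builtins registered just above through their documented <immintrin.h>
   wrappers; requires a CPU with RTM support and compilation with -mrtm.  */
#include <immintrin.h>

static int
rtm_increment (volatile int *counter)
{
  unsigned status = _xbegin ();
  if (status == _XBEGIN_STARTED)
    {
      ++*counter;          /* runs inside the hardware transaction */
      _xend ();
      return 1;
    }
  return 0;                /* transaction aborted; caller should fall back */
}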
27188 /* Builtins with a variable number of arguments. */
27189 static const struct builtin_description bdesc_args[] =
27191 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
27192 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
27193 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
27194 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
27195 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
27196 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
27197 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
27200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27222 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27226 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27228 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27235 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
27236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
27238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
27239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
27240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
27242 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
27244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
27245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
27246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
27247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
27248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
27249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
27251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
27252 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
27253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
27254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
27255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
27256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
27258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
27259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
27260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
27261 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
27264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
27265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
27266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
27267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
27269 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27270 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27271 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27272 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
27273 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
27274 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
27275 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27276 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27277 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27278 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27279 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27280 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27281 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27282 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27283 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27286 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
27287 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
27288 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
27289 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
27290 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27291 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
27294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
27295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27296 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27298 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
27301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
27302 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
27303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
27304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
27305 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
27307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
27309 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27310 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27311 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
27319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
27320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
27321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
27324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
27325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
27326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
27327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27328 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27329 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
27330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
27331 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
27332 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
27333 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
27334 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
27335 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
27336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
27337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
27339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
27341 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27342 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27346 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27348 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27349 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27351 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27354 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27355 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27356 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27357 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
27360 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
27361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
27363 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
27365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
27366 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
27367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
27369 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
27370 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
27372 /* SSE MMX or 3Dnow!A */
27373 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27374 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27375 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27377 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27378 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27379 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27380 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27382 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
27383 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
27385 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
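27387   /* SSE2 */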
27388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
27390 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
27391 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
27392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
27393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
27394 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
27396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
27397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
27398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
27399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
27400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
27402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
27404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
27405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
27406 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
27407 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
27409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
27410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
27411 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
27413 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27414 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27415 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27416 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
27423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
27424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
27425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
27426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
27427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
27428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
27429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
27430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
27431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
27432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
27433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
27434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
27435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
27436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
27437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
27438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
27439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
27440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
27441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
27443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27444 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27445 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27448 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27450 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27451 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27453 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27456 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27457 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27459 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
27461 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27462 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27463 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27464 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27465 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27466 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27467 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27468 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27479 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27480 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27482 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27484 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27485 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27497 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27498 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27499 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27500 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27502 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27503 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27504 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27505 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27506 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27507 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27508 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27509 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
27512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
27513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
27515 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
27518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
27519 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
27521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
27523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
27524 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
27525 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
27526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
27528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
27529 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
27530 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
27531 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
27532 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
27533 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
27534 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
27536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
27537 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
27538 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
27539 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
27540 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
27541 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
27542 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
27544 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
27545 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
27546 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
27547 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
27549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
27550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
27551 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
27553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
27555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
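27557   /* SSE2 MMX */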
27558 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
27559 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
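27561   /* SSE3 */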
27562 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27563 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27565 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27566 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27567 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27568 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
27569 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
27570 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
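27572   /* SSSE3 */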
27573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
27574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
27575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
27576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
27577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
27578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
27580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27587 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27588 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27591 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27592 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
27593 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
27594 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27595 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27596 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27597 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27598 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27599 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
27600 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27601 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
27602 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27603 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
27606 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
27607 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
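27609   /* SSE4.1 */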
27610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
27611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
27612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
27613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
27614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
27615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
27616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
27617 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
27618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
27619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
27621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
27622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
27623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
27624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
27625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
27626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
27627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
27628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
27629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
27630 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
27631 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
27632 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
27633 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
27635 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
27636 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27637 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27638 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27639 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27640 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27641 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
27642 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27643 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27644 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
27645 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
27646 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
27650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
27651 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
27652 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
27654 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
27655 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
27656 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
27657 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
27659 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
27660 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
27662 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
27663 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
27665 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
27666 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
27667 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
27668 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
27670 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
27671 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
27673 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27674 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
27676 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
27677 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
27678 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
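27680   /* SSE4.2 */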
27681 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27682 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
27683 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
27684 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
27685 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
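27687   /* SSE4A */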
27688 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
27689 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
27690 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
27691 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
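27693   /* AES */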
27694 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
27695 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
27697 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27698 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27699 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27700 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
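27702   /* PCLMUL */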
27703 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
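27705   /* AVX */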
27706 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27707 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27710 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27711 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27714 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27720 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27721 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27722 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27723 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27724 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27725 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27726 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27727 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27728 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27729 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27730 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27731 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
27734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
27735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
27736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
27738 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
27739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
27740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
27741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
27742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
27743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
27744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
27745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
27746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
27747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
27748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
27749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
27750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
27751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
27752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
27753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
27754 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
27755 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
27756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
27757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
27758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
27759 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
27760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
27761 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
27762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
27763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
27764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
27765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
27766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
27767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
27768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
27769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
27770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
27771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
27773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
27777 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
27778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27779 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27781 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
27786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
27788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
27789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
27790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
27791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
27793 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
27794 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
27796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
27797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
27799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
27800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
27801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
27802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
27804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
27805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
27807 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
27808 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
27810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
27816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
27817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
27818 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
27819 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
27820 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
27822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
27823 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
27824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
27825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
27826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
27827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
27828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
27829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
27830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
27831 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
27832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
27833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
27834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
27835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
27836 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
27838 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
27839 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
27841 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
27842 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
27844 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
27847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
27848 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
27849 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
27850 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
27851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
27852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
27853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
27854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
27855 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27856 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27857 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27858 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
27864 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
27869 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
27870 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27871 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27872 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27873 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27874 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27875 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27876 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27877 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27879 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27880 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
27885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
27886 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27887 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27888 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27889 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27890 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27891 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27892 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27893 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27894 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27895 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27896 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27897 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
27899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
27900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
27901 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
27902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
27903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
27904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
27905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
27906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
27907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
27908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
27909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
27910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
27911 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
27912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27913 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27914 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27915 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27916 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27917 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
27918 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
27920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
27922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
27923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
27924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
27928 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
27929 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
27930 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
27931 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
27932 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
27933 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
27934 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
27935 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
27936 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
27937 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
27938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
27939 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
27940 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
27941 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
27942 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
27943 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
27944 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
27945 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27946 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27947 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27948 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
27958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
27959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27961 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
27963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
27964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
27965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
27966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
27967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
27968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
27969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
27970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
27971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
27972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
27973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
27974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
27975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
27976 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27977 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
27978 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
27979 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
27980 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
27981 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
27982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
27983 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27984 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27985 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27986 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27987 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27988 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27989 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
27990 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
27991 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
27992 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
27994 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
27997 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
27998 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
27999 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
28002 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
28003 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
28006 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
28007 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
28008 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
28009 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
28012 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
28013 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
28014 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
28015 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
28016 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
28017 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
28018 };
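/* Each entry above pairs an ISA mask, an insn pattern, a builtin name, an
   IX86_BUILTIN_* code, an optional rtx comparison code and a function-type
   index; the expander later uses that type index to marshal operands.  As a
   rough illustration (a sketch of the usual bmi2intrin.h wrapper shape, not
   a verbatim copy of that header), the UINT_FTYPE_UINT_UINT entry for
   __builtin_ia32_pdep_si reaches users roughly as:

     extern __inline unsigned int
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _pdep_u32 (unsigned int __X, unsigned int __Y)
     {
       return __builtin_ia32_pdep_si (__X, __Y);
     }

   so a call such as _pdep_u32 (bits, mask) expands directly to the pdep
   instruction when -mbmi2 is enabled.  */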
28020 /* FMA4 and XOP. */
28021 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
28022 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
28023 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
28024 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
28025 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
28026 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
28027 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
28028 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
28029 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
28030 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
28031 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
28032 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
28033 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
28034 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
28035 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
28036 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
28037 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
28038 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
28039 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
28040 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
28041 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
28042 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
28043 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
28044 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
28045 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
28046 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
28047 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
28048 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
28049 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
28050 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
28051 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
28052 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
28053 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
28054 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
28055 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
28056 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
28057 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
28058 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
28059 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
28060 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
28061 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
28062 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
28063 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
28064 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
28065 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
28066 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
28067 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
28068 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
28069 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
28070 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
28071 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
28072 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
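/* Each MULTI_ARG_* macro above is shorthand for one of the
   ix86_builtin_func_type values used in bdesc_multi_arg below; the suffix
   encodes element mode, operand count and any extra immediate or condition
   operand.  For example, MULTI_ARG_3_SF (V4SF_FTYPE_V4SF_V4SF_V4SF)
   describes a builtin taking three 128-bit single-precision vectors and
   returning one, which is how __builtin_ia32_vfmaddss below is exposed; a
   sketch of the usual fma4intrin.h wrapper (illustrative, not a verbatim
   copy of that header):

     extern __inline __m128
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _mm_macc_ss (__m128 __A, __m128 __B, __m128 __C)
     {
       return (__m128) __builtin_ia32_vfmaddss ((__v4sf) __A,
                                                (__v4sf) __B,
                                                (__v4sf) __C);
     }
  */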
28074 static const struct builtin_description bdesc_multi_arg[] =
28075 {
28076 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
28077 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
28078 UNKNOWN, (int)MULTI_ARG_3_SF },
28079 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
28080 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
28081 UNKNOWN, (int)MULTI_ARG_3_DF },
28083 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
28084 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
28085 UNKNOWN, (int)MULTI_ARG_3_SF },
28086 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
28087 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
28088 UNKNOWN, (int)MULTI_ARG_3_DF },
28090 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
28091 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
28092 UNKNOWN, (int)MULTI_ARG_3_SF },
28093 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
28094 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
28095 UNKNOWN, (int)MULTI_ARG_3_DF },
28096 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
28097 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
28098 UNKNOWN, (int)MULTI_ARG_3_SF2 },
28099 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
28100 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
28101 UNKNOWN, (int)MULTI_ARG_3_DF2 },
28103 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
28104 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
28105 UNKNOWN, (int)MULTI_ARG_3_SF },
28106 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
28107 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
28108 UNKNOWN, (int)MULTI_ARG_3_DF },
28109 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
28110 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
28111 UNKNOWN, (int)MULTI_ARG_3_SF2 },
28112 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
28113 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
28114 UNKNOWN, (int)MULTI_ARG_3_DF2 },
28116 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
28117 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
28118 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
28119 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
28120 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
28121 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
28122 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
28124 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
28125 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
28126 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
28127 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
28128 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
28129 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
28130 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
28132 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
28134 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
28135 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
28136 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
28137 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
28138 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
28139 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
28140 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
28141 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
28142 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
28143 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
28144 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
28145 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
28147 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
28148 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
28149 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
28150 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
28151 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
28152 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
28153 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
28154 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
28155 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
28156 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
28157 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
28158 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
28159 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
28160 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
28161 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
28162 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
28164 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
28165 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
28166 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
28167 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
28168 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
28169 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
28171 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
28172 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
28173 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
28174 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
28175 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
28176 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
28177 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
28178 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
28179 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
28180 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
28181 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
28182 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
28183 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
28184 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
28185 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
28187 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
28188 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
28189 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
28190 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
28191 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
28192 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
28193 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
28195 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
28196 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
28197 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
28198 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
28199 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
28200 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
28201 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
28203 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
28204 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
28205 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
28206 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
28207 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
28208 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
28209 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
28211 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
28212 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
28213 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
28214 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
28215 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
28216 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
28217 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
28219 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
28220 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
28221 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
28222 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
28223 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
28224 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
28225 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
28227 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
28228 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
28229 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
28230 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
28231 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
28232 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
28233 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
28235 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
28236 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
28237 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
28238 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
28239 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
28240 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
28241 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
28243 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
28244 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
28245 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
28246 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
28247 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
28248 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
28249 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
28251 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
28252 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
28253 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
28254 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
28255 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
28256 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
28257 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
28258 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
28260 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
28261 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
28262 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
28263 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
28264 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
28265 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
28266 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
28267 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
28269 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
28270 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
28271 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
28272 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
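/* In the XOP comparison entries above, the rtx code column (EQ, NE, LT, LTU,
   ...) selects the condition that the expander folds into the VPCOM
   immediate, and a few user-level spellings deliberately alias one builtin
   code (for instance, vpcomneb and vpcomneqb both map to
   IX86_BUILTIN_VPCOMNEB).  A sketch of the usual xopintrin.h wrapper for one
   of them (illustrative, not a verbatim copy of that header):

     extern __inline __m128i
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _mm_comlt_epu8 (__m128i __A, __m128i __B)
     {
       return (__m128i) __builtin_ia32_vpcomltub ((__v16qi) __A,
                                                  (__v16qi) __B);
     }
  */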
28276 /* TM vector builtins. */
28278 /* Reuse the existing x86-specific `struct builtin_description' because
28279 we're lazy.  Add casts to make them fit. */
28280 static const struct builtin_description bdesc_tm[] =
28282 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
28283 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
28284 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
28285 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
28286 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
28287 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
28288 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
28290 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
28291 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
28292 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
28293 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
28294 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
28295 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
28296 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
28298 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
28299 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
28300 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
28301 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
28302 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
28303 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
28304 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
28306 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
28307 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
28308 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
28311 /* TM callbacks. */
28313 /* Return the builtin decl needed to load a vector of TYPE. */
28316 ix86_builtin_tm_load (tree type)
28318 if (TREE_CODE (type) == VECTOR_TYPE)
28320 switch (tree_low_cst (TYPE_SIZE (type), 1))
28323 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
28325 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
28327 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
28333 /* Return the builtin decl needed to store a vector of TYPE. */
28336 ix86_builtin_tm_store (tree type)
28338 if (TREE_CODE (type) == VECTOR_TYPE)
28340 switch (tree_low_cst (TYPE_SIZE (type), 1))
28343 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
28345 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
28347 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
28353 /* Initialize the transactional memory vector load/store builtins. */
28356 ix86_init_tm_builtins (void)
28358 enum ix86_builtin_func_type ftype;
28359 const struct builtin_description *d;
28362 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
28363 tree attrs_log, attrs_type_log;
28368 /* If there are no builtins defined, we must be compiling in a
28369 language without trans-mem support. */
28370 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
28373 /* Use whatever attributes a normal TM load has. */
28374 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
28375 attrs_load = DECL_ATTRIBUTES (decl);
28376 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
28377 /* Use whatever attributes a normal TM store has. */
28378 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
28379 attrs_store = DECL_ATTRIBUTES (decl);
28380 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
28381 /* Use whatever attributes a normal TM log has. */
28382 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
28383 attrs_log = DECL_ATTRIBUTES (decl);
28384 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
28386 for (i = 0, d = bdesc_tm;
28387 i < ARRAY_SIZE (bdesc_tm);
28390 if ((d->mask & ix86_isa_flags) != 0
28391 || (lang_hooks.builtin_function
28392 == lang_hooks.builtin_function_ext_scope))
28394 tree type, attrs, attrs_type;
28395 enum built_in_function code = (enum built_in_function) d->code;
28397 ftype = (enum ix86_builtin_func_type) d->flag;
28398 type = ix86_get_builtin_func_type (ftype);
28400 if (BUILTIN_TM_LOAD_P (code))
28402 attrs = attrs_load;
28403 attrs_type = attrs_type_load;
28405 else if (BUILTIN_TM_STORE_P (code))
28407 attrs = attrs_store;
28408 attrs_type = attrs_type_store;
28413 attrs_type = attrs_type_log;
28415 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
28416 /* The builtin without the prefix for
28417 calling it directly. */
28418 d->name + strlen ("__builtin_"),
28420 /* add_builtin_function () will set the DECL_ATTRIBUTES; now
28421 set the TYPE_ATTRIBUTES. */
28422 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
28424 set_builtin_decl (code, decl, false);
28429 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28430 in the current target ISA to allow the user to compile particular modules
28431 with different target specific options that differ from the command line
28434 ix86_init_mmx_sse_builtins (void)
28436 const struct builtin_description * d;
28437 enum ix86_builtin_func_type ftype;
28440 /* Add all special builtins with variable number of operands. */
28441 for (i = 0, d = bdesc_special_args;
28442 i < ARRAY_SIZE (bdesc_special_args);
28448 ftype = (enum ix86_builtin_func_type) d->flag;
28449 def_builtin (d->mask, d->name, ftype, d->code);
28452 /* Add all builtins with variable number of operands. */
28453 for (i = 0, d = bdesc_args;
28454 i < ARRAY_SIZE (bdesc_args);
28460 ftype = (enum ix86_builtin_func_type) d->flag;
28461 def_builtin_const (d->mask, d->name, ftype, d->code);
28464 /* pcmpestr[im] insns. */
28465 for (i = 0, d = bdesc_pcmpestr;
28466 i < ARRAY_SIZE (bdesc_pcmpestr);
28469 if (d->code == IX86_BUILTIN_PCMPESTRM128)
28470 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
28472 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
28473 def_builtin_const (d->mask, d->name, ftype, d->code);
28476 /* pcmpistr[im] insns. */
28477 for (i = 0, d = bdesc_pcmpistr;
28478 i < ARRAY_SIZE (bdesc_pcmpistr);
28481 if (d->code == IX86_BUILTIN_PCMPISTRM128)
28482 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
28484 ftype = INT_FTYPE_V16QI_V16QI_INT;
28485 def_builtin_const (d->mask, d->name, ftype, d->code);
28488 /* comi/ucomi insns. */
28489 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
28491 if (d->mask == OPTION_MASK_ISA_SSE2)
28492 ftype = INT_FTYPE_V2DF_V2DF;
28494 ftype = INT_FTYPE_V4SF_V4SF;
28495 def_builtin_const (d->mask, d->name, ftype, d->code);
28499 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
28500 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
28501 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
28502 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
28504 /* SSE or 3DNow!A */
28505 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
28506 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
28507 IX86_BUILTIN_MASKMOVQ);
28510 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
28511 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
28513 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
28514 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
28515 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
28516 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
28519 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
28520 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
28521 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
28522 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
28525 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
28526 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
28527 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
28528 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
28529 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
28530 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
28531 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
28532 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
28533 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
28534 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
28535 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
28536 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
28539 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
28540 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
28543 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
28544 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
28545 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
28546 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
28547 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
28548 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
28549 IX86_BUILTIN_RDRAND64_STEP);
28552 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
28553 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
28554 IX86_BUILTIN_GATHERSIV2DF);
28556 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
28557 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
28558 IX86_BUILTIN_GATHERSIV4DF);
28560 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
28561 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
28562 IX86_BUILTIN_GATHERDIV2DF);
28564 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
28565 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
28566 IX86_BUILTIN_GATHERDIV4DF);
28568 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
28569 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
28570 IX86_BUILTIN_GATHERSIV4SF);
28572 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
28573 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
28574 IX86_BUILTIN_GATHERSIV8SF);
28576 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
28577 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
28578 IX86_BUILTIN_GATHERDIV4SF);
28580 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
28581 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
28582 IX86_BUILTIN_GATHERDIV8SF);
28584 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
28585 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
28586 IX86_BUILTIN_GATHERSIV2DI);
28588 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
28589 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
28590 IX86_BUILTIN_GATHERSIV4DI);
28592 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
28593 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
28594 IX86_BUILTIN_GATHERDIV2DI);
28596 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
28597 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
28598 IX86_BUILTIN_GATHERDIV4DI);
28600 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
28601 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
28602 IX86_BUILTIN_GATHERSIV4SI);
28604 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
28605 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
28606 IX86_BUILTIN_GATHERSIV8SI);
28608 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
28609 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
28610 IX86_BUILTIN_GATHERDIV4SI);
28612 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
28613 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
28614 IX86_BUILTIN_GATHERDIV8SI);
28616 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
28617 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
28618 IX86_BUILTIN_GATHERALTSIV4DF);
28620 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
28621 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
28622 IX86_BUILTIN_GATHERALTDIV8SF);
28624 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
28625 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
28626 IX86_BUILTIN_GATHERALTSIV4DI);
28628 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
28629 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
28630 IX86_BUILTIN_GATHERALTDIV8SI);
28633 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
28634 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
28636 /* MMX access to the vec_init patterns. */
28637 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
28638 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
28640 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
28641 V4HI_FTYPE_HI_HI_HI_HI,
28642 IX86_BUILTIN_VEC_INIT_V4HI);
28644 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
28645 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
28646 IX86_BUILTIN_VEC_INIT_V8QI);
28648 /* Access to the vec_extract patterns. */
28649 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
28650 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
28651 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
28652 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
28653 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
28654 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
28655 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
28656 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
28657 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
28658 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
28660 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
28661 "__builtin_ia32_vec_ext_v4hi",
28662 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
28664 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
28665 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
28667 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
28668 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
28670 /* Access to the vec_set patterns. */
28671 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
28672 "__builtin_ia32_vec_set_v2di",
28673 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
28675 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
28676 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
28678 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
28679 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
28681 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
28682 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
28684 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
28685 "__builtin_ia32_vec_set_v4hi",
28686 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
28688 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
28689 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
28692 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
28693 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
28694 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
28695 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
28696 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
28697 "__builtin_ia32_rdseed_di_step",
28698 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
28701 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28702 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
28703 def_builtin (OPTION_MASK_ISA_64BIT,
28704 "__builtin_ia32_addcarryx_u64",
28705 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
28706 IX86_BUILTIN_ADDCARRYX64);
28708 /* Add FMA4 multi-arg argument instructions */
28709 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
28714 ftype = (enum ix86_builtin_func_type) d->flag;
28715 def_builtin_const (d->mask, d->name, ftype, d->code);
28719 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
28720 to return a pointer to VERSION_DECL if the outcome of the expression
28721 formed by PREDICATE_CHAIN is true. This function will be called during
28722 version dispatch to decide which function version to execute. It returns
28723 the basic block at the end, to which more conditions can be added. */
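/* Illustrative sketch (not literal GIMPLE): for a VERSION_DECL guarded by
   predicates P1 and P2, the code appended to NEW_BB is roughly

     c1 = P1 (arg1);
     c2 = P2 (arg2);
     c  = MIN (c1, c2);
     if (c > 0)
       return (void *) &VERSION_DECL;

   MIN acts as the logical AND of the predicate results, and on the false
   edge control falls through to the block built for the next version.
   P1/P2 stand for the __builtin_cpu_is/__builtin_cpu_supports predicate
   decls collected in PREDICATE_CHAIN.  */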
28726 add_condition_to_bb (tree function_decl, tree version_decl,
28727 tree predicate_chain, basic_block new_bb)
28729 gimple return_stmt;
28730 tree convert_expr, result_var;
28731 gimple convert_stmt;
28732 gimple call_cond_stmt;
28733 gimple if_else_stmt;
28735 basic_block bb1, bb2, bb3;
28738 tree cond_var, and_expr_var = NULL_TREE;
28741 tree predicate_decl, predicate_arg;
28743 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
28745 gcc_assert (new_bb != NULL);
28746 gseq = bb_seq (new_bb);
28749 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
28750 build_fold_addr_expr (version_decl));
28751 result_var = create_tmp_var (ptr_type_node, NULL);
28752 convert_stmt = gimple_build_assign (result_var, convert_expr);
28753 return_stmt = gimple_build_return (result_var);
28755 if (predicate_chain == NULL_TREE)
28757 gimple_seq_add_stmt (&gseq, convert_stmt);
28758 gimple_seq_add_stmt (&gseq, return_stmt);
28759 set_bb_seq (new_bb, gseq);
28760 gimple_set_bb (convert_stmt, new_bb);
28761 gimple_set_bb (return_stmt, new_bb);
28766 while (predicate_chain != NULL)
28768 cond_var = create_tmp_var (integer_type_node, NULL);
28769 predicate_decl = TREE_PURPOSE (predicate_chain);
28770 predicate_arg = TREE_VALUE (predicate_chain);
28771 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
28772 gimple_call_set_lhs (call_cond_stmt, cond_var);
28774 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
28775 gimple_set_bb (call_cond_stmt, new_bb);
28776 gimple_seq_add_stmt (&gseq, call_cond_stmt);
28778 predicate_chain = TREE_CHAIN (predicate_chain);
28780 if (and_expr_var == NULL)
28781 and_expr_var = cond_var;
28784 gimple assign_stmt;
28785 /* Use MIN_EXPR as a logical AND: if any condition is zero, the result is zero.
28786 and_expr_var = min_expr <cond_var, and_expr_var> */
28787 assign_stmt = gimple_build_assign (and_expr_var,
28788 build2 (MIN_EXPR, integer_type_node,
28789 cond_var, and_expr_var));
28791 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
28792 gimple_set_bb (assign_stmt, new_bb);
28793 gimple_seq_add_stmt (&gseq, assign_stmt);
28797 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
28799 NULL_TREE, NULL_TREE);
28800 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
28801 gimple_set_bb (if_else_stmt, new_bb);
28802 gimple_seq_add_stmt (&gseq, if_else_stmt);
28804 gimple_seq_add_stmt (&gseq, convert_stmt);
28805 gimple_seq_add_stmt (&gseq, return_stmt);
28806 set_bb_seq (new_bb, gseq);
28809 e12 = split_block (bb1, if_else_stmt);
28811 e12->flags &= ~EDGE_FALLTHRU;
28812 e12->flags |= EDGE_TRUE_VALUE;
28814 e23 = split_block (bb2, return_stmt);
28816 gimple_set_bb (convert_stmt, bb2);
28817 gimple_set_bb (return_stmt, bb2);
28820 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
28823 make_edge (bb2, EXIT_BLOCK_PTR, 0);
28830 /* This parses the attribute arguments to target in DECL and determines
28831 the right builtin to use to match the platform specification.
28832 It returns the priority value for this version decl. If PREDICATE_LIST
28833 is not NULL, it stores the list of cpu features that need to be checked
28834 before dispatching this function. */
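/* Hedged example (the function name is hypothetical): a version declared as

     __attribute__ ((target ("arch=corei7,popcnt")))
     int foo (void);

   yields a predicate list containing __builtin_cpu_is ("corei7") and
   __builtin_cpu_supports ("popcnt"), and its priority is the highest
   priority implied by the arch= value and the listed features.  */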
28836 static unsigned int
28837 get_builtin_code_for_version (tree decl, tree *predicate_list)
28840 struct cl_target_option cur_target;
28842 struct cl_target_option *new_target;
28843 const char *arg_str = NULL;
28844 const char *attrs_str = NULL;
28845 char *tok_str = NULL;
28848 /* Priority of i386 features, greater value is higher priority. This is
28849 used to decide the order in which function dispatch must happen. For
28850 instance, a version specialized for SSE4.2 should be checked for dispatch
28851 before a version for SSE3, as SSE4.2 implies SSE3. */
28852 enum feature_priority
28873 enum feature_priority priority = P_ZERO;
28875 /* These are the target attribute strings for which a dispatcher is
28876 available, from fold_builtin_cpu. */
28878 static struct _feature_list
28880 const char *const name;
28881 const enum feature_priority priority;
28883 const feature_list[] =
28889 {"ssse3", P_SSSE3},
28890 {"sse4.1", P_SSE4_1},
28891 {"sse4.2", P_SSE4_2},
28892 {"popcnt", P_POPCNT},
28898 static unsigned int NUM_FEATURES
28899 = sizeof (feature_list) / sizeof (struct _feature_list);
28903 tree predicate_chain = NULL_TREE;
28904 tree predicate_decl, predicate_arg;
28906 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
28907 gcc_assert (attrs != NULL);
28909 attrs = TREE_VALUE (TREE_VALUE (attrs));
28911 gcc_assert (TREE_CODE (attrs) == STRING_CST);
28912 attrs_str = TREE_STRING_POINTER (attrs);
28914 /* Return priority zero for default function. */
28915 if (strcmp (attrs_str, "default") == 0)
28918 /* Handle arch= if specified. For priority, set it to be 1 more than
28919 the best instruction set the processor can handle. For instance, if
28920 there is a version for atom and a version for ssse3 (the highest ISA
28921 priority for atom), the atom version must be checked for dispatch
28922 before the ssse3 version. */
28923 if (strstr (attrs_str, "arch=") != NULL)
28925 cl_target_option_save (&cur_target, &global_options);
28926 target_node = ix86_valid_target_attribute_tree (attrs);
28928 gcc_assert (target_node);
28929 new_target = TREE_TARGET_OPTION (target_node);
28930 gcc_assert (new_target);
28932 if (new_target->arch_specified && new_target->arch > 0)
28934 switch (new_target->arch)
28936 case PROCESSOR_CORE2:
28938 priority = P_PROC_SSSE3;
28940 case PROCESSOR_COREI7:
28941 arg_str = "corei7";
28942 priority = P_PROC_SSE4_2;
28944 case PROCESSOR_ATOM:
28946 priority = P_PROC_SSSE3;
28948 case PROCESSOR_AMDFAM10:
28949 arg_str = "amdfam10h";
28950 priority = P_PROC_SSE4_a;
28952 case PROCESSOR_BDVER1:
28953 arg_str = "bdver1";
28954 priority = P_PROC_FMA;
28956 case PROCESSOR_BDVER2:
28957 arg_str = "bdver2";
28958 priority = P_PROC_FMA;
28963 cl_target_option_restore (&global_options, &cur_target);
28965 if (predicate_list && arg_str == NULL)
28967 error_at (DECL_SOURCE_LOCATION (decl),
28968 "No dispatcher found for the versioning attributes");
28972 if (predicate_list)
28974 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
28975 /* For a C string literal the length includes the trailing NULL. */
28976 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
28977 predicate_chain = tree_cons (predicate_decl, predicate_arg,
28982 /* Process feature name. */
28983 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
28984 strcpy (tok_str, attrs_str);
28985 token = strtok (tok_str, ",");
28986 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
28988 while (token != NULL)
28990 /* Do not process "arch=" */
28991 if (strncmp (token, "arch=", 5) == 0)
28993 token = strtok (NULL, ",");
28996 for (i = 0; i < NUM_FEATURES; ++i)
28998 if (strcmp (token, feature_list[i].name) == 0)
29000 if (predicate_list)
29002 predicate_arg = build_string_literal (
29003 strlen (feature_list[i].name) + 1,
29004 feature_list[i].name);
29005 predicate_chain = tree_cons (predicate_decl, predicate_arg,
29008 /* Find the maximum priority feature. */
29009 if (feature_list[i].priority > priority)
29010 priority = feature_list[i].priority;
29015 if (predicate_list && i == NUM_FEATURES)
29017 error_at (DECL_SOURCE_LOCATION (decl),
29018 "No dispatcher found for %s", token);
29021 token = strtok (NULL, ",");
29025 if (predicate_list && predicate_chain == NULL_TREE)
29027 error_at (DECL_SOURCE_LOCATION (decl),
29028 "No dispatcher found for the versioning attributes : %s",
29032 else if (predicate_list)
29034 predicate_chain = nreverse (predicate_chain);
29035 *predicate_list = predicate_chain;
29041 /* This compares the priority of target features in function DECL1
29042 and DECL2. It returns positive value if DECL1 is higher priority,
29043 negative value if DECL2 is higher priority and 0 if they are the
29047 ix86_compare_version_priority (tree decl1, tree decl2)
29049 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
29050 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
29052 return (int)priority1 - (int)priority2;
29055 /* V1 and V2 point to function versions with different priorities
29056 based on the target ISA. This function compares their priorities. */
29059 feature_compare (const void *v1, const void *v2)
29061 typedef struct _function_version_info
29064 tree predicate_chain;
29065 unsigned int dispatch_priority;
29066 } function_version_info;
29068 const function_version_info c1 = *(const function_version_info *)v1;
29069 const function_version_info c2 = *(const function_version_info *)v2;
29070 return (c2.dispatch_priority - c1.dispatch_priority);
29073 /* This function generates the dispatch function for
29074 multi-versioned functions. DISPATCH_DECL is the function which will
29075 contain the dispatch logic. FNDECLS are the function choices for
29076 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
29077 in DISPATCH_DECL in which the dispatch code is generated. */
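/* Conceptually (a sketch, not the literal GIMPLE built below), the resolver
   body produced here has the shape

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (predicates of the highest-priority version hold)
         return &foo_highest_priority_version;
       ...
       return &foo_default_version;
     }

   with the default version always dispatched last; "foo" and the version
   names are illustrative only.  */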
29080 dispatch_function_versions (tree dispatch_decl,
29082 basic_block *empty_bb)
29085 gimple ifunc_cpu_init_stmt;
29089 vec<tree> *fndecls;
29090 unsigned int num_versions = 0;
29091 unsigned int actual_versions = 0;
29094 struct _function_version_info
29097 tree predicate_chain;
29098 unsigned int dispatch_priority;
29099 }*function_version_info;
29101 gcc_assert (dispatch_decl != NULL
29102 && fndecls_p != NULL
29103 && empty_bb != NULL);
29105 /* fndecls_p is actually a vector. */
29106 fndecls = static_cast<vec<tree> *> (fndecls_p);
29108 /* At least one more version other than the default. */
29109 num_versions = fndecls->length ();
29110 gcc_assert (num_versions >= 2);
29112 function_version_info = (struct _function_version_info *)
29113 XNEWVEC (struct _function_version_info, (num_versions - 1));
29115 /* The first version in the vector is the default decl. */
29116 default_decl = (*fndecls)[0];
29118 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
29120 gseq = bb_seq (*empty_bb);
29121 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
29122 constructors, so explicitly call __builtin_cpu_init here. */
29123 ifunc_cpu_init_stmt = gimple_build_call_vec (
29124 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
29125 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
29126 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
29127 set_bb_seq (*empty_bb, gseq);
29132 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
29134 tree version_decl = ele;
29135 tree predicate_chain = NULL_TREE;
29136 unsigned int priority;
29137 /* Get attribute string, parse it and find the right predicate decl.
29138 The predicate function could be a lengthy combination of many
29139 features, like arch-type and various isa-variants. */
29140 priority = get_builtin_code_for_version (version_decl,
29143 if (predicate_chain == NULL_TREE)
29147 function_version_info [ix - 1].version_decl = version_decl;
29148 function_version_info [ix - 1].predicate_chain = predicate_chain;
29149 function_version_info [ix - 1].dispatch_priority = priority;
29152 /* Sort the versions according to descending order of dispatch priority. The
29153 priority is based on the ISA. This is not a perfect solution. There
29154 could still be ambiguity. If more than one function version is suitable
29155 to execute, which one should be dispatched? In future, allow the user
29156 to specify a dispatch priority next to the version. */
29157 qsort (function_version_info, actual_versions,
29158 sizeof (struct _function_version_info), feature_compare);
29160 for (i = 0; i < actual_versions; ++i)
29161 *empty_bb = add_condition_to_bb (dispatch_decl,
29162 function_version_info[i].version_decl,
29163 function_version_info[i].predicate_chain,
29166 /* dispatch default version at the end. */
29167 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
29170 free (function_version_info);
29174 /* Comparator function to be used in qsort routine to sort attribute
29175 specification strings to "target". */
29178 attr_strcmp (const void *v1, const void *v2)
29180 const char *c1 = *(char *const*)v1;
29181 const char *c2 = *(char *const*)v2;
29182 return strcmp (c1, c2);
29185 /* ARGLIST is the argument to target attribute. This function tokenizes
29186 the comma separated arguments, sorts them and returns a string which
29187 is a unique identifier for the comma separated arguments. It also
29188 replaces non-identifier characters "=,-" with "_". */
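/* For instance (illustrative values), the argument lists "popcnt,arch=core2"
   and "arch=core2,popcnt" both tokenize to { "arch_core2", "popcnt" } after
   the '=' replacement, and therefore both yield the sorted identifier
   "arch_core2_popcnt", so equivalent attribute spellings compare equal.  */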
29191 sorted_attr_string (tree arglist)
29194 size_t str_len_sum = 0;
29195 char **args = NULL;
29196 char *attr_str, *ret_str;
29198 unsigned int argnum = 1;
29201 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
29203 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
29204 size_t len = strlen (str);
29205 str_len_sum += len + 1;
29206 if (arg != arglist)
29208 for (i = 0; i < strlen (str); i++)
29213 attr_str = XNEWVEC (char, str_len_sum);
29215 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
29217 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
29218 size_t len = strlen (str);
29219 memcpy (attr_str + str_len_sum, str, len);
29220 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
29221 str_len_sum += len + 1;
29224 /* Replace "=,-" with "_". */
29225 for (i = 0; i < strlen (attr_str); i++)
29226 if (attr_str[i] == '=' || attr_str[i]== '-')
29232 args = XNEWVEC (char *, argnum);
29235 attr = strtok (attr_str, ",");
29236 while (attr != NULL)
29240 attr = strtok (NULL, ",");
29243 qsort (args, argnum, sizeof (char *), attr_strcmp);
29245 ret_str = XNEWVEC (char, str_len_sum);
29247 for (i = 0; i < argnum; i++)
29249 size_t len = strlen (args[i]);
29250 memcpy (ret_str + str_len_sum, args[i], len);
29251 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
29252 str_len_sum += len + 1;
29256 XDELETEVEC (attr_str);
29260 /* This function changes the assembler name for functions that are
29261 versions. If DECL is a function version and has a "target"
29262 attribute, it appends the attribute string to its assembler name. */
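/* Sketch with a hypothetical decl: a version declared as

     __attribute__ ((target ("popcnt"))) int foo (void);

   has its assembler name rewritten here to "foo.popcnt", while the version
   tagged "default" keeps the original name.  */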
29265 ix86_mangle_function_version_assembler_name (tree decl, tree id)
29268 const char *orig_name, *version_string;
29269 char *attr_str, *assembler_name;
29271 if (DECL_DECLARED_INLINE_P (decl)
29272 && lookup_attribute ("gnu_inline",
29273 DECL_ATTRIBUTES (decl)))
29274 error_at (DECL_SOURCE_LOCATION (decl),
29275 "Function versions cannot be marked as gnu_inline,"
29276 " bodies have to be generated");
29278 if (DECL_VIRTUAL_P (decl)
29279 || DECL_VINDEX (decl))
29280 sorry ("Virtual function multiversioning not supported");
29282 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
29284 /* target attribute string cannot be NULL. */
29285 gcc_assert (version_attr != NULL_TREE);
29287 orig_name = IDENTIFIER_POINTER (id);
29289 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
29291 if (strcmp (version_string, "default") == 0)
29294 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
29295 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
29297 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
29299 /* Allow assembler name to be modified if already set. */
29300 if (DECL_ASSEMBLER_NAME_SET_P (decl))
29301 SET_DECL_RTL (decl, NULL);
29303 tree ret = get_identifier (assembler_name);
29304 XDELETEVEC (attr_str);
29305 XDELETEVEC (assembler_name);
29309 /* This function returns true if FN1 and FN2 are versions of the same function,
29310 that is, the target strings of the function decls are different. This assumes
29311 that FN1 and FN2 have the same signature. */
29314 ix86_function_versions (tree fn1, tree fn2)
29317 char *target1, *target2;
29320 if (TREE_CODE (fn1) != FUNCTION_DECL
29321 || TREE_CODE (fn2) != FUNCTION_DECL)
29324 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
29325 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
29327 /* At least one function decl should have the target attribute specified. */
29328 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
29331 /* Diagnose missing target attribute if one of the decls is already
29332 multi-versioned. */
29333 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
29335 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
29337 if (attr2 != NULL_TREE)
29344 error_at (DECL_SOURCE_LOCATION (fn2),
29345 "missing %<target%> attribute for multi-versioned %D",
29347 error_at (DECL_SOURCE_LOCATION (fn1),
29348 "previous declaration of %D", fn1);
29349 /* Prevent diagnosing of the same error multiple times. */
29350 DECL_ATTRIBUTES (fn2)
29351 = tree_cons (get_identifier ("target"),
29352 copy_node (TREE_VALUE (attr1)),
29353 DECL_ATTRIBUTES (fn2));
29358 target1 = sorted_attr_string (TREE_VALUE (attr1));
29359 target2 = sorted_attr_string (TREE_VALUE (attr2));
29361 /* The sorted target strings must be different for fn1 and fn2
29363 if (strcmp (target1, target2) == 0)
29368 XDELETEVEC (target1);
29369 XDELETEVEC (target2);
29375 ix86_mangle_decl_assembler_name (tree decl, tree id)
29377 /* For function version, add the target suffix to the assembler name. */
29378 if (TREE_CODE (decl) == FUNCTION_DECL
29379 && DECL_FUNCTION_VERSIONED (decl))
29380 id = ix86_mangle_function_version_assembler_name (decl, id);
29381 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
29382 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
29388 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
29389 is true, append the full path name of the source file. */
29392 make_name (tree decl, const char *suffix, bool make_unique)
29394 char *global_var_name;
29397 const char *unique_name = NULL;
29399 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29401 /* Get a unique name that can be used globally without any chance
29402 of collision at link time. */
29404 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
29406 name_len = strlen (name) + strlen (suffix) + 2;
29409 name_len += strlen (unique_name) + 1;
29410 global_var_name = XNEWVEC (char, name_len);
29412 /* Use '.' to concatenate names as it is demangler friendly. */
29414 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
29417 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
29419 return global_var_name;
29422 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
29424 /* Make a dispatcher declaration for the multi-versioned function DECL.
29425 Calls to DECL function will be replaced with calls to the dispatcher
29426 by the front-end. Return the decl created. */
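/* The dispatcher name is built by make_name with the "ifunc" suffix, so for
   an (illustrative) default decl "foo" the dispatcher decl is "foo.ifunc";
   it is marked TREE_PUBLIC because IFUNCs must be externally visible.  */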
29429 make_dispatcher_decl (const tree decl)
29433 tree fn_type, func_type;
29434 bool is_uniq = false;
29436 if (TREE_PUBLIC (decl) == 0)
29439 func_name = make_name (decl, "ifunc", is_uniq);
29441 fn_type = TREE_TYPE (decl);
29442 func_type = build_function_type (TREE_TYPE (fn_type),
29443 TYPE_ARG_TYPES (fn_type));
29445 func_decl = build_fn_decl (func_name, func_type);
29446 XDELETEVEC (func_name);
29447 TREE_USED (func_decl) = 1;
29448 DECL_CONTEXT (func_decl) = NULL_TREE;
29449 DECL_INITIAL (func_decl) = error_mark_node;
29450 DECL_ARTIFICIAL (func_decl) = 1;
29451 /* Mark this func as external, the resolver will flip it again if
29452 it gets generated. */
29453 DECL_EXTERNAL (func_decl) = 1;
29454 /* IFUNCs have to be externally visible. */
29455 TREE_PUBLIC (func_decl) = 1;
29462 /* Returns true if DECL is multi-versioned and is the default function,
29463 that is, it is not tagged with a target-specific optimization. */
29466 is_function_default_version (const tree decl)
29468 if (TREE_CODE (decl) != FUNCTION_DECL
29469 || !DECL_FUNCTION_VERSIONED (decl))
29471 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
29473 attr = TREE_VALUE (TREE_VALUE (attr));
29474 return (TREE_CODE (attr) == STRING_CST
29475 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
29478 /* Make a dispatcher declaration for the multi-versioned function DECL.
29479 Calls to DECL function will be replaced with calls to the dispatcher
29480 by the front-end. Returns the decl of the dispatcher function. */
29483 ix86_get_function_versions_dispatcher (void *decl)
29485 tree fn = (tree) decl;
29486 struct cgraph_node *node = NULL;
29487 struct cgraph_node *default_node = NULL;
29488 struct cgraph_function_version_info *node_v = NULL;
29489 struct cgraph_function_version_info *first_v = NULL;
29491 tree dispatch_decl = NULL;
29493 struct cgraph_function_version_info *default_version_info = NULL;
29495 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
29497 node = cgraph_get_node (fn);
29498 gcc_assert (node != NULL);
29500 node_v = get_cgraph_node_version (node);
29501 gcc_assert (node_v != NULL);
29503 if (node_v->dispatcher_resolver != NULL)
29504 return node_v->dispatcher_resolver;
29506 /* Find the default version and make it the first node. */
29508 /* Go to the beginning of the chain. */
29509 while (first_v->prev != NULL)
29510 first_v = first_v->prev;
29511 default_version_info = first_v;
29512 while (default_version_info != NULL)
29514 if (is_function_default_version
29515 (default_version_info->this_node->symbol.decl))
29517 default_version_info = default_version_info->next;
29520 /* If there is no default node, just return NULL. */
29521 if (default_version_info == NULL)
29524 /* Make default info the first node. */
29525 if (first_v != default_version_info)
29527 default_version_info->prev->next = default_version_info->next;
29528 if (default_version_info->next)
29529 default_version_info->next->prev = default_version_info->prev;
29530 first_v->prev = default_version_info;
29531 default_version_info->next = first_v;
29532 default_version_info->prev = NULL;
29535 default_node = default_version_info->this_node;
29537 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
29538 if (targetm.has_ifunc_p ())
29540 struct cgraph_function_version_info *it_v = NULL;
29541 struct cgraph_node *dispatcher_node = NULL;
29542 struct cgraph_function_version_info *dispatcher_version_info = NULL;
29544 /* Right now, the dispatching is done via ifunc. */
29545 dispatch_decl = make_dispatcher_decl (default_node->symbol.decl);
29547 dispatcher_node = cgraph_get_create_node (dispatch_decl);
29548 gcc_assert (dispatcher_node != NULL);
29549 dispatcher_node->dispatcher_function = 1;
29550 dispatcher_version_info
29551 = insert_new_cgraph_node_version (dispatcher_node);
29552 dispatcher_version_info->next = default_version_info;
29553 dispatcher_node->symbol.definition = 1;
29555 /* Set the dispatcher for all the versions. */
29556 it_v = default_version_info;
29557 while (it_v != NULL)
29559 it_v->dispatcher_resolver = dispatch_decl;
29566 error_at (DECL_SOURCE_LOCATION (default_node->symbol.decl),
29567 "multiversioning needs ifunc which is not supported "
29571 return dispatch_decl;
29574 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
29578 make_attribute (const char *name, const char *arg_name, tree chain)
29581 tree attr_arg_name;
29585 attr_name = get_identifier (name);
29586 attr_arg_name = build_string (strlen (arg_name), arg_name);
29587 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
29588 attr = tree_cons (attr_name, attr_args, chain);
29592 /* Make the resolver function decl to dispatch the versions of
29593 a multi-versioned function, DEFAULT_DECL. Create an
29594 empty basic block in the resolver and store the pointer in
29595 EMPTY_BB. Return the decl of the resolver function. */
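/* Illustratively, for a default decl "foo" the resolver created here is
   named "foo.resolver" (combined with a file-based unique string when the
   versions are not externally visible), and DISPATCH_DECL is given an
   ifunc ("foo.resolver") attribute so calls are routed through the
   resolver at load time.  */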
29598 make_resolver_func (const tree default_decl,
29599 const tree dispatch_decl,
29600 basic_block *empty_bb)
29602 char *resolver_name;
29603 tree decl, type, decl_name, t;
29604 bool is_uniq = false;
29606 /* IFUNCs have to be globally visible.  So, if the default_decl is
29607 not, then the name of the IFUNC should be made unique. */
29608 if (TREE_PUBLIC (default_decl) == 0)
29611 /* Append the filename to the resolver function if the versions are
29612 not externally visible. This is because the resolver function has
29613 to be externally visible for the loader to find it. So, appending
29614 the filename will prevent conflicts with a resolver function from
29615 another module which is based on the same version name. */
29616 resolver_name = make_name (default_decl, "resolver", is_uniq);
29618 /* The resolver function should return a (void *). */
29619 type = build_function_type_list (ptr_type_node, NULL_TREE);
29621 decl = build_fn_decl (resolver_name, type);
29622 decl_name = get_identifier (resolver_name);
29623 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
29625 DECL_NAME (decl) = decl_name;
29626 TREE_USED (decl) = 1;
29627 DECL_ARTIFICIAL (decl) = 1;
29628 DECL_IGNORED_P (decl) = 0;
29629 /* IFUNC resolvers have to be externally visible. */
29630 TREE_PUBLIC (decl) = 1;
29631 DECL_UNINLINABLE (decl) = 0;
29633 /* Resolver is not external, body is generated. */
29634 DECL_EXTERNAL (decl) = 0;
29635 DECL_EXTERNAL (dispatch_decl) = 0;
29637 DECL_CONTEXT (decl) = NULL_TREE;
29638 DECL_INITIAL (decl) = make_node (BLOCK);
29639 DECL_STATIC_CONSTRUCTOR (decl) = 0;
29641 if (DECL_COMDAT_GROUP (default_decl)
29642 || TREE_PUBLIC (default_decl))
29644 /* In this case, each translation unit with a call to this
29645 versioned function will put out a resolver. Ensure it
29646 is comdat to keep just one copy. */
29647 DECL_COMDAT (decl) = 1;
29648 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
29650 /* Build result decl and add to function_decl. */
29651 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
29652 DECL_ARTIFICIAL (t) = 1;
29653 DECL_IGNORED_P (t) = 1;
29654 DECL_RESULT (decl) = t;
29656 gimplify_function_tree (decl);
29657 push_cfun (DECL_STRUCT_FUNCTION (decl));
29658 *empty_bb = init_lowered_empty_function (decl, false);
29660 cgraph_add_new_function (decl, true);
29661 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
29665 gcc_assert (dispatch_decl != NULL);
29666 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
29667 DECL_ATTRIBUTES (dispatch_decl)
29668 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
29670 /* Create the alias for dispatch to resolver here. */
29671 /*cgraph_create_function_alias (dispatch_decl, decl);*/
29672 cgraph_same_body_alias (NULL, dispatch_decl, decl);
29673 XDELETEVEC (resolver_name);
29677 /* Generate the dispatching code body to dispatch multi-versioned function
29678 DECL. The target hook is called to process the "target" attributes and
29679 provide the code to dispatch the right function at run-time. NODE points
29680 to the dispatcher decl whose body will be created. */
29683 ix86_generate_version_dispatcher_body (void *node_p)
29685 tree resolver_decl;
29686 basic_block empty_bb;
29687 vec<tree> fn_ver_vec = vNULL;
29688 tree default_ver_decl;
29689 struct cgraph_node *versn;
29690 struct cgraph_node *node;
29692 struct cgraph_function_version_info *node_version_info = NULL;
29693 struct cgraph_function_version_info *versn_info = NULL;
29695 node = (cgraph_node *)node_p;
29697 node_version_info = get_cgraph_node_version (node);
29698 gcc_assert (node->dispatcher_function
29699 && node_version_info != NULL);
29701 if (node_version_info->dispatcher_resolver)
29702 return node_version_info->dispatcher_resolver;
29704 /* The first version in the chain corresponds to the default version. */
29705 default_ver_decl = node_version_info->next->this_node->symbol.decl;
29707 /* node is going to be an alias, so remove the finalized bit. */
29708 node->symbol.definition = false;
29710 resolver_decl = make_resolver_func (default_ver_decl,
29711 node->symbol.decl, &empty_bb);
29713 node_version_info->dispatcher_resolver = resolver_decl;
29715 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
29717 fn_ver_vec.create (2);
29719 for (versn_info = node_version_info->next; versn_info;
29720 versn_info = versn_info->next)
29722 versn = versn_info->this_node;
29723 /* Check for virtual functions here again, as by this time it should
29724 have been determined if this function needs a vtable index or
29725 not. This happens for methods in derived classes that override
29726 virtual methods in base classes but are not explicitly marked as
29728 if (DECL_VINDEX (versn->symbol.decl))
29729 sorry ("Virtual function multiversioning not supported");
29731 fn_ver_vec.safe_push (versn->symbol.decl);
29734 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
29735 fn_ver_vec.release ();
29736 rebuild_cgraph_edges ();
29738 return resolver_decl;
29740 /* This builds the processor_model struct type defined in
29741 libgcc/config/i386/cpuinfo.c */
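/* The mirrored layout is, in effect:

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   The fields built below must stay in sync with that definition.  */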
29744 build_processor_model_struct (void)
29746 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
29748 tree field = NULL_TREE, field_chain = NULL_TREE;
29750 tree type = make_node (RECORD_TYPE);
29752 /* The first 3 fields are unsigned int. */
29753 for (i = 0; i < 3; ++i)
29755 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
29756 get_identifier (field_name[i]), unsigned_type_node);
29757 if (field_chain != NULL_TREE)
29758 DECL_CHAIN (field) = field_chain;
29759 field_chain = field;
29762 /* The last field is an array of unsigned integers of size one. */
29763 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
29764 get_identifier (field_name[3]),
29765 build_array_type (unsigned_type_node,
29766 build_index_type (size_one_node)));
29767 if (field_chain != NULL_TREE)
29768 DECL_CHAIN (field) = field_chain;
29769 field_chain = field;
29771 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
29775 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
29778 make_var_decl (tree type, const char *name)
29782 new_decl = build_decl (UNKNOWN_LOCATION,
29784 get_identifier(name),
29787 DECL_EXTERNAL (new_decl) = 1;
29788 TREE_STATIC (new_decl) = 1;
29789 TREE_PUBLIC (new_decl) = 1;
29790 DECL_INITIAL (new_decl) = 0;
29791 DECL_ARTIFICIAL (new_decl) = 0;
29792 DECL_PRESERVE_P (new_decl) = 1;
29794 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
29795 assemble_variable (new_decl, 0, 0, 0);
29800 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
29801 into an integer defined in libgcc/config/i386/cpuinfo.c */
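/* Sketch of the folding performed below, using the enum values defined in
   this function:

     __builtin_cpu_is ("corei7")
       -> (int) (__cpu_model.__cpu_type == M_INTEL_COREI7 - M_CPU_TYPE_START)
     __builtin_cpu_supports ("popcnt")
       -> (int) (__cpu_model.__cpu_features[0] & (1 << F_POPCNT))

   where __cpu_model is the __processor_model variable supplied by libgcc.  */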
29804 fold_builtin_cpu (tree fndecl, tree *args)
29807 enum ix86_builtins fn_code = (enum ix86_builtins)
29808 DECL_FUNCTION_CODE (fndecl);
29809 tree param_string_cst = NULL;
29811 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
29812 enum processor_features
29828 /* These are the values for vendor types and cpu types and subtypes
29829 in cpuinfo.c.  Cpu types and subtypes have the corresponding start
29830 value subtracted from them before being stored. */
29831 enum processor_model
29842 M_CPU_SUBTYPE_START,
29843 M_INTEL_COREI7_NEHALEM,
29844 M_INTEL_COREI7_WESTMERE,
29845 M_INTEL_COREI7_SANDYBRIDGE,
29846 M_AMDFAM10H_BARCELONA,
29847 M_AMDFAM10H_SHANGHAI,
29848 M_AMDFAM10H_ISTANBUL,
29849 M_AMDFAM15H_BDVER1,
29850 M_AMDFAM15H_BDVER2,
29854 static struct _arch_names_table
29856 const char *const name;
29857 const enum processor_model model;
29859 const arch_names_table[] =
29862 {"intel", M_INTEL},
29863 {"atom", M_INTEL_ATOM},
29864 {"slm", M_INTEL_SLM},
29865 {"core2", M_INTEL_CORE2},
29866 {"corei7", M_INTEL_COREI7},
29867 {"nehalem", M_INTEL_COREI7_NEHALEM},
29868 {"westmere", M_INTEL_COREI7_WESTMERE},
29869 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
29870 {"amdfam10h", M_AMDFAM10H},
29871 {"barcelona", M_AMDFAM10H_BARCELONA},
29872 {"shanghai", M_AMDFAM10H_SHANGHAI},
29873 {"istanbul", M_AMDFAM10H_ISTANBUL},
29874 {"amdfam15h", M_AMDFAM15H},
29875 {"bdver1", M_AMDFAM15H_BDVER1},
29876 {"bdver2", M_AMDFAM15H_BDVER2},
29877 {"bdver3", M_AMDFAM15H_BDVER3},
29880 static struct _isa_names_table
29882 const char *const name;
29883 const enum processor_features feature;
29885 const isa_names_table[] =
29889 {"popcnt", F_POPCNT},
29893 {"ssse3", F_SSSE3},
29894 {"sse4.1", F_SSE4_1},
29895 {"sse4.2", F_SSE4_2},
29900 tree __processor_model_type = build_processor_model_struct ();
29901 tree __cpu_model_var = make_var_decl (__processor_model_type,
29905 varpool_add_new_variable (__cpu_model_var);
29907 gcc_assert ((args != NULL) && (*args != NULL));
29909 param_string_cst = *args;
29910 while (param_string_cst
29911 && TREE_CODE (param_string_cst) != STRING_CST)
29913 /* *args must be an expr that can contain other EXPRS leading to a
29915 if (!EXPR_P (param_string_cst))
29917 error ("Parameter to builtin must be a string constant or literal");
29918 return integer_zero_node;
29920 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
29923 gcc_assert (param_string_cst);
29925 if (fn_code == IX86_BUILTIN_CPU_IS)
29931 unsigned int field_val = 0;
29932 unsigned int NUM_ARCH_NAMES
29933 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
29935 for (i = 0; i < NUM_ARCH_NAMES; i++)
29936 if (strcmp (arch_names_table[i].name,
29937 TREE_STRING_POINTER (param_string_cst)) == 0)
29940 if (i == NUM_ARCH_NAMES)
29942 error ("Parameter to builtin not valid: %s",
29943 TREE_STRING_POINTER (param_string_cst));
29944 return integer_zero_node;
29947 field = TYPE_FIELDS (__processor_model_type);
29948 field_val = arch_names_table[i].model;
29950 /* CPU types are stored in the next field. */
29951 if (field_val > M_CPU_TYPE_START
29952 && field_val < M_CPU_SUBTYPE_START)
29954 field = DECL_CHAIN (field);
29955 field_val -= M_CPU_TYPE_START;
29958 /* CPU subtypes are stored in the next field. */
29959 if (field_val > M_CPU_SUBTYPE_START)
29961 field = DECL_CHAIN (DECL_CHAIN (field));
29962 field_val -= M_CPU_SUBTYPE_START;
29965 /* Get the appropriate field in __cpu_model. */
29966 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
29969 /* Check the value. */
29970 final = build2 (EQ_EXPR, unsigned_type_node, ref,
29971 build_int_cstu (unsigned_type_node, field_val));
29972 return build1 (CONVERT_EXPR, integer_type_node, final);
29974 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
29981 unsigned int field_val = 0;
29982 unsigned int NUM_ISA_NAMES
29983 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
29985 for (i = 0; i < NUM_ISA_NAMES; i++)
29986 if (strcmp (isa_names_table[i].name,
29987 TREE_STRING_POINTER (param_string_cst)) == 0)
29990 if (i == NUM_ISA_NAMES)
29992 error ("Parameter to builtin not valid: %s",
29993 TREE_STRING_POINTER (param_string_cst));
29994 return integer_zero_node;
29997 field = TYPE_FIELDS (__processor_model_type);
29998 /* Get the last field, which is __cpu_features. */
29999 while (DECL_CHAIN (field))
30000 field = DECL_CHAIN (field);
30002 /* Get the appropriate field: __cpu_model.__cpu_features */
30003 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
30006 /* Access the 0th element of __cpu_features array. */
30007 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
30008 integer_zero_node, NULL_TREE, NULL_TREE);
30010 field_val = (1 << isa_names_table[i].feature);
30011 /* Return __cpu_model.__cpu_features[0] & field_val */
30012 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
30013 build_int_cstu (unsigned_type_node, field_val));
30014 return build1 (CONVERT_EXPR, integer_type_node, final);
30016 gcc_unreachable ();
30020 ix86_fold_builtin (tree fndecl, int n_args,
30021 tree *args, bool ignore ATTRIBUTE_UNUSED)
30023 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
30025 enum ix86_builtins fn_code = (enum ix86_builtins)
30026 DECL_FUNCTION_CODE (fndecl);
30027 if (fn_code == IX86_BUILTIN_CPU_IS
30028 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
30030 gcc_assert (n_args == 1);
30031 return fold_builtin_cpu (fndecl, args);
30035 #ifdef SUBTARGET_FOLD_BUILTIN
30036 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
30042 /* Make builtins to detect cpu type and features supported. NAME is
30043 the builtin name, CODE is the builtin code, and FTYPE is the function
30044 type of the builtin. */
30047 make_cpu_type_builtin (const char* name, int code,
30048 enum ix86_builtin_func_type ftype, bool is_const)
30053 type = ix86_get_builtin_func_type (ftype);
30054 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30056 gcc_assert (decl != NULL_TREE);
30057 ix86_builtins[(int) code] = decl;
30058 TREE_READONLY (decl) = is_const;
30061 /* Make builtins to get CPU type and features supported. The created
30064 __builtin_cpu_init (), to detect cpu type and features,
30065 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
30066 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
30070 ix86_init_platform_type_builtins (void)
30072 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
30073 INT_FTYPE_VOID, false);
30074 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
30075 INT_FTYPE_PCCHAR, true);
30076 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
30077 INT_FTYPE_PCCHAR, true);
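/* A minimal usage sketch from user code (function names are hypothetical):

     if (__builtin_cpu_is ("corei7") || __builtin_cpu_supports ("sse4.2"))
       fast_path ();
     else
       generic_path ();

   __builtin_cpu_init () must have run first; the IFUNC resolvers generated
   for multi-versioned functions call it explicitly because they can fire
   before ordinary constructors.  */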
30080 /* Internal method for ix86_init_builtins. */
30083 ix86_init_builtins_va_builtins_abi (void)
30085 tree ms_va_ref, sysv_va_ref;
30086 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
30087 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
30088 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
30089 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
30093 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
30094 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
30095 ms_va_ref = build_reference_type (ms_va_list_type_node);
30097 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
30100 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
30101 fnvoid_va_start_ms =
30102 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
30103 fnvoid_va_end_sysv =
30104 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
30105 fnvoid_va_start_sysv =
30106 build_varargs_function_type_list (void_type_node, sysv_va_ref,
30108 fnvoid_va_copy_ms =
30109 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
30111 fnvoid_va_copy_sysv =
30112 build_function_type_list (void_type_node, sysv_va_ref,
30113 sysv_va_ref, NULL_TREE);
30115 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
30116 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
30117 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
30118 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
30119 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
30120 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
30121 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
30122 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
30123 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
30124 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
30125 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
30126 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
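/* Illustrative sketch of the varargs builtins registered above,
   assuming an x86-64 target where the ms_abi attribute and the
   __builtin_ms_va_list type are available:

     int __attribute__ ((ms_abi))
     ms_sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   The __builtin_sysv_* variants work the same way for functions
   explicitly marked sysv_abi.  */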
30130 ix86_init_builtin_types (void)
30132 tree float128_type_node, float80_type_node;
30134 /* The __float80 type. */
30135 float80_type_node = long_double_type_node;
30136 if (TYPE_MODE (float80_type_node) != XFmode)
30138 /* long double is not the 80-bit extended type here, so build a separate __float80 node.  */
30139 float80_type_node = make_node (REAL_TYPE);
30141 TYPE_PRECISION (float80_type_node) = 80;
30142 layout_type (float80_type_node);
30144 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
30146 /* The __float128 type. */
30147 float128_type_node = make_node (REAL_TYPE);
30148 TYPE_PRECISION (float128_type_node) = 128;
30149 layout_type (float128_type_node);
30150 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
30152 /* This macro is built by i386-builtin-types.awk. */
30153 DEFINE_BUILTIN_PRIMITIVE_TYPES;
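/* Illustrative sketch of the extended floating-point types registered
   above, assuming a target where both are available:

     __float80  ext  = 1.0L;      80-bit extended precision (XFmode)
     __float128 quad = 1.0Q;      128-bit quad precision (TFmode)

   The 'Q' literal suffix and the type names are the ones GCC exposes
   for these modes.  */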
30157 ix86_init_builtins (void)
30161 ix86_init_builtin_types ();
30163 /* Builtins to get CPU type and features. */
30164 ix86_init_platform_type_builtins ();
30166 /* TFmode support builtins. */
30167 def_builtin_const (0, "__builtin_infq",
30168 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
30169 def_builtin_const (0, "__builtin_huge_valq",
30170 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
30172 /* We will expand them to a normal call if SSE isn't available, since
30173 they are used by libgcc.  */
30174 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
30175 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
30176 BUILT_IN_MD, "__fabstf2", NULL_TREE);
30177 TREE_READONLY (t) = 1;
30178 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
30180 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
30181 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
30182 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
30183 TREE_READONLY (t) = 1;
30184 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
30186 ix86_init_tm_builtins ();
30187 ix86_init_mmx_sse_builtins ();
30190 ix86_init_builtins_va_builtins_abi ();
30192 #ifdef SUBTARGET_INIT_BUILTINS
30193 SUBTARGET_INIT_BUILTINS;
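/* Illustrative sketch of the TFmode helper builtins registered above:

     __float128 x = __builtin_huge_valq ();
     __float128 y = __builtin_fabsq (x);
     __float128 z = __builtin_copysignq (y, -1.0Q);

   When SSE is not available, __builtin_fabsq and __builtin_copysignq
   expand to calls to __fabstf2 and __copysigntf3 in libgcc, which is
   why they are registered with those library names above.  */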
30197 /* Return the ix86 builtin for CODE. */
30200 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
30202 if (code >= IX86_BUILTIN_MAX)
30203 return error_mark_node;
30205 return ix86_builtins[code];
30208 /* Errors in the source file can cause expand_expr to return const0_rtx
30209 where we expect a vector. To avoid crashing, use one of the vector
30210 clear instructions. */
30212 safe_vector_operand (rtx x, enum machine_mode mode)
30214 if (x == const0_rtx)
30215 x = CONST0_RTX (mode);
30219 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
30222 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
30225 tree arg0 = CALL_EXPR_ARG (exp, 0);
30226 tree arg1 = CALL_EXPR_ARG (exp, 1);
30227 rtx op0 = expand_normal (arg0);
30228 rtx op1 = expand_normal (arg1);
30229 enum machine_mode tmode = insn_data[icode].operand[0].mode;
30230 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
30231 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
30233 if (VECTOR_MODE_P (mode0))
30234 op0 = safe_vector_operand (op0, mode0);
30235 if (VECTOR_MODE_P (mode1))
30236 op1 = safe_vector_operand (op1, mode1);
30238 if (optimize || !target
30239 || GET_MODE (target) != tmode
30240 || !insn_data[icode].operand[0].predicate (target, tmode))
30241 target = gen_reg_rtx (tmode);
30243 if (GET_MODE (op1) == SImode && mode1 == TImode)
30245 rtx x = gen_reg_rtx (V4SImode);
30246 emit_insn (gen_sse2_loadd (x, op1));
30247 op1 = gen_lowpart (TImode, x);
30250 if (!insn_data[icode].operand[1].predicate (op0, mode0))
30251 op0 = copy_to_mode_reg (mode0, op0);
30252 if (!insn_data[icode].operand[2].predicate (op1, mode1))
30253 op1 = copy_to_mode_reg (mode1, op1);
30255 pat = GEN_FCN (icode) (target, op0, op1);
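/* Illustrative sketch (assuming GCC's usual intrinsic wrappers): a
   two-operand SSE intrinsic such as

     __m128 sum = _mm_add_ps (a, b);

   is typically implemented with a builtin like __builtin_ia32_addps
   and reaches this helper, which copies both operands into registers
   satisfying the insn predicates and emits the single two-operand
   pattern selected by ICODE.  */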
30264 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
30267 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
30268 enum ix86_builtin_func_type m_type,
30269 enum rtx_code sub_code)
30274 bool comparison_p = false;
30276 bool last_arg_constant = false;
30277 int num_memory = 0;
30280 enum machine_mode mode;
30283 enum machine_mode tmode = insn_data[icode].operand[0].mode;
30287 case MULTI_ARG_4_DF2_DI_I:
30288 case MULTI_ARG_4_DF2_DI_I1:
30289 case MULTI_ARG_4_SF2_SI_I:
30290 case MULTI_ARG_4_SF2_SI_I1:
30292 last_arg_constant = true;
30295 case MULTI_ARG_3_SF:
30296 case MULTI_ARG_3_DF:
30297 case MULTI_ARG_3_SF2:
30298 case MULTI_ARG_3_DF2:
30299 case MULTI_ARG_3_DI:
30300 case MULTI_ARG_3_SI:
30301 case MULTI_ARG_3_SI_DI:
30302 case MULTI_ARG_3_HI:
30303 case MULTI_ARG_3_HI_SI:
30304 case MULTI_ARG_3_QI:
30305 case MULTI_ARG_3_DI2:
30306 case MULTI_ARG_3_SI2:
30307 case MULTI_ARG_3_HI2:
30308 case MULTI_ARG_3_QI2:
30312 case MULTI_ARG_2_SF:
30313 case MULTI_ARG_2_DF:
30314 case MULTI_ARG_2_DI:
30315 case MULTI_ARG_2_SI:
30316 case MULTI_ARG_2_HI:
30317 case MULTI_ARG_2_QI:
30321 case MULTI_ARG_2_DI_IMM:
30322 case MULTI_ARG_2_SI_IMM:
30323 case MULTI_ARG_2_HI_IMM:
30324 case MULTI_ARG_2_QI_IMM:
30326 last_arg_constant = true;
30329 case MULTI_ARG_1_SF:
30330 case MULTI_ARG_1_DF:
30331 case MULTI_ARG_1_SF2:
30332 case MULTI_ARG_1_DF2:
30333 case MULTI_ARG_1_DI:
30334 case MULTI_ARG_1_SI:
30335 case MULTI_ARG_1_HI:
30336 case MULTI_ARG_1_QI:
30337 case MULTI_ARG_1_SI_DI:
30338 case MULTI_ARG_1_HI_DI:
30339 case MULTI_ARG_1_HI_SI:
30340 case MULTI_ARG_1_QI_DI:
30341 case MULTI_ARG_1_QI_SI:
30342 case MULTI_ARG_1_QI_HI:
30346 case MULTI_ARG_2_DI_CMP:
30347 case MULTI_ARG_2_SI_CMP:
30348 case MULTI_ARG_2_HI_CMP:
30349 case MULTI_ARG_2_QI_CMP:
30351 comparison_p = true;
30354 case MULTI_ARG_2_SF_TF:
30355 case MULTI_ARG_2_DF_TF:
30356 case MULTI_ARG_2_DI_TF:
30357 case MULTI_ARG_2_SI_TF:
30358 case MULTI_ARG_2_HI_TF:
30359 case MULTI_ARG_2_QI_TF:
30365 gcc_unreachable ();
30368 if (optimize || !target
30369 || GET_MODE (target) != tmode
30370 || !insn_data[icode].operand[0].predicate (target, tmode))
30371 target = gen_reg_rtx (tmode);
30373 gcc_assert (nargs <= 4);
30375 for (i = 0; i < nargs; i++)
30377 tree arg = CALL_EXPR_ARG (exp, i);
30378 rtx op = expand_normal (arg);
30379 int adjust = (comparison_p) ? 1 : 0;
30380 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
30382 if (last_arg_constant && i == nargs - 1)
30384 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
30386 enum insn_code new_icode = icode;
30389 case CODE_FOR_xop_vpermil2v2df3:
30390 case CODE_FOR_xop_vpermil2v4sf3:
30391 case CODE_FOR_xop_vpermil2v4df3:
30392 case CODE_FOR_xop_vpermil2v8sf3:
30393 error ("the last argument must be a 2-bit immediate");
30394 return gen_reg_rtx (tmode);
30395 case CODE_FOR_xop_rotlv2di3:
30396 new_icode = CODE_FOR_rotlv2di3;
30398 case CODE_FOR_xop_rotlv4si3:
30399 new_icode = CODE_FOR_rotlv4si3;
30401 case CODE_FOR_xop_rotlv8hi3:
30402 new_icode = CODE_FOR_rotlv8hi3;
30404 case CODE_FOR_xop_rotlv16qi3:
30405 new_icode = CODE_FOR_rotlv16qi3;
30407 if (CONST_INT_P (op))
30409 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
30410 op = GEN_INT (INTVAL (op) & mask);
30411 gcc_checking_assert
30412 (insn_data[icode].operand[i + 1].predicate (op, mode));
30416 gcc_checking_assert
30418 && insn_data[new_icode].operand[0].mode == tmode
30419 && insn_data[new_icode].operand[1].mode == tmode
30420 && insn_data[new_icode].operand[2].mode == mode
30421 && insn_data[new_icode].operand[0].predicate
30422 == insn_data[icode].operand[0].predicate
30423 && insn_data[new_icode].operand[1].predicate
30424 == insn_data[icode].operand[1].predicate);
30430 gcc_unreachable ();
30437 if (VECTOR_MODE_P (mode))
30438 op = safe_vector_operand (op, mode);
30440 /* If we aren't optimizing, only allow one memory operand to be generated.  */
30442 if (memory_operand (op, mode))
30445 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
30448 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
30450 op = force_reg (mode, op);
30454 args[i].mode = mode;
30460 pat = GEN_FCN (icode) (target, args[0].op);
30465 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
30466 GEN_INT ((int)sub_code));
30467 else if (! comparison_p)
30468 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
30471 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
30475 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
30480 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
30484 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
30488 gcc_unreachable ();
30498 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
30499 insns with vec_merge. */
30502 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
30506 tree arg0 = CALL_EXPR_ARG (exp, 0);
30507 rtx op1, op0 = expand_normal (arg0);
30508 enum machine_mode tmode = insn_data[icode].operand[0].mode;
30509 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
30511 if (optimize || !target
30512 || GET_MODE (target) != tmode
30513 || !insn_data[icode].operand[0].predicate (target, tmode))
30514 target = gen_reg_rtx (tmode);
30516 if (VECTOR_MODE_P (mode0))
30517 op0 = safe_vector_operand (op0, mode0);
30519 if ((optimize && !register_operand (op0, mode0))
30520 || !insn_data[icode].operand[1].predicate (op0, mode0))
30521 op0 = copy_to_mode_reg (mode0, op0);
30524 if (!insn_data[icode].operand[2].predicate (op1, mode0))
30525 op1 = copy_to_mode_reg (mode0, op1);
30527 pat = GEN_FCN (icode) (target, op0, op1);
30534 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
30537 ix86_expand_sse_compare (const struct builtin_description *d,
30538 tree exp, rtx target, bool swap)
30541 tree arg0 = CALL_EXPR_ARG (exp, 0);
30542 tree arg1 = CALL_EXPR_ARG (exp, 1);
30543 rtx op0 = expand_normal (arg0);
30544 rtx op1 = expand_normal (arg1);
30546 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
30547 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
30548 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
30549 enum rtx_code comparison = d->comparison;
30551 if (VECTOR_MODE_P (mode0))
30552 op0 = safe_vector_operand (op0, mode0);
30553 if (VECTOR_MODE_P (mode1))
30554 op1 = safe_vector_operand (op1, mode1);
30556 /* Swap operands if we have a comparison that isn't available in hardware.  */
30560 rtx tmp = gen_reg_rtx (mode1);
30561 emit_move_insn (tmp, op1);
30566 if (optimize || !target
30567 || GET_MODE (target) != tmode
30568 || !insn_data[d->icode].operand[0].predicate (target, tmode))
30569 target = gen_reg_rtx (tmode);
30571 if ((optimize && !register_operand (op0, mode0))
30572 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
30573 op0 = copy_to_mode_reg (mode0, op0);
30574 if ((optimize && !register_operand (op1, mode1))
30575 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
30576 op1 = copy_to_mode_reg (mode1, op1);
30578 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
30579 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
30586 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
30589 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
30593 tree arg0 = CALL_EXPR_ARG (exp, 0);
30594 tree arg1 = CALL_EXPR_ARG (exp, 1);
30595 rtx op0 = expand_normal (arg0);
30596 rtx op1 = expand_normal (arg1);
30597 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
30598 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
30599 enum rtx_code comparison = d->comparison;
30601 if (VECTOR_MODE_P (mode0))
30602 op0 = safe_vector_operand (op0, mode0);
30603 if (VECTOR_MODE_P (mode1))
30604 op1 = safe_vector_operand (op1, mode1);
30606 /* Swap operands if we have a comparison that isn't available in hardware.  */
30608 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
30615 target = gen_reg_rtx (SImode);
30616 emit_move_insn (target, const0_rtx);
30617 target = gen_rtx_SUBREG (QImode, target, 0);
30619 if ((optimize && !register_operand (op0, mode0))
30620 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
30621 op0 = copy_to_mode_reg (mode0, op0);
30622 if ((optimize && !register_operand (op1, mode1))
30623 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
30624 op1 = copy_to_mode_reg (mode1, op1);
30626 pat = GEN_FCN (d->icode) (op0, op1);
30630 emit_insn (gen_rtx_SET (VOIDmode,
30631 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
30632 gen_rtx_fmt_ee (comparison, QImode,
30636 return SUBREG_REG (target);
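/* Illustrative sketch: the scalar compare intrinsics, e.g.

     int eq = _mm_comieq_ss (a, b);

   go through this helper; the comi/ucomi insn sets the flags register
   and the 0/1 result is materialized by a setcc into the low byte of
   a fresh SImode pseudo, which is what the builtin returns.  */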
30639 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
30642 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
30646 tree arg0 = CALL_EXPR_ARG (exp, 0);
30647 rtx op1, op0 = expand_normal (arg0);
30648 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
30649 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
30651 if (optimize || target == 0
30652 || GET_MODE (target) != tmode
30653 || !insn_data[d->icode].operand[0].predicate (target, tmode))
30654 target = gen_reg_rtx (tmode);
30656 if (VECTOR_MODE_P (mode0))
30657 op0 = safe_vector_operand (op0, mode0);
30659 if ((optimize && !register_operand (op0, mode0))
30660 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
30661 op0 = copy_to_mode_reg (mode0, op0);
30663 op1 = GEN_INT (d->comparison);
30665 pat = GEN_FCN (d->icode) (target, op0, op1);
30673 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
30674 tree exp, rtx target)
30677 tree arg0 = CALL_EXPR_ARG (exp, 0);
30678 tree arg1 = CALL_EXPR_ARG (exp, 1);
30679 rtx op0 = expand_normal (arg0);
30680 rtx op1 = expand_normal (arg1);
30682 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
30683 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
30684 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
30686 if (optimize || target == 0
30687 || GET_MODE (target) != tmode
30688 || !insn_data[d->icode].operand[0].predicate (target, tmode))
30689 target = gen_reg_rtx (tmode);
30691 op0 = safe_vector_operand (op0, mode0);
30692 op1 = safe_vector_operand (op1, mode1);
30694 if ((optimize && !register_operand (op0, mode0))
30695 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
30696 op0 = copy_to_mode_reg (mode0, op0);
30697 if ((optimize && !register_operand (op1, mode1))
30698 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
30699 op1 = copy_to_mode_reg (mode1, op1);
30701 op2 = GEN_INT (d->comparison);
30703 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
30710 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
30713 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
30717 tree arg0 = CALL_EXPR_ARG (exp, 0);
30718 tree arg1 = CALL_EXPR_ARG (exp, 1);
30719 rtx op0 = expand_normal (arg0);
30720 rtx op1 = expand_normal (arg1);
30721 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
30722 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
30723 enum rtx_code comparison = d->comparison;
30725 if (VECTOR_MODE_P (mode0))
30726 op0 = safe_vector_operand (op0, mode0);
30727 if (VECTOR_MODE_P (mode1))
30728 op1 = safe_vector_operand (op1, mode1);
30730 target = gen_reg_rtx (SImode);
30731 emit_move_insn (target, const0_rtx);
30732 target = gen_rtx_SUBREG (QImode, target, 0);
30734 if ((optimize && !register_operand (op0, mode0))
30735 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
30736 op0 = copy_to_mode_reg (mode0, op0);
30737 if ((optimize && !register_operand (op1, mode1))
30738 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
30739 op1 = copy_to_mode_reg (mode1, op1);
30741 pat = GEN_FCN (d->icode) (op0, op1);
30745 emit_insn (gen_rtx_SET (VOIDmode,
30746 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
30747 gen_rtx_fmt_ee (comparison, QImode,
30751 return SUBREG_REG (target);
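/* Illustrative sketch: the SSE4.1/AVX ptest intrinsics, e.g.

     int all_zero = _mm_testz_si128 (mask, val);

   are handled here much like the comi case above: the ptest insn sets
   the flags and a setcc of d->comparison extracts the 0/1 result.  */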
30754 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
30757 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
30758 tree exp, rtx target)
30761 tree arg0 = CALL_EXPR_ARG (exp, 0);
30762 tree arg1 = CALL_EXPR_ARG (exp, 1);
30763 tree arg2 = CALL_EXPR_ARG (exp, 2);
30764 tree arg3 = CALL_EXPR_ARG (exp, 3);
30765 tree arg4 = CALL_EXPR_ARG (exp, 4);
30766 rtx scratch0, scratch1;
30767 rtx op0 = expand_normal (arg0);
30768 rtx op1 = expand_normal (arg1);
30769 rtx op2 = expand_normal (arg2);
30770 rtx op3 = expand_normal (arg3);
30771 rtx op4 = expand_normal (arg4);
30772 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
30774 tmode0 = insn_data[d->icode].operand[0].mode;
30775 tmode1 = insn_data[d->icode].operand[1].mode;
30776 modev2 = insn_data[d->icode].operand[2].mode;
30777 modei3 = insn_data[d->icode].operand[3].mode;
30778 modev4 = insn_data[d->icode].operand[4].mode;
30779 modei5 = insn_data[d->icode].operand[5].mode;
30780 modeimm = insn_data[d->icode].operand[6].mode;
30782 if (VECTOR_MODE_P (modev2))
30783 op0 = safe_vector_operand (op0, modev2);
30784 if (VECTOR_MODE_P (modev4))
30785 op2 = safe_vector_operand (op2, modev4);
30787 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
30788 op0 = copy_to_mode_reg (modev2, op0);
30789 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
30790 op1 = copy_to_mode_reg (modei3, op1);
30791 if ((optimize && !register_operand (op2, modev4))
30792 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
30793 op2 = copy_to_mode_reg (modev4, op2);
30794 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
30795 op3 = copy_to_mode_reg (modei5, op3);
30797 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
30799 error ("the fifth argument must be an 8-bit immediate");
30803 if (d->code == IX86_BUILTIN_PCMPESTRI128)
30805 if (optimize || !target
30806 || GET_MODE (target) != tmode0
30807 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
30808 target = gen_reg_rtx (tmode0);
30810 scratch1 = gen_reg_rtx (tmode1);
30812 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
30814 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
30816 if (optimize || !target
30817 || GET_MODE (target) != tmode1
30818 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
30819 target = gen_reg_rtx (tmode1);
30821 scratch0 = gen_reg_rtx (tmode0);
30823 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
30827 gcc_assert (d->flag);
30829 scratch0 = gen_reg_rtx (tmode0);
30830 scratch1 = gen_reg_rtx (tmode1);
30832 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
30842 target = gen_reg_rtx (SImode);
30843 emit_move_insn (target, const0_rtx);
30844 target = gen_rtx_SUBREG (QImode, target, 0);
30847 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
30848 gen_rtx_fmt_ee (EQ, QImode,
30849 gen_rtx_REG ((enum machine_mode) d->flag,
30852 return SUBREG_REG (target);
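/* Illustrative sketch of the SSE4.2 explicit-length string intrinsics
   expanded above:

     int idx = _mm_cmpestri (a, la, b, lb,
                             _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);

   The five arguments correspond to the two vectors, their lengths and
   the 8-bit control immediate checked above; the PCMPESTRI form
   returns the index, PCMPESTRM returns the mask, and the flag-only
   variants return a setcc of the requested flag.  */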
30859 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
30862 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
30863 tree exp, rtx target)
30866 tree arg0 = CALL_EXPR_ARG (exp, 0);
30867 tree arg1 = CALL_EXPR_ARG (exp, 1);
30868 tree arg2 = CALL_EXPR_ARG (exp, 2);
30869 rtx scratch0, scratch1;
30870 rtx op0 = expand_normal (arg0);
30871 rtx op1 = expand_normal (arg1);
30872 rtx op2 = expand_normal (arg2);
30873 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
30875 tmode0 = insn_data[d->icode].operand[0].mode;
30876 tmode1 = insn_data[d->icode].operand[1].mode;
30877 modev2 = insn_data[d->icode].operand[2].mode;
30878 modev3 = insn_data[d->icode].operand[3].mode;
30879 modeimm = insn_data[d->icode].operand[4].mode;
30881 if (VECTOR_MODE_P (modev2))
30882 op0 = safe_vector_operand (op0, modev2);
30883 if (VECTOR_MODE_P (modev3))
30884 op1 = safe_vector_operand (op1, modev3);
30886 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
30887 op0 = copy_to_mode_reg (modev2, op0);
30888 if ((optimize && !register_operand (op1, modev3))
30889 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
30890 op1 = copy_to_mode_reg (modev3, op1);
30892 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
30894 error ("the third argument must be an 8-bit immediate");
30898 if (d->code == IX86_BUILTIN_PCMPISTRI128)
30900 if (optimize || !target
30901 || GET_MODE (target) != tmode0
30902 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
30903 target = gen_reg_rtx (tmode0);
30905 scratch1 = gen_reg_rtx (tmode1);
30907 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
30909 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
30911 if (optimize || !target
30912 || GET_MODE (target) != tmode1
30913 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
30914 target = gen_reg_rtx (tmode1);
30916 scratch0 = gen_reg_rtx (tmode0);
30918 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
30922 gcc_assert (d->flag);
30924 scratch0 = gen_reg_rtx (tmode0);
30925 scratch1 = gen_reg_rtx (tmode1);
30927 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
30937 target = gen_reg_rtx (SImode);
30938 emit_move_insn (target, const0_rtx);
30939 target = gen_rtx_SUBREG (QImode, target, 0);
30942 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
30943 gen_rtx_fmt_ee (EQ, QImode,
30944 gen_rtx_REG ((enum machine_mode) d->flag,
30947 return SUBREG_REG (target);
30953 /* Subroutine of ix86_expand_builtin to take care of insns with
30954 a variable number of operands.  */
30957 ix86_expand_args_builtin (const struct builtin_description *d,
30958 tree exp, rtx target)
30960 rtx pat, real_target;
30961 unsigned int i, nargs;
30962 unsigned int nargs_constant = 0;
30963 int num_memory = 0;
30967 enum machine_mode mode;
30969 bool last_arg_count = false;
30970 enum insn_code icode = d->icode;
30971 const struct insn_data_d *insn_p = &insn_data[icode];
30972 enum machine_mode tmode = insn_p->operand[0].mode;
30973 enum machine_mode rmode = VOIDmode;
30975 enum rtx_code comparison = d->comparison;
30977 switch ((enum ix86_builtin_func_type) d->flag)
30979 case V2DF_FTYPE_V2DF_ROUND:
30980 case V4DF_FTYPE_V4DF_ROUND:
30981 case V4SF_FTYPE_V4SF_ROUND:
30982 case V8SF_FTYPE_V8SF_ROUND:
30983 case V4SI_FTYPE_V4SF_ROUND:
30984 case V8SI_FTYPE_V8SF_ROUND:
30985 return ix86_expand_sse_round (d, exp, target);
30986 case V4SI_FTYPE_V2DF_V2DF_ROUND:
30987 case V8SI_FTYPE_V4DF_V4DF_ROUND:
30988 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
30989 case INT_FTYPE_V8SF_V8SF_PTEST:
30990 case INT_FTYPE_V4DI_V4DI_PTEST:
30991 case INT_FTYPE_V4DF_V4DF_PTEST:
30992 case INT_FTYPE_V4SF_V4SF_PTEST:
30993 case INT_FTYPE_V2DI_V2DI_PTEST:
30994 case INT_FTYPE_V2DF_V2DF_PTEST:
30995 return ix86_expand_sse_ptest (d, exp, target);
30996 case FLOAT128_FTYPE_FLOAT128:
30997 case FLOAT_FTYPE_FLOAT:
30998 case INT_FTYPE_INT:
30999 case UINT64_FTYPE_INT:
31000 case UINT16_FTYPE_UINT16:
31001 case INT64_FTYPE_INT64:
31002 case INT64_FTYPE_V4SF:
31003 case INT64_FTYPE_V2DF:
31004 case INT_FTYPE_V16QI:
31005 case INT_FTYPE_V8QI:
31006 case INT_FTYPE_V8SF:
31007 case INT_FTYPE_V4DF:
31008 case INT_FTYPE_V4SF:
31009 case INT_FTYPE_V2DF:
31010 case INT_FTYPE_V32QI:
31011 case V16QI_FTYPE_V16QI:
31012 case V8SI_FTYPE_V8SF:
31013 case V8SI_FTYPE_V4SI:
31014 case V8HI_FTYPE_V8HI:
31015 case V8HI_FTYPE_V16QI:
31016 case V8QI_FTYPE_V8QI:
31017 case V8SF_FTYPE_V8SF:
31018 case V8SF_FTYPE_V8SI:
31019 case V8SF_FTYPE_V4SF:
31020 case V8SF_FTYPE_V8HI:
31021 case V4SI_FTYPE_V4SI:
31022 case V4SI_FTYPE_V16QI:
31023 case V4SI_FTYPE_V4SF:
31024 case V4SI_FTYPE_V8SI:
31025 case V4SI_FTYPE_V8HI:
31026 case V4SI_FTYPE_V4DF:
31027 case V4SI_FTYPE_V2DF:
31028 case V4HI_FTYPE_V4HI:
31029 case V4DF_FTYPE_V4DF:
31030 case V4DF_FTYPE_V4SI:
31031 case V4DF_FTYPE_V4SF:
31032 case V4DF_FTYPE_V2DF:
31033 case V4SF_FTYPE_V4SF:
31034 case V4SF_FTYPE_V4SI:
31035 case V4SF_FTYPE_V8SF:
31036 case V4SF_FTYPE_V4DF:
31037 case V4SF_FTYPE_V8HI:
31038 case V4SF_FTYPE_V2DF:
31039 case V2DI_FTYPE_V2DI:
31040 case V2DI_FTYPE_V16QI:
31041 case V2DI_FTYPE_V8HI:
31042 case V2DI_FTYPE_V4SI:
31043 case V2DF_FTYPE_V2DF:
31044 case V2DF_FTYPE_V4SI:
31045 case V2DF_FTYPE_V4DF:
31046 case V2DF_FTYPE_V4SF:
31047 case V2DF_FTYPE_V2SI:
31048 case V2SI_FTYPE_V2SI:
31049 case V2SI_FTYPE_V4SF:
31050 case V2SI_FTYPE_V2SF:
31051 case V2SI_FTYPE_V2DF:
31052 case V2SF_FTYPE_V2SF:
31053 case V2SF_FTYPE_V2SI:
31054 case V32QI_FTYPE_V32QI:
31055 case V32QI_FTYPE_V16QI:
31056 case V16HI_FTYPE_V16HI:
31057 case V16HI_FTYPE_V8HI:
31058 case V8SI_FTYPE_V8SI:
31059 case V16HI_FTYPE_V16QI:
31060 case V8SI_FTYPE_V16QI:
31061 case V4DI_FTYPE_V16QI:
31062 case V8SI_FTYPE_V8HI:
31063 case V4DI_FTYPE_V8HI:
31064 case V4DI_FTYPE_V4SI:
31065 case V4DI_FTYPE_V2DI:
31068 case V4SF_FTYPE_V4SF_VEC_MERGE:
31069 case V2DF_FTYPE_V2DF_VEC_MERGE:
31070 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
31071 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
31072 case V16QI_FTYPE_V16QI_V16QI:
31073 case V16QI_FTYPE_V8HI_V8HI:
31074 case V8QI_FTYPE_V8QI_V8QI:
31075 case V8QI_FTYPE_V4HI_V4HI:
31076 case V8HI_FTYPE_V8HI_V8HI:
31077 case V8HI_FTYPE_V16QI_V16QI:
31078 case V8HI_FTYPE_V4SI_V4SI:
31079 case V8SF_FTYPE_V8SF_V8SF:
31080 case V8SF_FTYPE_V8SF_V8SI:
31081 case V4SI_FTYPE_V4SI_V4SI:
31082 case V4SI_FTYPE_V8HI_V8HI:
31083 case V4SI_FTYPE_V4SF_V4SF:
31084 case V4SI_FTYPE_V2DF_V2DF:
31085 case V4HI_FTYPE_V4HI_V4HI:
31086 case V4HI_FTYPE_V8QI_V8QI:
31087 case V4HI_FTYPE_V2SI_V2SI:
31088 case V4DF_FTYPE_V4DF_V4DF:
31089 case V4DF_FTYPE_V4DF_V4DI:
31090 case V4SF_FTYPE_V4SF_V4SF:
31091 case V4SF_FTYPE_V4SF_V4SI:
31092 case V4SF_FTYPE_V4SF_V2SI:
31093 case V4SF_FTYPE_V4SF_V2DF:
31094 case V4SF_FTYPE_V4SF_DI:
31095 case V4SF_FTYPE_V4SF_SI:
31096 case V2DI_FTYPE_V2DI_V2DI:
31097 case V2DI_FTYPE_V16QI_V16QI:
31098 case V2DI_FTYPE_V4SI_V4SI:
31099 case V2UDI_FTYPE_V4USI_V4USI:
31100 case V2DI_FTYPE_V2DI_V16QI:
31101 case V2DI_FTYPE_V2DF_V2DF:
31102 case V2SI_FTYPE_V2SI_V2SI:
31103 case V2SI_FTYPE_V4HI_V4HI:
31104 case V2SI_FTYPE_V2SF_V2SF:
31105 case V2DF_FTYPE_V2DF_V2DF:
31106 case V2DF_FTYPE_V2DF_V4SF:
31107 case V2DF_FTYPE_V2DF_V2DI:
31108 case V2DF_FTYPE_V2DF_DI:
31109 case V2DF_FTYPE_V2DF_SI:
31110 case V2SF_FTYPE_V2SF_V2SF:
31111 case V1DI_FTYPE_V1DI_V1DI:
31112 case V1DI_FTYPE_V8QI_V8QI:
31113 case V1DI_FTYPE_V2SI_V2SI:
31114 case V32QI_FTYPE_V16HI_V16HI:
31115 case V16HI_FTYPE_V8SI_V8SI:
31116 case V32QI_FTYPE_V32QI_V32QI:
31117 case V16HI_FTYPE_V32QI_V32QI:
31118 case V16HI_FTYPE_V16HI_V16HI:
31119 case V8SI_FTYPE_V4DF_V4DF:
31120 case V8SI_FTYPE_V8SI_V8SI:
31121 case V8SI_FTYPE_V16HI_V16HI:
31122 case V4DI_FTYPE_V4DI_V4DI:
31123 case V4DI_FTYPE_V8SI_V8SI:
31124 case V4UDI_FTYPE_V8USI_V8USI:
31125 if (comparison == UNKNOWN)
31126 return ix86_expand_binop_builtin (icode, exp, target);
31129 case V4SF_FTYPE_V4SF_V4SF_SWAP:
31130 case V2DF_FTYPE_V2DF_V2DF_SWAP:
31131 gcc_assert (comparison != UNKNOWN);
31135 case V16HI_FTYPE_V16HI_V8HI_COUNT:
31136 case V16HI_FTYPE_V16HI_SI_COUNT:
31137 case V8SI_FTYPE_V8SI_V4SI_COUNT:
31138 case V8SI_FTYPE_V8SI_SI_COUNT:
31139 case V4DI_FTYPE_V4DI_V2DI_COUNT:
31140 case V4DI_FTYPE_V4DI_INT_COUNT:
31141 case V8HI_FTYPE_V8HI_V8HI_COUNT:
31142 case V8HI_FTYPE_V8HI_SI_COUNT:
31143 case V4SI_FTYPE_V4SI_V4SI_COUNT:
31144 case V4SI_FTYPE_V4SI_SI_COUNT:
31145 case V4HI_FTYPE_V4HI_V4HI_COUNT:
31146 case V4HI_FTYPE_V4HI_SI_COUNT:
31147 case V2DI_FTYPE_V2DI_V2DI_COUNT:
31148 case V2DI_FTYPE_V2DI_SI_COUNT:
31149 case V2SI_FTYPE_V2SI_V2SI_COUNT:
31150 case V2SI_FTYPE_V2SI_SI_COUNT:
31151 case V1DI_FTYPE_V1DI_V1DI_COUNT:
31152 case V1DI_FTYPE_V1DI_SI_COUNT:
31154 last_arg_count = true;
31156 case UINT64_FTYPE_UINT64_UINT64:
31157 case UINT_FTYPE_UINT_UINT:
31158 case UINT_FTYPE_UINT_USHORT:
31159 case UINT_FTYPE_UINT_UCHAR:
31160 case UINT16_FTYPE_UINT16_INT:
31161 case UINT8_FTYPE_UINT8_INT:
31164 case V2DI_FTYPE_V2DI_INT_CONVERT:
31167 nargs_constant = 1;
31169 case V4DI_FTYPE_V4DI_INT_CONVERT:
31172 nargs_constant = 1;
31174 case V8HI_FTYPE_V8HI_INT:
31175 case V8HI_FTYPE_V8SF_INT:
31176 case V8HI_FTYPE_V4SF_INT:
31177 case V8SF_FTYPE_V8SF_INT:
31178 case V4SI_FTYPE_V4SI_INT:
31179 case V4SI_FTYPE_V8SI_INT:
31180 case V4HI_FTYPE_V4HI_INT:
31181 case V4DF_FTYPE_V4DF_INT:
31182 case V4SF_FTYPE_V4SF_INT:
31183 case V4SF_FTYPE_V8SF_INT:
31184 case V2DI_FTYPE_V2DI_INT:
31185 case V2DF_FTYPE_V2DF_INT:
31186 case V2DF_FTYPE_V4DF_INT:
31187 case V16HI_FTYPE_V16HI_INT:
31188 case V8SI_FTYPE_V8SI_INT:
31189 case V4DI_FTYPE_V4DI_INT:
31190 case V2DI_FTYPE_V4DI_INT:
31192 nargs_constant = 1;
31194 case V16QI_FTYPE_V16QI_V16QI_V16QI:
31195 case V8SF_FTYPE_V8SF_V8SF_V8SF:
31196 case V4DF_FTYPE_V4DF_V4DF_V4DF:
31197 case V4SF_FTYPE_V4SF_V4SF_V4SF:
31198 case V2DF_FTYPE_V2DF_V2DF_V2DF:
31199 case V32QI_FTYPE_V32QI_V32QI_V32QI:
31202 case V32QI_FTYPE_V32QI_V32QI_INT:
31203 case V16HI_FTYPE_V16HI_V16HI_INT:
31204 case V16QI_FTYPE_V16QI_V16QI_INT:
31205 case V4DI_FTYPE_V4DI_V4DI_INT:
31206 case V8HI_FTYPE_V8HI_V8HI_INT:
31207 case V8SI_FTYPE_V8SI_V8SI_INT:
31208 case V8SI_FTYPE_V8SI_V4SI_INT:
31209 case V8SF_FTYPE_V8SF_V8SF_INT:
31210 case V8SF_FTYPE_V8SF_V4SF_INT:
31211 case V4SI_FTYPE_V4SI_V4SI_INT:
31212 case V4DF_FTYPE_V4DF_V4DF_INT:
31213 case V4DF_FTYPE_V4DF_V2DF_INT:
31214 case V4SF_FTYPE_V4SF_V4SF_INT:
31215 case V2DI_FTYPE_V2DI_V2DI_INT:
31216 case V4DI_FTYPE_V4DI_V2DI_INT:
31217 case V2DF_FTYPE_V2DF_V2DF_INT:
31219 nargs_constant = 1;
31221 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
31224 nargs_constant = 1;
31226 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
31229 nargs_constant = 1;
31231 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
31234 nargs_constant = 1;
31236 case V2DI_FTYPE_V2DI_UINT_UINT:
31238 nargs_constant = 2;
31240 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
31241 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
31242 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
31243 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
31245 nargs_constant = 1;
31247 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
31249 nargs_constant = 2;
31251 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
31252 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
31256 gcc_unreachable ();
31259 gcc_assert (nargs <= ARRAY_SIZE (args));
31261 if (comparison != UNKNOWN)
31263 gcc_assert (nargs == 2);
31264 return ix86_expand_sse_compare (d, exp, target, swap);
31267 if (rmode == VOIDmode || rmode == tmode)
31271 || GET_MODE (target) != tmode
31272 || !insn_p->operand[0].predicate (target, tmode))
31273 target = gen_reg_rtx (tmode);
31274 real_target = target;
31278 target = gen_reg_rtx (rmode);
31279 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
31282 for (i = 0; i < nargs; i++)
31284 tree arg = CALL_EXPR_ARG (exp, i);
31285 rtx op = expand_normal (arg);
31286 enum machine_mode mode = insn_p->operand[i + 1].mode;
31287 bool match = insn_p->operand[i + 1].predicate (op, mode);
31289 if (last_arg_count && (i + 1) == nargs)
30291 /* SIMD shift insns take either an 8-bit immediate or a
30292 register as the count.  But the builtin functions take an int as the
30293 count.  If the count doesn't match, we put it in a register.  */
31296 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
31297 if (!insn_p->operand[i + 1].predicate (op, mode))
31298 op = copy_to_reg (op);
31301 else if ((nargs - i) <= nargs_constant)
31306 case CODE_FOR_avx2_inserti128:
31307 case CODE_FOR_avx2_extracti128:
31308 error ("the last argument must be an 1-bit immediate");
31311 case CODE_FOR_sse4_1_roundsd:
31312 case CODE_FOR_sse4_1_roundss:
31314 case CODE_FOR_sse4_1_roundpd:
31315 case CODE_FOR_sse4_1_roundps:
31316 case CODE_FOR_avx_roundpd256:
31317 case CODE_FOR_avx_roundps256:
31319 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
31320 case CODE_FOR_sse4_1_roundps_sfix:
31321 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
31322 case CODE_FOR_avx_roundps_sfix256:
31324 case CODE_FOR_sse4_1_blendps:
31325 case CODE_FOR_avx_blendpd256:
31326 case CODE_FOR_avx_vpermilv4df:
31327 error ("the last argument must be a 4-bit immediate");
31330 case CODE_FOR_sse4_1_blendpd:
31331 case CODE_FOR_avx_vpermilv2df:
31332 case CODE_FOR_xop_vpermil2v2df3:
31333 case CODE_FOR_xop_vpermil2v4sf3:
31334 case CODE_FOR_xop_vpermil2v4df3:
31335 case CODE_FOR_xop_vpermil2v8sf3:
31336 error ("the last argument must be a 2-bit immediate");
31339 case CODE_FOR_avx_vextractf128v4df:
31340 case CODE_FOR_avx_vextractf128v8sf:
31341 case CODE_FOR_avx_vextractf128v8si:
31342 case CODE_FOR_avx_vinsertf128v4df:
31343 case CODE_FOR_avx_vinsertf128v8sf:
31344 case CODE_FOR_avx_vinsertf128v8si:
31345 error ("the last argument must be a 1-bit immediate");
31348 case CODE_FOR_avx_vmcmpv2df3:
31349 case CODE_FOR_avx_vmcmpv4sf3:
31350 case CODE_FOR_avx_cmpv2df3:
31351 case CODE_FOR_avx_cmpv4sf3:
31352 case CODE_FOR_avx_cmpv4df3:
31353 case CODE_FOR_avx_cmpv8sf3:
31354 error ("the last argument must be a 5-bit immediate");
31358 switch (nargs_constant)
31361 if ((nargs - i) == nargs_constant)
31363 error ("the next to last argument must be an 8-bit immediate");
31367 error ("the last argument must be an 8-bit immediate");
31370 gcc_unreachable ();
31377 if (VECTOR_MODE_P (mode))
31378 op = safe_vector_operand (op, mode);
31380 /* If we aren't optimizing, only allow one memory operand to be generated.  */
31382 if (memory_operand (op, mode))
31385 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
31387 if (optimize || !match || num_memory > 1)
31388 op = copy_to_mode_reg (mode, op);
31392 op = copy_to_reg (op);
31393 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
31398 args[i].mode = mode;
31404 pat = GEN_FCN (icode) (real_target, args[0].op);
31407 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
31410 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
31414 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
31415 args[2].op, args[3].op);
31418 gcc_unreachable ();
31428 /* Subroutine of ix86_expand_builtin to take care of special insns
31429 with a variable number of operands.  */
31432 ix86_expand_special_args_builtin (const struct builtin_description *d,
31433 tree exp, rtx target)
31437 unsigned int i, nargs, arg_adjust, memory;
31441 enum machine_mode mode;
31443 enum insn_code icode = d->icode;
31444 bool last_arg_constant = false;
31445 const struct insn_data_d *insn_p = &insn_data[icode];
31446 enum machine_mode tmode = insn_p->operand[0].mode;
31447 enum { load, store } klass;
31449 switch ((enum ix86_builtin_func_type) d->flag)
31451 case VOID_FTYPE_VOID:
31452 emit_insn (GEN_FCN (icode) (target));
31454 case VOID_FTYPE_UINT64:
31455 case VOID_FTYPE_UNSIGNED:
31461 case INT_FTYPE_VOID:
31462 case UINT64_FTYPE_VOID:
31463 case UNSIGNED_FTYPE_VOID:
31468 case UINT64_FTYPE_PUNSIGNED:
31469 case V2DI_FTYPE_PV2DI:
31470 case V4DI_FTYPE_PV4DI:
31471 case V32QI_FTYPE_PCCHAR:
31472 case V16QI_FTYPE_PCCHAR:
31473 case V8SF_FTYPE_PCV4SF:
31474 case V8SF_FTYPE_PCFLOAT:
31475 case V4SF_FTYPE_PCFLOAT:
31476 case V4DF_FTYPE_PCV2DF:
31477 case V4DF_FTYPE_PCDOUBLE:
31478 case V2DF_FTYPE_PCDOUBLE:
31479 case VOID_FTYPE_PVOID:
31484 case VOID_FTYPE_PV2SF_V4SF:
31485 case VOID_FTYPE_PV4DI_V4DI:
31486 case VOID_FTYPE_PV2DI_V2DI:
31487 case VOID_FTYPE_PCHAR_V32QI:
31488 case VOID_FTYPE_PCHAR_V16QI:
31489 case VOID_FTYPE_PFLOAT_V8SF:
31490 case VOID_FTYPE_PFLOAT_V4SF:
31491 case VOID_FTYPE_PDOUBLE_V4DF:
31492 case VOID_FTYPE_PDOUBLE_V2DF:
31493 case VOID_FTYPE_PLONGLONG_LONGLONG:
31494 case VOID_FTYPE_PULONGLONG_ULONGLONG:
31495 case VOID_FTYPE_PINT_INT:
31498 /* Reserve memory operand for target. */
31499 memory = ARRAY_SIZE (args);
31501 case V4SF_FTYPE_V4SF_PCV2SF:
31502 case V2DF_FTYPE_V2DF_PCDOUBLE:
31507 case V8SF_FTYPE_PCV8SF_V8SI:
31508 case V4DF_FTYPE_PCV4DF_V4DI:
31509 case V4SF_FTYPE_PCV4SF_V4SI:
31510 case V2DF_FTYPE_PCV2DF_V2DI:
31511 case V8SI_FTYPE_PCV8SI_V8SI:
31512 case V4DI_FTYPE_PCV4DI_V4DI:
31513 case V4SI_FTYPE_PCV4SI_V4SI:
31514 case V2DI_FTYPE_PCV2DI_V2DI:
31519 case VOID_FTYPE_PV8SF_V8SI_V8SF:
31520 case VOID_FTYPE_PV4DF_V4DI_V4DF:
31521 case VOID_FTYPE_PV4SF_V4SI_V4SF:
31522 case VOID_FTYPE_PV2DF_V2DI_V2DF:
31523 case VOID_FTYPE_PV8SI_V8SI_V8SI:
31524 case VOID_FTYPE_PV4DI_V4DI_V4DI:
31525 case VOID_FTYPE_PV4SI_V4SI_V4SI:
31526 case VOID_FTYPE_PV2DI_V2DI_V2DI:
31529 /* Reserve memory operand for target. */
31530 memory = ARRAY_SIZE (args);
31532 case VOID_FTYPE_UINT_UINT_UINT:
31533 case VOID_FTYPE_UINT64_UINT_UINT:
31534 case UCHAR_FTYPE_UINT_UINT_UINT:
31535 case UCHAR_FTYPE_UINT64_UINT_UINT:
31538 memory = ARRAY_SIZE (args);
31539 last_arg_constant = true;
31542 gcc_unreachable ();
31545 gcc_assert (nargs <= ARRAY_SIZE (args));
31547 if (klass == store)
31549 arg = CALL_EXPR_ARG (exp, 0);
31550 op = expand_normal (arg);
31551 gcc_assert (target == 0);
31554 op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
31555 target = gen_rtx_MEM (tmode, op);
31558 target = force_reg (tmode, op);
31566 || !register_operand (target, tmode)
31567 || GET_MODE (target) != tmode)
31568 target = gen_reg_rtx (tmode);
31571 for (i = 0; i < nargs; i++)
31573 enum machine_mode mode = insn_p->operand[i + 1].mode;
31576 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
31577 op = expand_normal (arg);
31578 match = insn_p->operand[i + 1].predicate (op, mode);
31580 if (last_arg_constant && (i + 1) == nargs)
31584 if (icode == CODE_FOR_lwp_lwpvalsi3
31585 || icode == CODE_FOR_lwp_lwpinssi3
31586 || icode == CODE_FOR_lwp_lwpvaldi3
31587 || icode == CODE_FOR_lwp_lwpinsdi3)
31588 error ("the last argument must be a 32-bit immediate");
31590 error ("the last argument must be an 8-bit immediate");
31598 /* This must be the memory operand. */
31599 op = force_reg (Pmode, convert_to_mode (Pmode, op, 1));
31600 op = gen_rtx_MEM (mode, op);
31601 gcc_assert (GET_MODE (op) == mode
31602 || GET_MODE (op) == VOIDmode);
31606 /* This must be a register.  */
31607 if (VECTOR_MODE_P (mode))
31608 op = safe_vector_operand (op, mode);
31610 gcc_assert (GET_MODE (op) == mode
31611 || GET_MODE (op) == VOIDmode);
31612 op = copy_to_mode_reg (mode, op);
31617 args[i].mode = mode;
31623 pat = GEN_FCN (icode) (target);
31626 pat = GEN_FCN (icode) (target, args[0].op);
31629 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
31632 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
31635 gcc_unreachable ();
31641 return klass == store ? 0 : target;
31644 /* Return the integer constant in ARG. Constrain it to be in the range
31645 of the subparts of VEC_TYPE; issue an error if not. */
31648 get_element_number (tree vec_type, tree arg)
31650 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
31652 if (!host_integerp (arg, 1)
31653 || (elt = tree_low_cst (arg, 1), elt > max))
31655 error ("selector must be an integer constant in the range 0..%wi", max);
31662 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31663 ix86_expand_vector_init. We DO have language-level syntax for this, in
31664 the form of (type){ init-list }. Except that since we can't place emms
31665 instructions from inside the compiler, we can't allow the use of MMX
31666 registers unless the user explicitly asks for it. So we do *not* define
31667 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
31668 we have builtins invoked by mmintrin.h that give us license to emit
31669 these sorts of instructions. */
31672 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
31674 enum machine_mode tmode = TYPE_MODE (type);
31675 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
31676 int i, n_elt = GET_MODE_NUNITS (tmode);
31677 rtvec v = rtvec_alloc (n_elt);
31679 gcc_assert (VECTOR_MODE_P (tmode));
31680 gcc_assert (call_expr_nargs (exp) == n_elt);
31682 for (i = 0; i < n_elt; ++i)
31684 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
31685 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
31688 if (!target || !register_operand (target, tmode))
31689 target = gen_reg_rtx (tmode);
31691 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
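/* Illustrative sketch: the MMX initializer intrinsics, e.g.

     __m64 v = _mm_set_pi32 (e1, e0);

   are implemented with __builtin_ia32_vec_init_v2si and end up here;
   each argument is expanded, narrowed to the element mode, and the
   vector is assembled by ix86_expand_vector_init.  */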
31695 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31696 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
31697 had a language-level syntax for referencing vector elements. */
31700 ix86_expand_vec_ext_builtin (tree exp, rtx target)
31702 enum machine_mode tmode, mode0;
31707 arg0 = CALL_EXPR_ARG (exp, 0);
31708 arg1 = CALL_EXPR_ARG (exp, 1);
31710 op0 = expand_normal (arg0);
31711 elt = get_element_number (TREE_TYPE (arg0), arg1);
31713 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
31714 mode0 = TYPE_MODE (TREE_TYPE (arg0));
31715 gcc_assert (VECTOR_MODE_P (mode0));
31717 op0 = force_reg (mode0, op0);
31719 if (optimize || !target || !register_operand (target, tmode))
31720 target = gen_reg_rtx (tmode);
31722 ix86_expand_vector_extract (true, target, op0, elt);
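/* Illustrative sketch: element-extract intrinsics such as

     int w = _mm_extract_epi16 (v, 3);

   reach this helper via builtins like __builtin_ia32_vec_ext_v8hi;
   the element number must be an in-range constant (checked by
   get_element_number above) and the extraction itself is done by
   ix86_expand_vector_extract.  */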
31727 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
31728 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
31729 a language-level syntax for referencing vector elements. */
31732 ix86_expand_vec_set_builtin (tree exp)
31734 enum machine_mode tmode, mode1;
31735 tree arg0, arg1, arg2;
31737 rtx op0, op1, target;
31739 arg0 = CALL_EXPR_ARG (exp, 0);
31740 arg1 = CALL_EXPR_ARG (exp, 1);
31741 arg2 = CALL_EXPR_ARG (exp, 2);
31743 tmode = TYPE_MODE (TREE_TYPE (arg0));
31744 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
31745 gcc_assert (VECTOR_MODE_P (tmode));
31747 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
31748 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
31749 elt = get_element_number (TREE_TYPE (arg0), arg2);
31751 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
31752 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
31754 op0 = force_reg (tmode, op0);
31755 op1 = force_reg (mode1, op1);
31757 /* OP0 is the source of these builtin functions and shouldn't be
31758 modified.  Create a copy, use it, and return it as the target.  */
31759 target = gen_reg_rtx (tmode);
31760 emit_move_insn (target, op0);
31761 ix86_expand_vector_set (true, target, op1, elt);
31766 /* Expand an expression EXP that calls a built-in function,
31767 with result going to TARGET if that's convenient
31768 (and in mode MODE if that's convenient).
31769 SUBTARGET may be used as the target for computing one of EXP's operands.
31770 IGNORE is nonzero if the value is to be ignored. */
31773 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
31774 enum machine_mode mode ATTRIBUTE_UNUSED,
31775 int ignore ATTRIBUTE_UNUSED)
31777 const struct builtin_description *d;
31779 enum insn_code icode;
31780 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
31781 tree arg0, arg1, arg2, arg3, arg4;
31782 rtx op0, op1, op2, op3, op4, pat, insn;
31783 enum machine_mode mode0, mode1, mode2, mode3, mode4;
31784 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
31786 /* For CPU builtins that can be folded, fold first and expand the fold. */
31789 case IX86_BUILTIN_CPU_INIT:
31791 /* Make it call __cpu_indicator_init in libgcc. */
31792 tree call_expr, fndecl, type;
31793 type = build_function_type_list (integer_type_node, NULL_TREE);
31794 fndecl = build_fn_decl ("__cpu_indicator_init", type);
31795 call_expr = build_call_expr (fndecl, 0);
31796 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
31798 case IX86_BUILTIN_CPU_IS:
31799 case IX86_BUILTIN_CPU_SUPPORTS:
31801 tree arg0 = CALL_EXPR_ARG (exp, 0);
31802 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
31803 gcc_assert (fold_expr != NULL_TREE);
31804 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
31808 /* Determine whether the builtin function is available under the current ISA.
31809 Originally the builtin was not created if it wasn't applicable to the
31810 current ISA based on the command line switches. With function specific
31811 options, we need to check in the context of the function making the call
31812 whether it is supported. */
31813 if (ix86_builtins_isa[fcode].isa
31814 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
31816 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
31817 NULL, (enum fpmath_unit) 0, false);
31820 error ("%qE needs unknown isa option", fndecl);
31823 gcc_assert (opts != NULL);
31824 error ("%qE needs isa option %s", fndecl, opts);
31832 case IX86_BUILTIN_MASKMOVQ:
31833 case IX86_BUILTIN_MASKMOVDQU:
31834 icode = (fcode == IX86_BUILTIN_MASKMOVQ
31835 ? CODE_FOR_mmx_maskmovq
31836 : CODE_FOR_sse2_maskmovdqu);
31837 /* Note the arg order is different from the operand order. */
31838 arg1 = CALL_EXPR_ARG (exp, 0);
31839 arg2 = CALL_EXPR_ARG (exp, 1);
31840 arg0 = CALL_EXPR_ARG (exp, 2);
31841 op0 = expand_normal (arg0);
31842 op1 = expand_normal (arg1);
31843 op2 = expand_normal (arg2);
31844 mode0 = insn_data[icode].operand[0].mode;
31845 mode1 = insn_data[icode].operand[1].mode;
31846 mode2 = insn_data[icode].operand[2].mode;
31848 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
31849 op0 = gen_rtx_MEM (mode1, op0);
31851 if (!insn_data[icode].operand[0].predicate (op0, mode0))
31852 op0 = copy_to_mode_reg (mode0, op0);
31853 if (!insn_data[icode].operand[1].predicate (op1, mode1))
31854 op1 = copy_to_mode_reg (mode1, op1);
31855 if (!insn_data[icode].operand[2].predicate (op2, mode2))
31856 op2 = copy_to_mode_reg (mode2, op2);
31857 pat = GEN_FCN (icode) (op0, op1, op2);
31863 case IX86_BUILTIN_LDMXCSR:
31864 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
31865 target = assign_386_stack_local (SImode, SLOT_TEMP);
31866 emit_move_insn (target, op0);
31867 emit_insn (gen_sse_ldmxcsr (target));
31870 case IX86_BUILTIN_STMXCSR:
31871 target = assign_386_stack_local (SImode, SLOT_TEMP);
31872 emit_insn (gen_sse_stmxcsr (target));
31873 return copy_to_mode_reg (SImode, target);
31875 case IX86_BUILTIN_CLFLUSH:
31876 arg0 = CALL_EXPR_ARG (exp, 0);
31877 op0 = expand_normal (arg0);
31878 icode = CODE_FOR_sse2_clflush;
31879 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
31880 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
31882 emit_insn (gen_sse2_clflush (op0));
31885 case IX86_BUILTIN_MONITOR:
31886 arg0 = CALL_EXPR_ARG (exp, 0);
31887 arg1 = CALL_EXPR_ARG (exp, 1);
31888 arg2 = CALL_EXPR_ARG (exp, 2);
31889 op0 = expand_normal (arg0);
31890 op1 = expand_normal (arg1);
31891 op2 = expand_normal (arg2);
31893 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
31895 op1 = copy_to_mode_reg (SImode, op1);
31897 op2 = copy_to_mode_reg (SImode, op2);
31898 emit_insn (ix86_gen_monitor (op0, op1, op2));
31901 case IX86_BUILTIN_MWAIT:
31902 arg0 = CALL_EXPR_ARG (exp, 0);
31903 arg1 = CALL_EXPR_ARG (exp, 1);
31904 op0 = expand_normal (arg0);
31905 op1 = expand_normal (arg1);
31907 op0 = copy_to_mode_reg (SImode, op0);
31909 op1 = copy_to_mode_reg (SImode, op1);
31910 emit_insn (gen_sse3_mwait (op0, op1));
31913 case IX86_BUILTIN_VEC_INIT_V2SI:
31914 case IX86_BUILTIN_VEC_INIT_V4HI:
31915 case IX86_BUILTIN_VEC_INIT_V8QI:
31916 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
31918 case IX86_BUILTIN_VEC_EXT_V2DF:
31919 case IX86_BUILTIN_VEC_EXT_V2DI:
31920 case IX86_BUILTIN_VEC_EXT_V4SF:
31921 case IX86_BUILTIN_VEC_EXT_V4SI:
31922 case IX86_BUILTIN_VEC_EXT_V8HI:
31923 case IX86_BUILTIN_VEC_EXT_V2SI:
31924 case IX86_BUILTIN_VEC_EXT_V4HI:
31925 case IX86_BUILTIN_VEC_EXT_V16QI:
31926 return ix86_expand_vec_ext_builtin (exp, target);
31928 case IX86_BUILTIN_VEC_SET_V2DI:
31929 case IX86_BUILTIN_VEC_SET_V4SF:
31930 case IX86_BUILTIN_VEC_SET_V4SI:
31931 case IX86_BUILTIN_VEC_SET_V8HI:
31932 case IX86_BUILTIN_VEC_SET_V4HI:
31933 case IX86_BUILTIN_VEC_SET_V16QI:
31934 return ix86_expand_vec_set_builtin (exp);
31936 case IX86_BUILTIN_INFQ:
31937 case IX86_BUILTIN_HUGE_VALQ:
31939 REAL_VALUE_TYPE inf;
31943 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
31945 tmp = validize_mem (force_const_mem (mode, tmp));
31948 target = gen_reg_rtx (mode);
31950 emit_move_insn (target, tmp);
31954 case IX86_BUILTIN_RDPMC:
31955 case IX86_BUILTIN_RDTSC:
31956 case IX86_BUILTIN_RDTSCP:
31958 op0 = gen_reg_rtx (DImode);
31959 op1 = gen_reg_rtx (DImode);
31961 if (fcode == IX86_BUILTIN_RDPMC)
31963 arg0 = CALL_EXPR_ARG (exp, 0);
31964 op2 = expand_normal (arg0);
31965 if (!register_operand (op2, SImode))
31966 op2 = copy_to_mode_reg (SImode, op2);
31968 insn = (TARGET_64BIT
31969 ? gen_rdpmc_rex64 (op0, op1, op2)
31970 : gen_rdpmc (op0, op2));
31973 else if (fcode == IX86_BUILTIN_RDTSC)
31975 insn = (TARGET_64BIT
31976 ? gen_rdtsc_rex64 (op0, op1)
31977 : gen_rdtsc (op0));
31982 op2 = gen_reg_rtx (SImode);
31984 insn = (TARGET_64BIT
31985 ? gen_rdtscp_rex64 (op0, op1, op2)
31986 : gen_rdtscp (op0, op2));
31989 arg0 = CALL_EXPR_ARG (exp, 0);
31990 op4 = expand_normal (arg0);
31991 if (!address_operand (op4, VOIDmode))
31993 op4 = convert_memory_address (Pmode, op4);
31994 op4 = copy_addr_to_reg (op4);
31996 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
32000 target = gen_reg_rtx (mode);
32004 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
32005 op1, 1, OPTAB_DIRECT);
32006 op0 = expand_simple_binop (DImode, IOR, op0, op1,
32007 op0, 1, OPTAB_DIRECT);
32010 emit_move_insn (target, op0);
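/* Illustrative sketch: when the counter is delivered as two separate
   halves, the ASHIFT/IOR sequence above combines them exactly like

     unsigned long long tsc = ((unsigned long long) hi << 32) | lo;

   At the source level this whole case is simply, e.g.,

     unsigned long long t = __builtin_ia32_rdtsc ();  */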
32013 case IX86_BUILTIN_FXSAVE:
32014 case IX86_BUILTIN_FXRSTOR:
32015 case IX86_BUILTIN_FXSAVE64:
32016 case IX86_BUILTIN_FXRSTOR64:
32019 case IX86_BUILTIN_FXSAVE:
32020 icode = CODE_FOR_fxsave;
32022 case IX86_BUILTIN_FXRSTOR:
32023 icode = CODE_FOR_fxrstor;
32025 case IX86_BUILTIN_FXSAVE64:
32026 icode = CODE_FOR_fxsave64;
32028 case IX86_BUILTIN_FXRSTOR64:
32029 icode = CODE_FOR_fxrstor64;
32032 gcc_unreachable ();
32035 arg0 = CALL_EXPR_ARG (exp, 0);
32036 op0 = expand_normal (arg0);
32038 if (!address_operand (op0, VOIDmode))
32040 op0 = convert_memory_address (Pmode, op0);
32041 op0 = copy_addr_to_reg (op0);
32043 op0 = gen_rtx_MEM (BLKmode, op0);
32045 pat = GEN_FCN (icode) (op0);
32050 case IX86_BUILTIN_XSAVE:
32051 case IX86_BUILTIN_XRSTOR:
32052 case IX86_BUILTIN_XSAVE64:
32053 case IX86_BUILTIN_XRSTOR64:
32054 case IX86_BUILTIN_XSAVEOPT:
32055 case IX86_BUILTIN_XSAVEOPT64:
32056 arg0 = CALL_EXPR_ARG (exp, 0);
32057 arg1 = CALL_EXPR_ARG (exp, 1);
32058 op0 = expand_normal (arg0);
32059 op1 = expand_normal (arg1);
32061 if (!address_operand (op0, VOIDmode))
32063 op0 = convert_memory_address (Pmode, op0);
32064 op0 = copy_addr_to_reg (op0);
32066 op0 = gen_rtx_MEM (BLKmode, op0);
32068 op1 = force_reg (DImode, op1);
32072 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
32073 NULL, 1, OPTAB_DIRECT);
32076 case IX86_BUILTIN_XSAVE:
32077 icode = CODE_FOR_xsave_rex64;
32079 case IX86_BUILTIN_XRSTOR:
32080 icode = CODE_FOR_xrstor_rex64;
32082 case IX86_BUILTIN_XSAVE64:
32083 icode = CODE_FOR_xsave64;
32085 case IX86_BUILTIN_XRSTOR64:
32086 icode = CODE_FOR_xrstor64;
32088 case IX86_BUILTIN_XSAVEOPT:
32089 icode = CODE_FOR_xsaveopt_rex64;
32091 case IX86_BUILTIN_XSAVEOPT64:
32092 icode = CODE_FOR_xsaveopt64;
32095 gcc_unreachable ();
32098 op2 = gen_lowpart (SImode, op2);
32099 op1 = gen_lowpart (SImode, op1);
32100 pat = GEN_FCN (icode) (op0, op1, op2);
32106 case IX86_BUILTIN_XSAVE:
32107 icode = CODE_FOR_xsave;
32109 case IX86_BUILTIN_XRSTOR:
32110 icode = CODE_FOR_xrstor;
32112 case IX86_BUILTIN_XSAVEOPT:
32113 icode = CODE_FOR_xsaveopt;
32116 gcc_unreachable ();
32118 pat = GEN_FCN (icode) (op0, op1);
32125 case IX86_BUILTIN_LLWPCB:
32126 arg0 = CALL_EXPR_ARG (exp, 0);
32127 op0 = expand_normal (arg0);
32128 icode = CODE_FOR_lwp_llwpcb;
32129 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
32130 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
32131 emit_insn (gen_lwp_llwpcb (op0));
32134 case IX86_BUILTIN_SLWPCB:
32135 icode = CODE_FOR_lwp_slwpcb;
32137 || !insn_data[icode].operand[0].predicate (target, Pmode))
32138 target = gen_reg_rtx (Pmode);
32139 emit_insn (gen_lwp_slwpcb (target));
32142 case IX86_BUILTIN_BEXTRI32:
32143 case IX86_BUILTIN_BEXTRI64:
32144 arg0 = CALL_EXPR_ARG (exp, 0);
32145 arg1 = CALL_EXPR_ARG (exp, 1);
32146 op0 = expand_normal (arg0);
32147 op1 = expand_normal (arg1);
32148 icode = (fcode == IX86_BUILTIN_BEXTRI32
32149 ? CODE_FOR_tbm_bextri_si
32150 : CODE_FOR_tbm_bextri_di);
32151 if (!CONST_INT_P (op1))
32153 error ("last argument must be an immediate");
32158 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
32159 unsigned char lsb_index = INTVAL (op1) & 0xFF;
32160 op1 = GEN_INT (length);
32161 op2 = GEN_INT (lsb_index);
32162 pat = GEN_FCN (icode) (target, op0, op1, op2);
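/* Illustrative sketch of the immediate decoding above: the control
   word packs the starting bit position in bits [7:0] and the field
   length in bits [15:8], so (using the builtin name from tbmintrin.h)

     unsigned int f = __builtin_ia32_bextri_u32 (x, 0x0804);

   extracts an 8-bit field starting at bit 4, i.e. (x >> 4) & 0xff.  */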
32168 case IX86_BUILTIN_RDRAND16_STEP:
32169 icode = CODE_FOR_rdrandhi_1;
32173 case IX86_BUILTIN_RDRAND32_STEP:
32174 icode = CODE_FOR_rdrandsi_1;
32178 case IX86_BUILTIN_RDRAND64_STEP:
32179 icode = CODE_FOR_rdranddi_1;
32183 op0 = gen_reg_rtx (mode0);
32184 emit_insn (GEN_FCN (icode) (op0));
32186 arg0 = CALL_EXPR_ARG (exp, 0);
32187 op1 = expand_normal (arg0);
32188 if (!address_operand (op1, VOIDmode))
32190 op1 = convert_memory_address (Pmode, op1);
32191 op1 = copy_addr_to_reg (op1);
32193 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
32195 op1 = gen_reg_rtx (SImode);
32196 emit_move_insn (op1, CONST1_RTX (SImode));
32198 /* Emit SImode conditional move. */
32199 if (mode0 == HImode)
32201 op2 = gen_reg_rtx (SImode);
32202 emit_insn (gen_zero_extendhisi2 (op2, op0));
32204 else if (mode0 == SImode)
32207 op2 = gen_rtx_SUBREG (SImode, op0, 0);
32210 target = gen_reg_rtx (SImode);
32212 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
32214 emit_insn (gen_rtx_SET (VOIDmode, target,
32215 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
32218 case IX86_BUILTIN_RDSEED16_STEP:
32219 icode = CODE_FOR_rdseedhi_1;
32223 case IX86_BUILTIN_RDSEED32_STEP:
32224 icode = CODE_FOR_rdseedsi_1;
32228 case IX86_BUILTIN_RDSEED64_STEP:
32229 icode = CODE_FOR_rdseeddi_1;
32233 op0 = gen_reg_rtx (mode0);
32234 emit_insn (GEN_FCN (icode) (op0));
32236 arg0 = CALL_EXPR_ARG (exp, 0);
32237 op1 = expand_normal (arg0);
32238 if (!address_operand (op1, VOIDmode))
32240 op1 = convert_memory_address (Pmode, op1);
32241 op1 = copy_addr_to_reg (op1);
32243 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
32245 op2 = gen_reg_rtx (QImode);
32247 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
32249 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
32252 target = gen_reg_rtx (SImode);
32254 emit_insn (gen_zero_extendqisi2 (target, op2));
32257 case IX86_BUILTIN_ADDCARRYX32:
32258 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
32262 case IX86_BUILTIN_ADDCARRYX64:
32263 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
32267 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
32268 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
32269 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
32270 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
32272 op0 = gen_reg_rtx (QImode);
32274 /* Generate CF from input operand. */
32275 op1 = expand_normal (arg0);
32276 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
32277 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
32279 /* Generate an ADCX instruction to compute X + Y + CF.  */
32280 op2 = expand_normal (arg1);
32281 op3 = expand_normal (arg2);
32284 op2 = copy_to_mode_reg (mode0, op2);
32286 op3 = copy_to_mode_reg (mode0, op3);
32288 op0 = gen_reg_rtx (mode0);
32290 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
32291 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
32292 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
32294 /* Store the result. */
32295 op4 = expand_normal (arg3);
32296 if (!address_operand (op4, VOIDmode))
32298 op4 = convert_memory_address (Pmode, op4);
32299 op4 = copy_addr_to_reg (op4);
32301 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
32303 /* Return current CF value. */
32305 target = gen_reg_rtx (QImode);
32307 PUT_MODE (pat, QImode);
32308 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
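/* Illustrative sketch: the ADX add-with-carry builtins expanded above
   implement one step of a wide addition, roughly

     unsigned int sum;
     unsigned char c_out = __builtin_ia32_addcarryx_u32 (c_in, x, y, &sum);

   i.e. sum = x + y + c_in, with the carry-out returned as the result.
   When TARGET_ADX is not set, the ordinary add-with-carry pattern is
   used instead of adcx.  */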
32311 case IX86_BUILTIN_GATHERSIV2DF:
32312 icode = CODE_FOR_avx2_gathersiv2df;
32314 case IX86_BUILTIN_GATHERSIV4DF:
32315 icode = CODE_FOR_avx2_gathersiv4df;
32317 case IX86_BUILTIN_GATHERDIV2DF:
32318 icode = CODE_FOR_avx2_gatherdiv2df;
32320 case IX86_BUILTIN_GATHERDIV4DF:
32321 icode = CODE_FOR_avx2_gatherdiv4df;
32323 case IX86_BUILTIN_GATHERSIV4SF:
32324 icode = CODE_FOR_avx2_gathersiv4sf;
32326 case IX86_BUILTIN_GATHERSIV8SF:
32327 icode = CODE_FOR_avx2_gathersiv8sf;
32329 case IX86_BUILTIN_GATHERDIV4SF:
32330 icode = CODE_FOR_avx2_gatherdiv4sf;
32332 case IX86_BUILTIN_GATHERDIV8SF:
32333 icode = CODE_FOR_avx2_gatherdiv8sf;
32335 case IX86_BUILTIN_GATHERSIV2DI:
32336 icode = CODE_FOR_avx2_gathersiv2di;
32338 case IX86_BUILTIN_GATHERSIV4DI:
32339 icode = CODE_FOR_avx2_gathersiv4di;
32341 case IX86_BUILTIN_GATHERDIV2DI:
32342 icode = CODE_FOR_avx2_gatherdiv2di;
32344 case IX86_BUILTIN_GATHERDIV4DI:
32345 icode = CODE_FOR_avx2_gatherdiv4di;
32347 case IX86_BUILTIN_GATHERSIV4SI:
32348 icode = CODE_FOR_avx2_gathersiv4si;
32350 case IX86_BUILTIN_GATHERSIV8SI:
32351 icode = CODE_FOR_avx2_gathersiv8si;
32353 case IX86_BUILTIN_GATHERDIV4SI:
32354 icode = CODE_FOR_avx2_gatherdiv4si;
32356 case IX86_BUILTIN_GATHERDIV8SI:
32357 icode = CODE_FOR_avx2_gatherdiv8si;
32359 case IX86_BUILTIN_GATHERALTSIV4DF:
32360 icode = CODE_FOR_avx2_gathersiv4df;
32362 case IX86_BUILTIN_GATHERALTDIV8SF:
32363 icode = CODE_FOR_avx2_gatherdiv8sf;
32365 case IX86_BUILTIN_GATHERALTSIV4DI:
32366 icode = CODE_FOR_avx2_gathersiv4di;
32368 case IX86_BUILTIN_GATHERALTDIV8SI:
32369 icode = CODE_FOR_avx2_gatherdiv8si;
32373 arg0 = CALL_EXPR_ARG (exp, 0);
32374 arg1 = CALL_EXPR_ARG (exp, 1);
32375 arg2 = CALL_EXPR_ARG (exp, 2);
32376 arg3 = CALL_EXPR_ARG (exp, 3);
32377 arg4 = CALL_EXPR_ARG (exp, 4);
32378 op0 = expand_normal (arg0);
32379 op1 = expand_normal (arg1);
32380 op2 = expand_normal (arg2);
32381 op3 = expand_normal (arg3);
32382 op4 = expand_normal (arg4);
32383 /* Note the arg order is different from the operand order. */
32384 mode0 = insn_data[icode].operand[1].mode;
32385 mode2 = insn_data[icode].operand[3].mode;
32386 mode3 = insn_data[icode].operand[4].mode;
32387 mode4 = insn_data[icode].operand[5].mode;
32389 if (target == NULL_RTX
32390 || GET_MODE (target) != insn_data[icode].operand[0].mode)
32391 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
32393 subtarget = target;
32395 if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
32396 || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
32398 rtx half = gen_reg_rtx (V4SImode);
32399 if (!nonimmediate_operand (op2, V8SImode))
32400 op2 = copy_to_mode_reg (V8SImode, op2);
32401 emit_insn (gen_vec_extract_lo_v8si (half, op2));
32404 else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
32405 || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
32407 rtx (*gen) (rtx, rtx);
32408 rtx half = gen_reg_rtx (mode0);
32409 if (mode0 == V4SFmode)
32410 gen = gen_vec_extract_lo_v8sf;
32412 gen = gen_vec_extract_lo_v8si;
32413 if (!nonimmediate_operand (op0, GET_MODE (op0)))
32414 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
32415 emit_insn (gen (half, op0));
32417 if (!nonimmediate_operand (op3, GET_MODE (op3)))
32418 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
32419 emit_insn (gen (half, op3));
32423 /* Force memory operand only with base register here. But we
32424 don't want to do it on memory operand for other builtin functions.  */
32426 op1 = force_reg (Pmode, convert_to_mode (Pmode, op1, 1));
32428 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32429 op0 = copy_to_mode_reg (mode0, op0);
32430 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
32431 op1 = copy_to_mode_reg (Pmode, op1);
32432 if (!insn_data[icode].operand[3].predicate (op2, mode2))
32433 op2 = copy_to_mode_reg (mode2, op2);
32434 if (!insn_data[icode].operand[4].predicate (op3, mode3))
32435 op3 = copy_to_mode_reg (mode3, op3);
32436 if (!insn_data[icode].operand[5].predicate (op4, mode4))
32438 error ("last argument must be scale 1, 2, 4, 8");
32442 /* Optimize. If mask is known to have all high bits set,
32443 replace op0 with pc_rtx to signal that the instruction
32444 overwrites the whole destination and doesn't use its
32445 previous contents. */
32448 if (TREE_CODE (arg3) == VECTOR_CST)
32450 unsigned int negative = 0;
32451 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
32453 tree cst = VECTOR_CST_ELT (arg3, i);
32454 if (TREE_CODE (cst) == INTEGER_CST
32455 && tree_int_cst_sign_bit (cst))
32457 else if (TREE_CODE (cst) == REAL_CST
32458 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
32461 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
32464 else if (TREE_CODE (arg3) == SSA_NAME)
32466 /* Recognize also when mask is like:
32467 __v2df src = _mm_setzero_pd ();
32468 __v2df mask = _mm_cmpeq_pd (src, src);
32470 __v8sf src = _mm256_setzero_ps ();
32471 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
32472 as that is a cheaper way to load all ones into
32473 a register than having to load a constant from memory.  */
32475 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
32476 if (is_gimple_call (def_stmt))
32478 tree fndecl = gimple_call_fndecl (def_stmt);
32480 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32481 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
32483 case IX86_BUILTIN_CMPPD:
32484 case IX86_BUILTIN_CMPPS:
32485 case IX86_BUILTIN_CMPPD256:
32486 case IX86_BUILTIN_CMPPS256:
32487 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
32490 case IX86_BUILTIN_CMPEQPD:
32491 case IX86_BUILTIN_CMPEQPS:
32492 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
32493 && initializer_zerop (gimple_call_arg (def_stmt,
32504 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
32509 if (fcode == IX86_BUILTIN_GATHERDIV8SF
32510 || fcode == IX86_BUILTIN_GATHERDIV8SI)
32512 enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
32513 ? V4SFmode : V4SImode;
32514 if (target == NULL_RTX)
32515 target = gen_reg_rtx (tmode);
32516 if (tmode == V4SFmode)
32517 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
32519 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
32522 target = subtarget;
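/* Illustrative sketch (an assumption, not part of this file): the gather
   expansion above serves the AVX2 gather intrinsics, whose last argument
   is the compile-time scale checked earlier.  For example:

     #include <immintrin.h>

     __m256d load4 (const double *base, __m128i idx)
     {
       /* Loads base[idx[0..3]]; the scale must be 1, 2, 4 or 8
          (8 here, since the elements are doubles).  */
       return _mm256_i32gather_pd (base, idx, 8);
     }
*/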
32526 case IX86_BUILTIN_XABORT:
32527 icode = CODE_FOR_xabort;
32528 arg0 = CALL_EXPR_ARG (exp, 0);
32529 op0 = expand_normal (arg0);
32530 mode0 = insn_data[icode].operand[0].mode;
32531 if (!insn_data[icode].operand[0].predicate (op0, mode0))
32533 error ("the xabort's argument must be an 8-bit immediate");
32536 emit_insn (gen_xabort (op0));
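/* Illustrative sketch (an assumption): the operand check above rejects
   non-constant arguments to the RTM abort intrinsic; the status code must
   be an 8-bit compile-time constant:

     #include <immintrin.h>

     void cancel_txn (void)
     {
       _xabort (0xff);          /* OK: 8-bit immediate.  */
       /* _xabort (status_var);    rejected: argument must be an
          8-bit immediate.  */
     }
*/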
32543 for (i = 0, d = bdesc_special_args;
32544 i < ARRAY_SIZE (bdesc_special_args);
32546 if (d->code == fcode)
32547 return ix86_expand_special_args_builtin (d, exp, target);
32549 for (i = 0, d = bdesc_args;
32550 i < ARRAY_SIZE (bdesc_args);
32552 if (d->code == fcode)
32555 case IX86_BUILTIN_FABSQ:
32556 case IX86_BUILTIN_COPYSIGNQ:
32558 /* Emit a normal call if SSE isn't available. */
32559 return expand_call (exp, target, ignore);
32561 return ix86_expand_args_builtin (d, exp, target);
32564 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
32565 if (d->code == fcode)
32566 return ix86_expand_sse_comi (d, exp, target);
32568 for (i = 0, d = bdesc_pcmpestr;
32569 i < ARRAY_SIZE (bdesc_pcmpestr);
32571 if (d->code == fcode)
32572 return ix86_expand_sse_pcmpestr (d, exp, target);
32574 for (i = 0, d = bdesc_pcmpistr;
32575 i < ARRAY_SIZE (bdesc_pcmpistr);
32577 if (d->code == fcode)
32578 return ix86_expand_sse_pcmpistr (d, exp, target);
32580 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
32581 if (d->code == fcode)
32582 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
32583 (enum ix86_builtin_func_type)
32584 d->flag, d->comparison);
32586 gcc_unreachable ();
32589 /* Returns a function decl for a vectorized version of the builtin function
32590 with builtin function code FN and the result vector type TYPE, or NULL_TREE
32591 if it is not available. */
32594 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
32597 enum machine_mode in_mode, out_mode;
32599 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
32601 if (TREE_CODE (type_out) != VECTOR_TYPE
32602 || TREE_CODE (type_in) != VECTOR_TYPE
32603 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
32606 out_mode = TYPE_MODE (TREE_TYPE (type_out));
32607 out_n = TYPE_VECTOR_SUBPARTS (type_out);
32608 in_mode = TYPE_MODE (TREE_TYPE (type_in));
32609 in_n = TYPE_VECTOR_SUBPARTS (type_in);
32613 case BUILT_IN_SQRT:
32614 if (out_mode == DFmode && in_mode == DFmode)
32616 if (out_n == 2 && in_n == 2)
32617 return ix86_builtins[IX86_BUILTIN_SQRTPD];
32618 else if (out_n == 4 && in_n == 4)
32619 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
32623 case BUILT_IN_SQRTF:
32624 if (out_mode == SFmode && in_mode == SFmode)
32626 if (out_n == 4 && in_n == 4)
32627 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
32628 else if (out_n == 8 && in_n == 8)
32629 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
32633 case BUILT_IN_IFLOOR:
32634 case BUILT_IN_LFLOOR:
32635 case BUILT_IN_LLFLOOR:
32636 /* The round insn does not trap on denormals. */
32637 if (flag_trapping_math || !TARGET_ROUND)
32640 if (out_mode == SImode && in_mode == DFmode)
32642 if (out_n == 4 && in_n == 2)
32643 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
32644 else if (out_n == 8 && in_n == 4)
32645 return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
32649 case BUILT_IN_IFLOORF:
32650 case BUILT_IN_LFLOORF:
32651 case BUILT_IN_LLFLOORF:
32652 /* The round insn does not trap on denormals. */
32653 if (flag_trapping_math || !TARGET_ROUND)
32656 if (out_mode == SImode && in_mode == SFmode)
32658 if (out_n == 4 && in_n == 4)
32659 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
32660 else if (out_n == 8 && in_n == 8)
32661 return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
32665 case BUILT_IN_ICEIL:
32666 case BUILT_IN_LCEIL:
32667 case BUILT_IN_LLCEIL:
32668 /* The round insn does not trap on denormals. */
32669 if (flag_trapping_math || !TARGET_ROUND)
32672 if (out_mode == SImode && in_mode == DFmode)
32674 if (out_n == 4 && in_n == 2)
32675 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
32676 else if (out_n == 8 && in_n == 4)
32677 return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
32681 case BUILT_IN_ICEILF:
32682 case BUILT_IN_LCEILF:
32683 case BUILT_IN_LLCEILF:
32684 /* The round insn does not trap on denormals. */
32685 if (flag_trapping_math || !TARGET_ROUND)
32688 if (out_mode == SImode && in_mode == SFmode)
32690 if (out_n == 4 && in_n == 4)
32691 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
32692 else if (out_n == 8 && in_n == 8)
32693 return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
32697 case BUILT_IN_IRINT:
32698 case BUILT_IN_LRINT:
32699 case BUILT_IN_LLRINT:
32700 if (out_mode == SImode && in_mode == DFmode)
32702 if (out_n == 4 && in_n == 2)
32703 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
32704 else if (out_n == 8 && in_n == 4)
32705 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
32709 case BUILT_IN_IRINTF:
32710 case BUILT_IN_LRINTF:
32711 case BUILT_IN_LLRINTF:
32712 if (out_mode == SImode && in_mode == SFmode)
32714 if (out_n == 4 && in_n == 4)
32715 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
32716 else if (out_n == 8 && in_n == 8)
32717 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
32721 case BUILT_IN_IROUND:
32722 case BUILT_IN_LROUND:
32723 case BUILT_IN_LLROUND:
32724 /* The round insn does not trap on denormals. */
32725 if (flag_trapping_math || !TARGET_ROUND)
32728 if (out_mode == SImode && in_mode == DFmode)
32730 if (out_n == 4 && in_n == 2)
32731 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
32732 else if (out_n == 8 && in_n == 4)
32733 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
32737 case BUILT_IN_IROUNDF:
32738 case BUILT_IN_LROUNDF:
32739 case BUILT_IN_LLROUNDF:
32740 /* The round insn does not trap on denormals. */
32741 if (flag_trapping_math || !TARGET_ROUND)
32744 if (out_mode == SImode && in_mode == SFmode)
32746 if (out_n == 4 && in_n == 4)
32747 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
32748 else if (out_n == 8 && in_n == 8)
32749 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
32753 case BUILT_IN_COPYSIGN:
32754 if (out_mode == DFmode && in_mode == DFmode)
32756 if (out_n == 2 && in_n == 2)
32757 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
32758 else if (out_n == 4 && in_n == 4)
32759 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
32763 case BUILT_IN_COPYSIGNF:
32764 if (out_mode == SFmode && in_mode == SFmode)
32766 if (out_n == 4 && in_n == 4)
32767 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
32768 else if (out_n == 8 && in_n == 8)
32769 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
32773 case BUILT_IN_FLOOR:
32774 /* The round insn does not trap on denormals. */
32775 if (flag_trapping_math || !TARGET_ROUND)
32778 if (out_mode == DFmode && in_mode == DFmode)
32780 if (out_n == 2 && in_n == 2)
32781 return ix86_builtins[IX86_BUILTIN_FLOORPD];
32782 else if (out_n == 4 && in_n == 4)
32783 return ix86_builtins[IX86_BUILTIN_FLOORPD256];
32787 case BUILT_IN_FLOORF:
32788 /* The round insn does not trap on denormals. */
32789 if (flag_trapping_math || !TARGET_ROUND)
32792 if (out_mode == SFmode && in_mode == SFmode)
32794 if (out_n == 4 && in_n == 4)
32795 return ix86_builtins[IX86_BUILTIN_FLOORPS];
32796 else if (out_n == 8 && in_n == 8)
32797 return ix86_builtins[IX86_BUILTIN_FLOORPS256];
32801 case BUILT_IN_CEIL:
32802 /* The round insn does not trap on denormals. */
32803 if (flag_trapping_math || !TARGET_ROUND)
32806 if (out_mode == DFmode && in_mode == DFmode)
32808 if (out_n == 2 && in_n == 2)
32809 return ix86_builtins[IX86_BUILTIN_CEILPD];
32810 else if (out_n == 4 && in_n == 4)
32811 return ix86_builtins[IX86_BUILTIN_CEILPD256];
32815 case BUILT_IN_CEILF:
32816 /* The round insn does not trap on denormals. */
32817 if (flag_trapping_math || !TARGET_ROUND)
32820 if (out_mode == SFmode && in_mode == SFmode)
32822 if (out_n == 4 && in_n == 4)
32823 return ix86_builtins[IX86_BUILTIN_CEILPS];
32824 else if (out_n == 8 && in_n == 8)
32825 return ix86_builtins[IX86_BUILTIN_CEILPS256];
32829 case BUILT_IN_TRUNC:
32830 /* The round insn does not trap on denormals. */
32831 if (flag_trapping_math || !TARGET_ROUND)
32834 if (out_mode == DFmode && in_mode == DFmode)
32836 if (out_n == 2 && in_n == 2)
32837 return ix86_builtins[IX86_BUILTIN_TRUNCPD];
32838 else if (out_n == 4 && in_n == 4)
32839 return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
32843 case BUILT_IN_TRUNCF:
32844 /* The round insn does not trap on denormals. */
32845 if (flag_trapping_math || !TARGET_ROUND)
32848 if (out_mode == SFmode && in_mode == SFmode)
32850 if (out_n == 4 && in_n == 4)
32851 return ix86_builtins[IX86_BUILTIN_TRUNCPS];
32852 else if (out_n == 8 && in_n == 8)
32853 return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
32857 case BUILT_IN_RINT:
32858 /* The round insn does not trap on denormals. */
32859 if (flag_trapping_math || !TARGET_ROUND)
32862 if (out_mode == DFmode && in_mode == DFmode)
32864 if (out_n == 2 && in_n == 2)
32865 return ix86_builtins[IX86_BUILTIN_RINTPD];
32866 else if (out_n == 4 && in_n == 4)
32867 return ix86_builtins[IX86_BUILTIN_RINTPD256];
32871 case BUILT_IN_RINTF:
32872 /* The round insn does not trap on denormals. */
32873 if (flag_trapping_math || !TARGET_ROUND)
32876 if (out_mode == SFmode && in_mode == SFmode)
32878 if (out_n == 4 && in_n == 4)
32879 return ix86_builtins[IX86_BUILTIN_RINTPS];
32880 else if (out_n == 8 && in_n == 8)
32881 return ix86_builtins[IX86_BUILTIN_RINTPS256];
32885 case BUILT_IN_ROUND:
32886 /* The round insn does not trap on denormals. */
32887 if (flag_trapping_math || !TARGET_ROUND)
32890 if (out_mode == DFmode && in_mode == DFmode)
32892 if (out_n == 2 && in_n == 2)
32893 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
32894 else if (out_n == 4 && in_n == 4)
32895 return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
32899 case BUILT_IN_ROUNDF:
32900 /* The round insn does not trap on denormals. */
32901 if (flag_trapping_math || !TARGET_ROUND)
32904 if (out_mode == SFmode && in_mode == SFmode)
32906 if (out_n == 4 && in_n == 4)
32907 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
32908 else if (out_n == 8 && in_n == 8)
32909 return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
32914 if (out_mode == DFmode && in_mode == DFmode)
32916 if (out_n == 2 && in_n == 2)
32917 return ix86_builtins[IX86_BUILTIN_VFMADDPD];
32918 if (out_n == 4 && in_n == 4)
32919 return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
32923 case BUILT_IN_FMAF:
32924 if (out_mode == SFmode && in_mode == SFmode)
32926 if (out_n == 4 && in_n == 4)
32927 return ix86_builtins[IX86_BUILTIN_VFMADDPS];
32928 if (out_n == 8 && in_n == 8)
32929 return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
32937 /* Dispatch to a handler for a vectorization library. */
32938 if (ix86_veclib_handler)
32939 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
32945 /* Handler for an SVML-style interface to
32946 a library with vectorized intrinsics. */
32949 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
32952 tree fntype, new_fndecl, args;
32955 enum machine_mode el_mode, in_mode;
32958 /* The SVML is suitable for unsafe math only. */
32959 if (!flag_unsafe_math_optimizations)
32962 el_mode = TYPE_MODE (TREE_TYPE (type_out));
32963 n = TYPE_VECTOR_SUBPARTS (type_out);
32964 in_mode = TYPE_MODE (TREE_TYPE (type_in));
32965 in_n = TYPE_VECTOR_SUBPARTS (type_in);
32966 if (el_mode != in_mode
32974 case BUILT_IN_LOG10:
32976 case BUILT_IN_TANH:
32978 case BUILT_IN_ATAN:
32979 case BUILT_IN_ATAN2:
32980 case BUILT_IN_ATANH:
32981 case BUILT_IN_CBRT:
32982 case BUILT_IN_SINH:
32984 case BUILT_IN_ASINH:
32985 case BUILT_IN_ASIN:
32986 case BUILT_IN_COSH:
32988 case BUILT_IN_ACOSH:
32989 case BUILT_IN_ACOS:
32990 if (el_mode != DFmode || n != 2)
32994 case BUILT_IN_EXPF:
32995 case BUILT_IN_LOGF:
32996 case BUILT_IN_LOG10F:
32997 case BUILT_IN_POWF:
32998 case BUILT_IN_TANHF:
32999 case BUILT_IN_TANF:
33000 case BUILT_IN_ATANF:
33001 case BUILT_IN_ATAN2F:
33002 case BUILT_IN_ATANHF:
33003 case BUILT_IN_CBRTF:
33004 case BUILT_IN_SINHF:
33005 case BUILT_IN_SINF:
33006 case BUILT_IN_ASINHF:
33007 case BUILT_IN_ASINF:
33008 case BUILT_IN_COSHF:
33009 case BUILT_IN_COSF:
33010 case BUILT_IN_ACOSHF:
33011 case BUILT_IN_ACOSF:
33012 if (el_mode != SFmode || n != 4)
33020 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
33022 if (fn == BUILT_IN_LOGF)
33023 strcpy (name, "vmlsLn4");
33024 else if (fn == BUILT_IN_LOG)
33025 strcpy (name, "vmldLn2");
33028 sprintf (name, "vmls%s", bname+10);
33029 name[strlen (name)-1] = '4';
33032 sprintf (name, "vmld%s2", bname+10);
33034 /* Convert to uppercase. */
33038 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
33040 args = TREE_CHAIN (args))
33044 fntype = build_function_type_list (type_out, type_in, NULL);
33046 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
33048 /* Build a function declaration for the vectorized function. */
33049 new_fndecl = build_decl (BUILTINS_LOCATION,
33050 FUNCTION_DECL, get_identifier (name), fntype);
33051 TREE_PUBLIC (new_fndecl) = 1;
33052 DECL_EXTERNAL (new_fndecl) = 1;
33053 DECL_IS_NOVOPS (new_fndecl) = 1;
33054 TREE_READONLY (new_fndecl) = 1;
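/* Illustrative sketch (an assumption) of the SVML name mangling above:
   BUILT_IN_SIN with a V2DF result yields "vmldSin2", BUILT_IN_SINF with a
   V4SF result yields "vmlsSin4", and log is special-cased to
   "vmldLn2"/"vmlsLn4".  A loop such as

     for (i = 0; i < n; i++)
       out[i] = sin (in[i]);

   compiled with -mveclibabi=svml and unsafe math optimizations enabled
   can then be vectorized into calls to the declaration built here.  */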
33059 /* Handler for an ACML-style interface to
33060 a library with vectorized intrinsics. */
33063 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
33065 char name[20] = "__vr.._";
33066 tree fntype, new_fndecl, args;
33069 enum machine_mode el_mode, in_mode;
33072 /* The ACML is 64-bit only and suitable for unsafe math only, as
33073 it does not correctly support parts of IEEE with the required
33074 precision such as denormals. */
33076 || !flag_unsafe_math_optimizations)
33079 el_mode = TYPE_MODE (TREE_TYPE (type_out));
33080 n = TYPE_VECTOR_SUBPARTS (type_out);
33081 in_mode = TYPE_MODE (TREE_TYPE (type_in));
33082 in_n = TYPE_VECTOR_SUBPARTS (type_in);
33083 if (el_mode != in_mode
33093 case BUILT_IN_LOG2:
33094 case BUILT_IN_LOG10:
33097 if (el_mode != DFmode
33102 case BUILT_IN_SINF:
33103 case BUILT_IN_COSF:
33104 case BUILT_IN_EXPF:
33105 case BUILT_IN_POWF:
33106 case BUILT_IN_LOGF:
33107 case BUILT_IN_LOG2F:
33108 case BUILT_IN_LOG10F:
33111 if (el_mode != SFmode
33120 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
33121 sprintf (name + 7, "%s", bname+10);
33124 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
33126 args = TREE_CHAIN (args))
33130 fntype = build_function_type_list (type_out, type_in, NULL);
33132 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
33134 /* Build a function declaration for the vectorized function. */
33135 new_fndecl = build_decl (BUILTINS_LOCATION,
33136 FUNCTION_DECL, get_identifier (name), fntype);
33137 TREE_PUBLIC (new_fndecl) = 1;
33138 DECL_EXTERNAL (new_fndecl) = 1;
33139 DECL_IS_NOVOPS (new_fndecl) = 1;
33140 TREE_READONLY (new_fndecl) = 1;
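/* Illustrative sketch (an assumption) of the ACML name mangling above:
   the "__vr.._" template becomes "__vrd2_" for the 2-element double
   variants and "__vrs4_" for the 4-element float variants, so BUILT_IN_SIN
   maps to "__vrd2_sin" and BUILT_IN_SINF to "__vrs4_sinf".  This handler
   is selected with -mveclibabi=acml, and only on 64-bit targets with
   unsafe math optimizations enabled.  */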
33145 /* Returns a decl of a function that implements gather load with
33146 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
33147 Return NULL_TREE if it is not available. */
33150 ix86_vectorize_builtin_gather (const_tree mem_vectype,
33151 const_tree index_type, int scale)
33154 enum ix86_builtins code;
33159 if ((TREE_CODE (index_type) != INTEGER_TYPE
33160 && !POINTER_TYPE_P (index_type))
33161 || (TYPE_MODE (index_type) != SImode
33162 && TYPE_MODE (index_type) != DImode))
33165 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
33168 /* v*gather* insn sign extends index to pointer mode. */
33169 if (TYPE_PRECISION (index_type) < POINTER_SIZE
33170 && TYPE_UNSIGNED (index_type))
33175 || (scale & (scale - 1)) != 0)
33178 si = TYPE_MODE (index_type) == SImode;
33179 switch (TYPE_MODE (mem_vectype))
33182 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
33185 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
33188 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
33191 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
33194 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
33197 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
33200 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
33203 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
33209 return ix86_builtins[code];
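/* Illustrative sketch (an assumption): with AVX2 enabled the vectorizer
   queries this hook for loops with indexed loads, e.g.

     void f (double *out, const double *data, const int *idx, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         out[i] = data[idx[i]];   /* V4DF data gathered by SImode indices.  */
     }

   for which the V4DF / SImode-index combination above selects
   IX86_BUILTIN_GATHERALTSIV4DF.  */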
33212 /* Returns a code for a target-specific builtin that implements
33213 the reciprocal of the function, or NULL_TREE if it is not available.  */
33216 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
33217 bool sqrt ATTRIBUTE_UNUSED)
33219 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
33220 && flag_finite_math_only && !flag_trapping_math
33221 && flag_unsafe_math_optimizations))
33225 /* Machine dependent builtins. */
33228 /* Vectorized version of sqrt to rsqrt conversion. */
33229 case IX86_BUILTIN_SQRTPS_NR:
33230 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
33232 case IX86_BUILTIN_SQRTPS_NR256:
33233 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
33239 /* Normal builtins. */
33242 /* Sqrt to rsqrt conversion. */
33243 case BUILT_IN_SQRTF:
33244 return ix86_builtins[IX86_BUILTIN_RSQRTF];
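/* Illustrative sketch (an assumption): under the -ffast-math style
   conditions tested above, a scalar expression such as

     float inv_norm (float x) { return 1.0f / sqrtf (x); }

   can be rewritten to use the RSQRTF builtin (rsqrtss followed by a
   Newton-Raphson refinement step) instead of a full-precision divide and
   square root.  */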
33251 /* Helper for avx_vpermilps256_operand et al. This is also used by
33252 the expansion functions to turn the parallel back into a mask.
33253 The return value is 0 for no match and the imm8+1 for a match. */
33256 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
33258 unsigned i, nelt = GET_MODE_NUNITS (mode);
33260 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
33262 if (XVECLEN (par, 0) != (int) nelt)
33265 /* Validate that all of the elements are constants, and not totally
33266 out of range. Copy the data into an integral array to make the
33267 subsequent checks easier. */
33268 for (i = 0; i < nelt; ++i)
33270 rtx er = XVECEXP (par, 0, i);
33271 unsigned HOST_WIDE_INT ei;
33273 if (!CONST_INT_P (er))
33284 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane.  */
33286 for (i = 0; i < 2; ++i)
33290 mask |= ipar[i] << i;
33292 for (i = 2; i < 4; ++i)
33296 mask |= (ipar[i] - 2) << i;
33301 /* In the 256-bit SFmode case, we have full freedom of movement
33302 within the low 128-bit lane, but the high 128-bit lane must
33303 mirror the exact same pattern. */
33304 for (i = 0; i < 4; ++i)
33305 if (ipar[i] + 4 != ipar[i + 4])
33312 /* In the 128-bit case, we've full freedom in the placement of
33313 the elements from the source operand. */
33314 for (i = 0; i < nelt; ++i)
33315 mask |= ipar[i] << (i * (nelt / 2));
33319 gcc_unreachable ();
33322 /* Make sure success has a non-zero value by adding one. */
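/* Worked example (an assumption, not part of the original comment): for a
   V4SFmode parallel selecting elements {2, 3, 0, 1}, the 128-bit case above
   builds mask = 2<<0 | 3<<2 | 0<<4 | 1<<6 = 0x4e, and the function returns
   0x4e + 1 so that a successful match is always non-zero; callers subtract
   one to recover the vpermilps/shufps-style immediate.  */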
33326 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
33327 the expansion functions to turn the parallel back into a mask.
33328 The return value is 0 for no match and the imm8+1 for a match. */
33331 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
33333 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
33335 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
33337 if (XVECLEN (par, 0) != (int) nelt)
33340 /* Validate that all of the elements are constants, and not totally
33341 out of range. Copy the data into an integral array to make the
33342 subsequent checks easier. */
33343 for (i = 0; i < nelt; ++i)
33345 rtx er = XVECEXP (par, 0, i);
33346 unsigned HOST_WIDE_INT ei;
33348 if (!CONST_INT_P (er))
33351 if (ei >= 2 * nelt)
33356 /* Validate that the halves of the permute are halves. */
33357 for (i = 0; i < nelt2 - 1; ++i)
33358 if (ipar[i] + 1 != ipar[i + 1])
33360 for (i = nelt2; i < nelt - 1; ++i)
33361 if (ipar[i] + 1 != ipar[i + 1])
33364 /* Reconstruct the mask. */
33365 for (i = 0; i < 2; ++i)
33367 unsigned e = ipar[i * nelt2];
33371 mask |= e << (i * 4);
33374 /* Make sure success has a non-zero value by adding one. */
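/* Worked example (an assumption): a V4DFmode parallel {2, 3, 0, 1} swaps
   the two 128-bit lanes; each lane selector is ipar[i * nelt2] scaled down
   to a lane number, giving mask = 1 | (0 << 4) = 0x01, and the function
   returns 0x01 + 1 so that success is non-zero.  */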
33378 /* Store OPERAND to the memory after reload is completed. This means
33379 that we can't easily use assign_stack_local. */
33381 ix86_force_to_memory (enum machine_mode mode, rtx operand)
33385 gcc_assert (reload_completed);
33386 if (ix86_using_red_zone ())
33388 result = gen_rtx_MEM (mode,
33389 gen_rtx_PLUS (Pmode,
33391 GEN_INT (-RED_ZONE_SIZE)));
33392 emit_move_insn (result, operand);
33394 else if (TARGET_64BIT)
33400 operand = gen_lowpart (DImode, operand);
33404 gen_rtx_SET (VOIDmode,
33405 gen_rtx_MEM (DImode,
33406 gen_rtx_PRE_DEC (DImode,
33407 stack_pointer_rtx)),
33411 gcc_unreachable ();
33413 result = gen_rtx_MEM (mode, stack_pointer_rtx);
33422 split_double_mode (mode, &operand, 1, operands, operands + 1);
33424 gen_rtx_SET (VOIDmode,
33425 gen_rtx_MEM (SImode,
33426 gen_rtx_PRE_DEC (Pmode,
33427 stack_pointer_rtx)),
33430 gen_rtx_SET (VOIDmode,
33431 gen_rtx_MEM (SImode,
33432 gen_rtx_PRE_DEC (Pmode,
33433 stack_pointer_rtx)),
33438 /* Store HImodes as SImodes. */
33439 operand = gen_lowpart (SImode, operand);
33443 gen_rtx_SET (VOIDmode,
33444 gen_rtx_MEM (GET_MODE (operand),
33445 gen_rtx_PRE_DEC (SImode,
33446 stack_pointer_rtx)),
33450 gcc_unreachable ();
33452 result = gen_rtx_MEM (mode, stack_pointer_rtx);
33457 /* Free the operand from memory.  */
33459 ix86_free_from_memory (enum machine_mode mode)
33461 if (!ix86_using_red_zone ())
33465 if (mode == DImode || TARGET_64BIT)
33469 /* Use LEA to deallocate stack space. In peephole2 it will be converted
33470 to a pop or add instruction if registers are available.  */
33471 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
33472 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
33477 /* Return a register priority for hard reg REGNO. */
33479 ix86_register_priority (int hard_regno)
33481 /* ebp and r13 as the base always want a displacement, r12 as the
33482 base always wants an index.  So discourage their usage in an address.  */
33484 if (hard_regno == R12_REG || hard_regno == R13_REG)
33486 if (hard_regno == BP_REG)
33488 /* New x86-64 int registers result in bigger code size.  Discourage their use.  */
33490 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
33492 /* New x86-64 SSE registers result in bigger code size.  Discourage their use.  */
33494 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
33496 /* Usage of AX register results in smaller code. Prefer it. */
33497 if (hard_regno == 0)
33502 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
33504 Put float CONST_DOUBLE in the constant pool instead of fp regs.
33505 QImode must go into class Q_REGS.
33506 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
33507 movdf to do mem-to-mem moves through integer regs. */
33510 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
33512 enum machine_mode mode = GET_MODE (x);
33514 /* We're only allowed to return a subclass of CLASS. Many of the
33515 following checks fail for NO_REGS, so eliminate that early. */
33516 if (regclass == NO_REGS)
33519 /* All classes can load zeros. */
33520 if (x == CONST0_RTX (mode))
33523 /* Force constants into memory if we are loading a (nonzero) constant into
33524 an MMX or SSE register. This is because there are no MMX/SSE instructions
33525 to load from a constant. */
33527 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
33530 /* Prefer SSE regs only, if we can use them for math. */
33531 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
33532 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
33534 /* Floating-point constants need more complex checks. */
33535 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
33537 /* General regs can load everything. */
33538 if (reg_class_subset_p (regclass, GENERAL_REGS))
33541 /* Floats can load 0 and 1 plus some others. Note that we eliminated
33542 zero above. We only want to wind up preferring 80387 registers if
33543 we plan on doing computation with them. */
33545 && standard_80387_constant_p (x) > 0)
33547 /* Limit class to non-sse. */
33548 if (regclass == FLOAT_SSE_REGS)
33550 if (regclass == FP_TOP_SSE_REGS)
33552 if (regclass == FP_SECOND_SSE_REGS)
33553 return FP_SECOND_REG;
33554 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
33561 /* Generally when we see PLUS here, it's the function invariant
33562 (plus soft-fp const_int), which can only be computed into general registers.  */
33564 if (GET_CODE (x) == PLUS)
33565 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
33567 /* QImode constants are easy to load, but non-constant QImode data
33568 must go into Q_REGS. */
33569 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
33571 if (reg_class_subset_p (regclass, Q_REGS))
33573 if (reg_class_subset_p (Q_REGS, regclass))
33581 /* Discourage putting floating-point values in SSE registers unless
33582 SSE math is being used, and likewise for the 387 registers. */
33584 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
33586 enum machine_mode mode = GET_MODE (x);
33588 /* Restrict the output reload class to the register bank that we are doing
33589 math on. If we would like not to return a subset of CLASS, reject this
33590 alternative: if reload cannot do this, it will still use its choice. */
33591 mode = GET_MODE (x);
33592 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
33593 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
33595 if (X87_FLOAT_MODE_P (mode))
33597 if (regclass == FP_TOP_SSE_REGS)
33599 else if (regclass == FP_SECOND_SSE_REGS)
33600 return FP_SECOND_REG;
33602 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
33609 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
33610 enum machine_mode mode, secondary_reload_info *sri)
33612 /* Double-word spills from general registers to non-offsettable memory
33613 references (zero-extended addresses) require special handling. */
33616 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
33617 && rclass == GENERAL_REGS
33618 && !offsettable_memref_p (x))
33621 ? CODE_FOR_reload_noff_load
33622 : CODE_FOR_reload_noff_store);
33623 /* Add the cost of moving address to a temporary. */
33624 sri->extra_cost = 1;
33629 /* QImode spills from non-QI registers require
33630 an intermediate register on 32-bit targets. */
33632 && !in_p && mode == QImode
33633 && (rclass == GENERAL_REGS
33634 || rclass == LEGACY_REGS
33635 || rclass == NON_Q_REGS
33638 || rclass == INDEX_REGS))
33647 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
33648 regno = true_regnum (x);
33650 /* Return Q_REGS if the operand is in memory. */
33655 /* This condition handles the corner case where an expression involving
33656 pointers gets vectorized. We're trying to use the address of a
33657 stack slot as a vector initializer.
33659 (set (reg:V2DI 74 [ vect_cst_.2 ])
33660 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
33662 Eventually frame gets turned into sp+offset like this:
33664 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33665 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33666 (const_int 392 [0x188]))))
33668 That later gets turned into:
33670 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33671 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
33672 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
33674 We'll have the following reload recorded:
33676 Reload 0: reload_in (DI) =
33677 (plus:DI (reg/f:DI 7 sp)
33678 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
33679 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33680 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
33681 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
33682 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
33683 reload_reg_rtx: (reg:V2DI 22 xmm1)
33685 Which isn't going to work since SSE instructions can't handle scalar
33686 additions.  Returning GENERAL_REGS forces the addition into an integer
33687 register and reload can handle subsequent reloads without problems. */
33689 if (in_p && GET_CODE (x) == PLUS
33690 && SSE_CLASS_P (rclass)
33691 && SCALAR_INT_MODE_P (mode))
33692 return GENERAL_REGS;
33697 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
33700 ix86_class_likely_spilled_p (reg_class_t rclass)
33711 case SSE_FIRST_REG:
33713 case FP_SECOND_REG:
33723 /* If we are copying between general and FP registers, we need a memory
33724 location. The same is true for SSE and MMX registers.
33726 To optimize register_move_cost performance, allow an inline variant.
33728 The macro can't work reliably when one of the CLASSES is a class containing
33729 registers from multiple units (SSE, MMX, integer). We avoid this by never
33730 combining those units in single alternative in the machine description.
33731 Ensure that this constraint holds to avoid unexpected surprises.
33733 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
33734 enforce these sanity checks. */
33737 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
33738 enum machine_mode mode, int strict)
33740 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
33741 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
33742 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
33743 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
33744 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
33745 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
33747 gcc_assert (!strict || lra_in_progress);
33751 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
33754 /* ??? This is a lie. We do have moves between mmx/general, and for
33755 mmx/sse2. But by saying we need secondary memory we discourage the
33756 register allocator from using the mmx registers unless needed. */
33757 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
33760 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
33762 /* SSE1 doesn't have any direct moves from other classes. */
33766 /* If the target says that inter-unit moves are more expensive
33767 than moving through memory, then don't generate them. */
33768 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
33769 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
33772 /* Between SSE and general, we have moves no larger than word size. */
33773 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
33781 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
33782 enum machine_mode mode, int strict)
33784 return inline_secondary_memory_needed (class1, class2, mode, strict);
33787 /* Implement the TARGET_CLASS_MAX_NREGS hook.
33789 On the 80386, this is the size of MODE in words,
33790 except in the FP regs, where a single reg is always enough. */
33792 static unsigned char
33793 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
33795 if (MAYBE_INTEGER_CLASS_P (rclass))
33797 if (mode == XFmode)
33798 return (TARGET_64BIT ? 2 : 3);
33799 else if (mode == XCmode)
33800 return (TARGET_64BIT ? 4 : 6);
33802 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
33806 if (COMPLEX_MODE_P (mode))
33813 /* Return true if the registers in CLASS cannot represent the change from
33814 modes FROM to TO. */
33817 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
33818 enum reg_class regclass)
33823 /* x87 registers can't do subreg at all, as all values are reformatted
33824 to extended precision. */
33825 if (MAYBE_FLOAT_CLASS_P (regclass))
33828 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
33830 /* Vector registers do not support QI or HImode loads. If we don't
33831 disallow a change to these modes, reload will assume it's ok to
33832 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
33833 the vec_dupv4hi pattern. */
33834 if (GET_MODE_SIZE (from) < 4)
33837 /* Vector registers do not support subreg with nonzero offsets, which
33838 are otherwise valid for integer registers. Since we can't see
33839 whether we have a nonzero offset from here, prohibit all
33840 nonparadoxical subregs changing size. */
33841 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
33848 /* Return the cost of moving data of mode M between a
33849 register and memory. A value of 2 is the default; this cost is
33850 relative to those in `REGISTER_MOVE_COST'.
33852 This function is used extensively by register_move_cost, which is used to
33853 build tables at startup. Make it inline in this case.
33854 When IN is 2, return maximum of in and out move cost.
33856 If moving between registers and memory is more expensive than
33857 between two registers, you should define this macro to express the relative cost.
33860 Model also increased moving costs of QImode registers in non-Q_REGS classes.  */
33864 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
33868 if (FLOAT_CLASS_P (regclass))
33886 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
33887 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
33889 if (SSE_CLASS_P (regclass))
33892 switch (GET_MODE_SIZE (mode))
33907 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
33908 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
33910 if (MMX_CLASS_P (regclass))
33913 switch (GET_MODE_SIZE (mode))
33925 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
33926 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
33928 switch (GET_MODE_SIZE (mode))
33931 if (Q_CLASS_P (regclass) || TARGET_64BIT)
33934 return ix86_cost->int_store[0];
33935 if (TARGET_PARTIAL_REG_DEPENDENCY
33936 && optimize_function_for_speed_p (cfun))
33937 cost = ix86_cost->movzbl_load;
33939 cost = ix86_cost->int_load[0];
33941 return MAX (cost, ix86_cost->int_store[0]);
33947 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
33949 return ix86_cost->movzbl_load;
33951 return ix86_cost->int_store[0] + 4;
33956 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
33957 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
33959 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode. */
33960 if (mode == TFmode)
33963 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
33965 cost = ix86_cost->int_load[2];
33967 cost = ix86_cost->int_store[2];
33968 return (cost * (((int) GET_MODE_SIZE (mode)
33969 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
33974 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
33977 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
33981 /* Return the cost of moving data from a register in class CLASS1 to
33982 one in class CLASS2.
33984 It is not required that the cost always equal 2 when FROM is the same as TO;
33985 on some machines it is expensive to move between registers if they are not
33986 general registers. */
33989 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
33990 reg_class_t class2_i)
33992 enum reg_class class1 = (enum reg_class) class1_i;
33993 enum reg_class class2 = (enum reg_class) class2_i;
33995 /* In case we require secondary memory, compute cost of the store followed
33996 by load. In order to avoid bad register allocation choices, we need
33997 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
33999 if (inline_secondary_memory_needed (class1, class2, mode, 0))
34003 cost += inline_memory_move_cost (mode, class1, 2);
34004 cost += inline_memory_move_cost (mode, class2, 2);
34006 /* In case of copying from a general-purpose register we may emit multiple
34007 stores followed by a single load, causing a memory-size-mismatch stall.
34008 Count this as an arbitrarily high cost of 20. */
34009 if (targetm.class_max_nregs (class1, mode)
34010 > targetm.class_max_nregs (class2, mode))
34013 /* In the case of FP/MMX moves, the registers actually overlap, and we
34014 have to switch modes in order to treat them differently. */
34015 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
34016 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
34022 /* Moves between SSE/MMX and the integer unit are expensive. */
34023 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
34024 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
34026 /* ??? By keeping the returned value relatively high, we limit the number
34027 of moves between integer and MMX/SSE registers for all targets.
34028 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
34029 where integer modes in MMX/SSE registers are not tieable
34030 because of missing QImode and HImode moves to, from or between
34031 MMX/SSE registers. */
34032 return MAX (8, ix86_cost->mmxsse_to_integer);
34034 if (MAYBE_FLOAT_CLASS_P (class1))
34035 return ix86_cost->fp_move;
34036 if (MAYBE_SSE_CLASS_P (class1))
34037 return ix86_cost->sse_move;
34038 if (MAYBE_MMX_CLASS_P (class1))
34039 return ix86_cost->mmx_move;
34043 /* Return TRUE if hard register REGNO can hold a value of machine-mode MODE.  */
34047 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
34049 /* Flags and only flags can only hold CCmode values. */
34050 if (CC_REGNO_P (regno))
34051 return GET_MODE_CLASS (mode) == MODE_CC;
34052 if (GET_MODE_CLASS (mode) == MODE_CC
34053 || GET_MODE_CLASS (mode) == MODE_RANDOM
34054 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
34056 if (STACK_REGNO_P (regno))
34057 return VALID_FP_MODE_P (mode);
34058 if (SSE_REGNO_P (regno))
34060 /* We implement the move patterns for all vector modes into and
34061 out of SSE registers, even when no operation instructions
34062 are available.  OImode move is available only when AVX is enabled.  */
34064 return ((TARGET_AVX && mode == OImode)
34065 || VALID_AVX256_REG_MODE (mode)
34066 || VALID_SSE_REG_MODE (mode)
34067 || VALID_SSE2_REG_MODE (mode)
34068 || VALID_MMX_REG_MODE (mode)
34069 || VALID_MMX_REG_MODE_3DNOW (mode));
34071 if (MMX_REGNO_P (regno))
34073 /* We implement the move patterns for 3DNOW modes even in MMX mode,
34074 so if the register is available at all, then we can move data of
34075 the given mode into or out of it. */
34076 return (VALID_MMX_REG_MODE (mode)
34077 || VALID_MMX_REG_MODE_3DNOW (mode));
34080 if (mode == QImode)
34082 /* Take care for QImode values - they can be in non-QI regs,
34083 but then they do cause partial register stalls. */
34084 if (ANY_QI_REGNO_P (regno))
34086 if (!TARGET_PARTIAL_REG_STALL)
34088 /* LRA checks if the hard register is OK for the given mode.
34089 QImode values can live in non-QI regs, so we allow all registers.  */
34091 if (lra_in_progress)
34093 return !can_create_pseudo_p ();
34095 /* We handle both integer and floats in the general purpose registers. */
34096 else if (VALID_INT_MODE_P (mode))
34098 else if (VALID_FP_MODE_P (mode))
34100 else if (VALID_DFP_MODE_P (mode))
34102 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
34103 on to use that value in smaller contexts, this can easily force a
34104 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
34105 supporting DImode, allow it. */
34106 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
34112 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
34113 tieable integer mode. */
34116 ix86_tieable_integer_mode_p (enum machine_mode mode)
34125 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
34128 return TARGET_64BIT;
34135 /* Return true if MODE1 is accessible in a register that can hold MODE2
34136 without copying. That is, all register classes that can hold MODE2
34137 can also hold MODE1. */
34140 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
34142 if (mode1 == mode2)
34145 if (ix86_tieable_integer_mode_p (mode1)
34146 && ix86_tieable_integer_mode_p (mode2))
34149 /* MODE2 being XFmode implies fp stack or general regs, which means we
34150 can tie any smaller floating point modes to it. Note that we do not
34151 tie this with TFmode. */
34152 if (mode2 == XFmode)
34153 return mode1 == SFmode || mode1 == DFmode;
34155 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
34156 that we can tie it with SFmode. */
34157 if (mode2 == DFmode)
34158 return mode1 == SFmode;
34160 /* If MODE2 is only appropriate for an SSE register, then tie with
34161 any other mode acceptable to SSE registers. */
34162 if (GET_MODE_SIZE (mode2) == 32
34163 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
34164 return (GET_MODE_SIZE (mode1) == 32
34165 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
34166 if (GET_MODE_SIZE (mode2) == 16
34167 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
34168 return (GET_MODE_SIZE (mode1) == 16
34169 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
34171 /* If MODE2 is appropriate for an MMX register, then tie
34172 with any other mode acceptable to MMX registers. */
34173 if (GET_MODE_SIZE (mode2) == 8
34174 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
34175 return (GET_MODE_SIZE (mode1) == 8
34176 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
34181 /* Return the cost of moving between two registers of mode MODE. */
34184 ix86_set_reg_reg_cost (enum machine_mode mode)
34186 unsigned int units = UNITS_PER_WORD;
34188 switch (GET_MODE_CLASS (mode))
34194 units = GET_MODE_SIZE (CCmode);
34198 if ((TARGET_SSE && mode == TFmode)
34199 || (TARGET_80387 && mode == XFmode)
34200 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
34201 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
34202 units = GET_MODE_SIZE (mode);
34205 case MODE_COMPLEX_FLOAT:
34206 if ((TARGET_SSE && mode == TCmode)
34207 || (TARGET_80387 && mode == XCmode)
34208 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
34209 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
34210 units = GET_MODE_SIZE (mode);
34213 case MODE_VECTOR_INT:
34214 case MODE_VECTOR_FLOAT:
34215 if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
34216 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
34217 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
34218 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
34219 units = GET_MODE_SIZE (mode);
34222 /* Return the cost of moving between two registers of mode MODE,
34223 assuming that the move will be in pieces of at most UNITS bytes. */
34224 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
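/* Worked example (an assumption): on a 64-bit target a TImode
   register-to-register copy has no 16-byte integer move, so units stays at
   UNITS_PER_WORD (8) and the cost is COSTS_N_INSNS ((16 + 7) / 8)
   = COSTS_N_INSNS (2), i.e. two word-sized moves.  */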
34227 /* Compute a (partial) cost for rtx X. Return true if the complete
34228 cost has been computed, and false if subexpressions should be
34229 scanned. In either case, *TOTAL contains the cost result. */
34232 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
34235 enum rtx_code code = (enum rtx_code) code_i;
34236 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
34237 enum machine_mode mode = GET_MODE (x);
34238 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
34243 if (register_operand (SET_DEST (x), VOIDmode)
34244 && reg_or_0_operand (SET_SRC (x), VOIDmode))
34246 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
34255 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
34257 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
34259 else if (flag_pic && SYMBOLIC_CONST (x)
34261 || (GET_CODE (x) != LABEL_REF
34262 && (GET_CODE (x) != SYMBOL_REF
34263 || !SYMBOL_REF_LOCAL_P (x)))))
34270 if (mode == VOIDmode)
34275 switch (standard_80387_constant_p (x))
34280 default: /* Other constants */
34287 if (SSE_FLOAT_MODE_P (mode))
34290 switch (standard_sse_constant_p (x))
34294 case 1: /* 0: xor eliminates false dependency */
34297 default: /* -1: cmp contains false dependency */
34302 /* Fall back to (MEM (SYMBOL_REF)), since that's where
34303 it'll probably end up. Add a penalty for size. */
34304 *total = (COSTS_N_INSNS (1)
34305 + (flag_pic != 0 && !TARGET_64BIT)
34306 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
34310 /* The zero extension is often completely free on x86_64, so make
34311 it as cheap as possible. */
34312 if (TARGET_64BIT && mode == DImode
34313 && GET_MODE (XEXP (x, 0)) == SImode)
34315 else if (TARGET_ZERO_EXTEND_WITH_AND)
34316 *total = cost->add;
34318 *total = cost->movzx;
34322 *total = cost->movsx;
34326 if (SCALAR_INT_MODE_P (mode)
34327 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
34328 && CONST_INT_P (XEXP (x, 1)))
34330 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
34333 *total = cost->add;
34336 if ((value == 2 || value == 3)
34337 && cost->lea <= cost->shift_const)
34339 *total = cost->lea;
34349 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
34351 /* ??? Should be SSE vector operation cost. */
34352 /* At least for published AMD latencies, this really is the same
34353 as the latency for a simple fpu operation like fabs. */
34354 /* V*QImode is emulated with 1-11 insns. */
34355 if (mode == V16QImode || mode == V32QImode)
34358 if (TARGET_XOP && mode == V16QImode)
34360 /* For XOP we use vpshab, which requires a broadcast of the
34361 value to the variable shift insn. For constants this
34362 means a V16Q const in mem; even when we can perform the
34363 shift with one insn set the cost to prefer paddb. */
34364 if (CONSTANT_P (XEXP (x, 1)))
34366 *total = (cost->fabs
34367 + rtx_cost (XEXP (x, 0), code, 0, speed)
34368 + (speed ? 2 : COSTS_N_BYTES (16)));
34373 else if (TARGET_SSSE3)
34375 *total = cost->fabs * count;
34378 *total = cost->fabs;
34380 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
34382 if (CONST_INT_P (XEXP (x, 1)))
34384 if (INTVAL (XEXP (x, 1)) > 32)
34385 *total = cost->shift_const + COSTS_N_INSNS (2);
34387 *total = cost->shift_const * 2;
34391 if (GET_CODE (XEXP (x, 1)) == AND)
34392 *total = cost->shift_var * 2;
34394 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
34399 if (CONST_INT_P (XEXP (x, 1)))
34400 *total = cost->shift_const;
34401 else if (GET_CODE (XEXP (x, 1)) == SUBREG
34402 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
34404 /* Return the cost after shift-and truncation. */
34405 *total = cost->shift_var;
34409 *total = cost->shift_var;
34417 gcc_assert (FLOAT_MODE_P (mode));
34418 gcc_assert (TARGET_FMA || TARGET_FMA4);
34420 /* ??? SSE scalar/vector cost should be used here. */
34421 /* ??? Bald assumption that fma has the same cost as fmul. */
34422 *total = cost->fmul;
34423 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
34425 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
34427 if (GET_CODE (sub) == NEG)
34428 sub = XEXP (sub, 0);
34429 *total += rtx_cost (sub, FMA, 0, speed);
34432 if (GET_CODE (sub) == NEG)
34433 sub = XEXP (sub, 0);
34434 *total += rtx_cost (sub, FMA, 2, speed);
34439 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34441 /* ??? SSE scalar cost should be used here. */
34442 *total = cost->fmul;
34445 else if (X87_FLOAT_MODE_P (mode))
34447 *total = cost->fmul;
34450 else if (FLOAT_MODE_P (mode))
34452 /* ??? SSE vector cost should be used here. */
34453 *total = cost->fmul;
34456 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
34458 /* V*QImode is emulated with 7-13 insns. */
34459 if (mode == V16QImode || mode == V32QImode)
34462 if (TARGET_XOP && mode == V16QImode)
34464 else if (TARGET_SSSE3)
34466 *total = cost->fmul * 2 + cost->fabs * extra;
34468 /* V*DImode is emulated with 5-8 insns. */
34469 else if (mode == V2DImode || mode == V4DImode)
34471 if (TARGET_XOP && mode == V2DImode)
34472 *total = cost->fmul * 2 + cost->fabs * 3;
34474 *total = cost->fmul * 3 + cost->fabs * 5;
34476 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
34477 insns, including two PMULUDQ. */
34478 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
34479 *total = cost->fmul * 2 + cost->fabs * 5;
34481 *total = cost->fmul;
34486 rtx op0 = XEXP (x, 0);
34487 rtx op1 = XEXP (x, 1);
34489 if (CONST_INT_P (XEXP (x, 1)))
34491 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
34492 for (nbits = 0; value != 0; value &= value - 1)
34496 /* This is arbitrary. */
34499 /* Compute costs correctly for widening multiplication. */
34500 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
34501 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
34502 == GET_MODE_SIZE (mode))
34504 int is_mulwiden = 0;
34505 enum machine_mode inner_mode = GET_MODE (op0);
34507 if (GET_CODE (op0) == GET_CODE (op1))
34508 is_mulwiden = 1, op1 = XEXP (op1, 0);
34509 else if (CONST_INT_P (op1))
34511 if (GET_CODE (op0) == SIGN_EXTEND)
34512 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
34515 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
34519 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
34522 *total = (cost->mult_init[MODE_INDEX (mode)]
34523 + nbits * cost->mult_bit
34524 + rtx_cost (op0, outer_code, opno, speed)
34525 + rtx_cost (op1, outer_code, opno, speed));
34534 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34535 /* ??? SSE cost should be used here. */
34536 *total = cost->fdiv;
34537 else if (X87_FLOAT_MODE_P (mode))
34538 *total = cost->fdiv;
34539 else if (FLOAT_MODE_P (mode))
34540 /* ??? SSE vector cost should be used here. */
34541 *total = cost->fdiv;
34543 *total = cost->divide[MODE_INDEX (mode)];
34547 if (GET_MODE_CLASS (mode) == MODE_INT
34548 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
34550 if (GET_CODE (XEXP (x, 0)) == PLUS
34551 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
34552 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
34553 && CONSTANT_P (XEXP (x, 1)))
34555 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
34556 if (val == 2 || val == 4 || val == 8)
34558 *total = cost->lea;
34559 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
34560 outer_code, opno, speed);
34561 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
34562 outer_code, opno, speed);
34563 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
34567 else if (GET_CODE (XEXP (x, 0)) == MULT
34568 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
34570 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
34571 if (val == 2 || val == 4 || val == 8)
34573 *total = cost->lea;
34574 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
34575 outer_code, opno, speed);
34576 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
34580 else if (GET_CODE (XEXP (x, 0)) == PLUS)
34582 *total = cost->lea;
34583 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
34584 outer_code, opno, speed);
34585 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
34586 outer_code, opno, speed);
34587 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
34594 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34596 /* ??? SSE cost should be used here. */
34597 *total = cost->fadd;
34600 else if (X87_FLOAT_MODE_P (mode))
34602 *total = cost->fadd;
34605 else if (FLOAT_MODE_P (mode))
34607 /* ??? SSE vector cost should be used here. */
34608 *total = cost->fadd;
34616 if (GET_MODE_CLASS (mode) == MODE_INT
34617 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
34619 *total = (cost->add * 2
34620 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
34621 << (GET_MODE (XEXP (x, 0)) != DImode))
34622 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
34623 << (GET_MODE (XEXP (x, 1)) != DImode)));
34629 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34631 /* ??? SSE cost should be used here. */
34632 *total = cost->fchs;
34635 else if (X87_FLOAT_MODE_P (mode))
34637 *total = cost->fchs;
34640 else if (FLOAT_MODE_P (mode))
34642 /* ??? SSE vector cost should be used here. */
34643 *total = cost->fchs;
34649 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
34651 /* ??? Should be SSE vector operation cost. */
34652 /* At least for published AMD latencies, this really is the same
34653 as the latency for a simple fpu operation like fabs. */
34654 *total = cost->fabs;
34656 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
34657 *total = cost->add * 2;
34659 *total = cost->add;
34663 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
34664 && XEXP (XEXP (x, 0), 1) == const1_rtx
34665 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
34666 && XEXP (x, 1) == const0_rtx)
34668 /* This kind of construct is implemented using test[bwl].
34669 Treat it as if we had an AND. */
34670 *total = (cost->add
34671 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
34672 + rtx_cost (const1_rtx, outer_code, opno, speed));
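/* Added note (illustrative): testing a single bit and comparing against
   zero, e.g. source like `if (x & (1 << 3))', typically ends up as

       testb  $0x8, %al

   hence the AND-like costing above.  */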
34678 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
34683 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34684 /* ??? SSE cost should be used here. */
34685 *total = cost->fabs;
34686 else if (X87_FLOAT_MODE_P (mode))
34687 *total = cost->fabs;
34688 else if (FLOAT_MODE_P (mode))
34689 /* ??? SSE vector cost should be used here. */
34690 *total = cost->fabs;
34694 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
34695 /* ??? SSE cost should be used here. */
34696 *total = cost->fsqrt;
34697 else if (X87_FLOAT_MODE_P (mode))
34698 *total = cost->fsqrt;
34699 else if (FLOAT_MODE_P (mode))
34700 /* ??? SSE vector cost should be used here. */
34701 *total = cost->fsqrt;
34705 if (XINT (x, 1) == UNSPEC_TP)
34712 case VEC_DUPLICATE:
34713 /* ??? Assume all of these vector manipulation patterns are
34714 recognizable, in which case they all pretty much have the same cost. */
34716 *total = cost->fabs;
34726 static int current_machopic_label_num;
34728 /* Given a symbol name and its associated stub, write out the
34729 definition of the stub. */
34732 machopic_output_stub (FILE *file, const char *symb, const char *stub)
34734 unsigned int length;
34735 char *binder_name, *symbol_name, lazy_ptr_name[32];
34736 int label = ++current_machopic_label_num;
34738 /* For 64-bit we shouldn't get here. */
34739 gcc_assert (!TARGET_64BIT);
34741 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
34742 symb = targetm.strip_name_encoding (symb);
34744 length = strlen (stub);
34745 binder_name = XALLOCAVEC (char, length + 32);
34746 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
34748 length = strlen (symb);
34749 symbol_name = XALLOCAVEC (char, length + 32);
34750 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
34752 sprintf (lazy_ptr_name, "L%d$lz", label);
34754 if (MACHOPIC_ATT_STUB)
34755 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
34756 else if (MACHOPIC_PURE)
34757 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
34759 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
34761 fprintf (file, "%s:\n", stub);
34762 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34764 if (MACHOPIC_ATT_STUB)
34766 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
34768 else if (MACHOPIC_PURE)
34771 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34772 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
34773 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
34774 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
34775 label, lazy_ptr_name, label);
34776 fprintf (file, "\tjmp\t*%%ecx\n");
34779 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
34781 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
34782 it needs no stub-binding-helper. */
34783 if (MACHOPIC_ATT_STUB)
34786 fprintf (file, "%s:\n", binder_name);
34790 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
34791 fprintf (file, "\tpushl\t%%ecx\n");
34794 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
34796 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
34798 /* N.B. Keep the correspondence of these
34799 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
34800 old-pic/new-pic/non-pic stubs; altering this will break
34801 compatibility with existing dylibs. */
34804 /* 25-byte PIC stub using "CALL get_pc_thunk". */
34805 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
34808 /* 16-byte -mdynamic-no-pic stub. */
34809 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
34811 fprintf (file, "%s:\n", lazy_ptr_name);
34812 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
34813 fprintf (file, ASM_LONG "%s\n", binder_name);
34815 #endif /* TARGET_MACHO */
34817 /* Order the registers for register allocator. */
34820 x86_order_regs_for_local_alloc (void)
34825 /* First allocate the local general purpose registers. */
34826 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
34827 if (GENERAL_REGNO_P (i) && call_used_regs[i])
34828 reg_alloc_order [pos++] = i;
34830 /* Global general purpose registers. */
34831 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
34832 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
34833 reg_alloc_order [pos++] = i;
34835 /* x87 registers come first in case we are doing FP math using them. */
34837 if (!TARGET_SSE_MATH)
34838 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
34839 reg_alloc_order [pos++] = i;
34841 /* SSE registers. */
34842 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
34843 reg_alloc_order [pos++] = i;
34844 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
34845 reg_alloc_order [pos++] = i;
34847 /* x87 registers. */
34848 if (TARGET_SSE_MATH)
34849 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
34850 reg_alloc_order [pos++] = i;
34852 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
34853 reg_alloc_order [pos++] = i;
34855 /* Initialize the rest of the array, as we do not allocate some registers at all. */
34857 while (pos < FIRST_PSEUDO_REGISTER)
34858 reg_alloc_order [pos++] = 0;
34861 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
34862 in struct attribute_spec.handler. */
34864 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
34866 int flags ATTRIBUTE_UNUSED,
34867 bool *no_add_attrs)
34869 if (TREE_CODE (*node) != FUNCTION_TYPE
34870 && TREE_CODE (*node) != METHOD_TYPE
34871 && TREE_CODE (*node) != FIELD_DECL
34872 && TREE_CODE (*node) != TYPE_DECL)
34874 warning (OPT_Wattributes, "%qE attribute only applies to functions",
34876 *no_add_attrs = true;
34881 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
34883 *no_add_attrs = true;
34886 if (is_attribute_p ("callee_pop_aggregate_return", name))
34890 cst = TREE_VALUE (args);
34891 if (TREE_CODE (cst) != INTEGER_CST)
34893 warning (OPT_Wattributes,
34894 "%qE attribute requires an integer constant argument",
34896 *no_add_attrs = true;
34898 else if (compare_tree_int (cst, 0) != 0
34899 && compare_tree_int (cst, 1) != 0)
34901 warning (OPT_Wattributes,
34902 "argument to %qE attribute is neither zero, nor one",
34904 *no_add_attrs = true;
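/* Added usage sketch (hypothetical declaration): on 32-bit targets

       struct big f (void)
         __attribute__ ((callee_pop_aggregate_return (1)));

   asks the callee to pop the hidden aggregate-return pointer, while an
   argument of 0 leaves that to the caller; anything else is rejected by
   the handler above.  */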
34913 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
34914 struct attribute_spec.handler. */
34916 ix86_handle_abi_attribute (tree *node, tree name,
34917 tree args ATTRIBUTE_UNUSED,
34918 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
34920 if (TREE_CODE (*node) != FUNCTION_TYPE
34921 && TREE_CODE (*node) != METHOD_TYPE
34922 && TREE_CODE (*node) != FIELD_DECL
34923 && TREE_CODE (*node) != TYPE_DECL)
34925 warning (OPT_Wattributes, "%qE attribute only applies to functions",
34927 *no_add_attrs = true;
34931 /* Can combine regparm with all attributes but fastcall. */
34932 if (is_attribute_p ("ms_abi", name))
34934 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
34936 error ("ms_abi and sysv_abi attributes are not compatible");
34941 else if (is_attribute_p ("sysv_abi", name))
34943 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
34945 error ("ms_abi and sysv_abi attributes are not compatible");
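/* Added usage sketch (hypothetical declaration): on x86-64

       void f (void) __attribute__ ((ms_abi));

   selects the Microsoft x64 calling convention for f, and sysv_abi the
   System V one; the handler above rejects combining the two on the same
   type.  */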
34954 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
34955 struct attribute_spec.handler. */
34957 ix86_handle_struct_attribute (tree *node, tree name,
34958 tree args ATTRIBUTE_UNUSED,
34959 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
34962 if (DECL_P (*node))
34964 if (TREE_CODE (*node) == TYPE_DECL)
34965 type = &TREE_TYPE (*node);
34970 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
34972 warning (OPT_Wattributes, "%qE attribute ignored",
34974 *no_add_attrs = true;
34977 else if ((is_attribute_p ("ms_struct", name)
34978 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
34979 || ((is_attribute_p ("gcc_struct", name)
34980 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
34982 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
34984 *no_add_attrs = true;
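/* Added usage sketch (hypothetical type):

       struct s { char c : 4; int i : 12; } __attribute__ ((ms_struct));

   lays the record out with the Microsoft bit-field packing rules, while
   gcc_struct selects the native GCC layout; mixing both on one type is
   rejected above.  */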
34991 ix86_handle_fndecl_attribute (tree *node, tree name,
34992 tree args ATTRIBUTE_UNUSED,
34993 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
34995 if (TREE_CODE (*node) != FUNCTION_DECL)
34997 warning (OPT_Wattributes, "%qE attribute only applies to functions",
34999 *no_add_attrs = true;
35005 ix86_ms_bitfield_layout_p (const_tree record_type)
35007 return ((TARGET_MS_BITFIELD_LAYOUT
35008 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
35009 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
35012 /* Returns an expression indicating where the this parameter is
35013 located on entry to the FUNCTION. */
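/* Added note (illustrative): under the 32-bit fastcall convention the
   this pointer normally arrives in %ecx (%edx when a hidden
   aggregate-return pointer occupies the first register slot); when no
   register convention applies it is read from the stack, e.g. 4(%esp),
   on entry.  */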
35016 x86_this_parameter (tree function)
35018 tree type = TREE_TYPE (function);
35019 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
35024 const int *parm_regs;
35026 if (ix86_function_type_abi (type) == MS_ABI)
35027 parm_regs = x86_64_ms_abi_int_parameter_registers;
35029 parm_regs = x86_64_int_parameter_registers;
35030 return gen_rtx_REG (Pmode, parm_regs[aggr]);
35033 nregs = ix86_function_regparm (type, function);
35035 if (nregs > 0 && !stdarg_p (type))
35038 unsigned int ccvt = ix86_get_callcvt (type);
35040 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
35041 regno = aggr ? DX_REG : CX_REG;
35042 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
35046 return gen_rtx_MEM (SImode,
35047 plus_constant (Pmode, stack_pointer_rtx, 4));
35056 return gen_rtx_MEM (SImode,
35057 plus_constant (Pmode,
35058 stack_pointer_rtx, 4));
35061 return gen_rtx_REG (SImode, regno);
35064 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
35068 /* Determine whether x86_output_mi_thunk can succeed. */
35071 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
35072 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
35073 HOST_WIDE_INT vcall_offset, const_tree function)
35075 /* 64-bit can handle anything. */
35079 /* For 32-bit, everything's fine if we have one free register. */
35080 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
35083 /* Need a free register for vcall_offset. */
35087 /* Need a free register for GOT references. */
35088 if (flag_pic && !targetm.binds_local_p (function))
35091 /* Otherwise ok. */
35095 /* Output the assembler code for a thunk function. THUNK_DECL is the
35096 declaration for the thunk function itself, FUNCTION is the decl for
35097 the target function. DELTA is an immediate constant offset to be
35098 added to THIS. If VCALL_OFFSET is nonzero, the word at
35099 *(*this + vcall_offset) should be added to THIS. */
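/* Added sketch (simplified, 32-bit, THIS passed on the stack, no
   VCALL_OFFSET): the emitted thunk is essentially

       addl  $DELTA, 4(%esp)
       jmp   FUNCTION

   i.e. adjust the this pointer in place and tail-call the real
   function.  */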
35102 x86_output_mi_thunk (FILE *file,
35103 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
35104 HOST_WIDE_INT vcall_offset, tree function)
35106 rtx this_param = x86_this_parameter (function);
35107 rtx this_reg, tmp, fnaddr;
35108 unsigned int tmp_regno;
35111 tmp_regno = R10_REG;
35114 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
35115 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
35116 tmp_regno = AX_REG;
35117 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
35118 tmp_regno = DX_REG;
35120 tmp_regno = CX_REG;
35123 emit_note (NOTE_INSN_PROLOGUE_END);
35125 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
35126 pull it in now and let DELTA benefit. */
35127 if (REG_P (this_param))
35128 this_reg = this_param;
35129 else if (vcall_offset)
35131 /* Put the this parameter into %eax. */
35132 this_reg = gen_rtx_REG (Pmode, AX_REG);
35133 emit_move_insn (this_reg, this_param);
35136 this_reg = NULL_RTX;
35138 /* Adjust the this parameter by a fixed constant. */
35141 rtx delta_rtx = GEN_INT (delta);
35142 rtx delta_dst = this_reg ? this_reg : this_param;
35146 if (!x86_64_general_operand (delta_rtx, Pmode))
35148 tmp = gen_rtx_REG (Pmode, tmp_regno);
35149 emit_move_insn (tmp, delta_rtx);
35154 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
35157 /* Adjust the this parameter by a value stored in the vtable. */
35160 rtx vcall_addr, vcall_mem, this_mem;
35162 tmp = gen_rtx_REG (Pmode, tmp_regno);
35164 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
35165 if (Pmode != ptr_mode)
35166 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
35167 emit_move_insn (tmp, this_mem);
35169 /* Adjust the this parameter. */
35170 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
35172 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
35174 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
35175 emit_move_insn (tmp2, GEN_INT (vcall_offset));
35176 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
35179 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
35180 if (Pmode != ptr_mode)
35181 emit_insn (gen_addsi_1_zext (this_reg,
35182 gen_rtx_REG (ptr_mode,
35186 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
35189 /* If necessary, drop THIS back to its stack slot. */
35190 if (this_reg && this_reg != this_param)
35191 emit_move_insn (this_param, this_reg);
35193 fnaddr = XEXP (DECL_RTL (function), 0);
35196 if (!flag_pic || targetm.binds_local_p (function)
35201 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
35202 tmp = gen_rtx_CONST (Pmode, tmp);
35203 fnaddr = gen_rtx_MEM (Pmode, tmp);
35208 if (!flag_pic || targetm.binds_local_p (function))
35211 else if (TARGET_MACHO)
35213 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
35214 fnaddr = XEXP (fnaddr, 0);
35216 #endif /* TARGET_MACHO */
35219 tmp = gen_rtx_REG (Pmode, CX_REG);
35220 output_set_got (tmp, NULL_RTX);
35222 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
35223 fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
35224 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
35228 /* Our sibling call patterns do not allow memories, because we have no
35229 predicate that can distinguish between frame and non-frame memory.
35230 For our purposes here, we can get away with (ab)using a jump pattern,
35231 because we're going to do no optimization. */
35232 if (MEM_P (fnaddr))
35233 emit_jump_insn (gen_indirect_jump (fnaddr));
35236 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
35237 fnaddr = legitimize_pic_address (fnaddr,
35238 gen_rtx_REG (Pmode, tmp_regno));
35240 if (!sibcall_insn_operand (fnaddr, word_mode))
35242 tmp = gen_rtx_REG (word_mode, tmp_regno);
35243 if (GET_MODE (fnaddr) != word_mode)
35244 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
35245 emit_move_insn (tmp, fnaddr);
35249 tmp = gen_rtx_MEM (QImode, fnaddr);
35250 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
35251 tmp = emit_call_insn (tmp);
35252 SIBLING_CALL_P (tmp) = 1;
35256 /* Emit just enough of rest_of_compilation to get the insns emitted.
35257 Note that use_thunk calls assemble_start_function et al. */
35258 tmp = get_insns ();
35259 shorten_branches (tmp);
35260 final_start_function (tmp, file, 1);
35261 final (tmp, file, 1);
35262 final_end_function ();
35266 x86_file_start (void)
35268 default_file_start ();
35270 darwin_file_start ();
35272 if (X86_FILE_START_VERSION_DIRECTIVE)
35273 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
35274 if (X86_FILE_START_FLTUSED)
35275 fputs ("\t.global\t__fltused\n", asm_out_file);
35276 if (ix86_asm_dialect == ASM_INTEL)
35277 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
35281 x86_field_alignment (tree field, int computed)
35283 enum machine_mode mode;
35284 tree type = TREE_TYPE (field);
35286 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
35288 mode = TYPE_MODE (strip_array_types (type));
35289 if (mode == DFmode || mode == DCmode
35290 || GET_MODE_CLASS (mode) == MODE_INT
35291 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
35292 return MIN (32, computed);
35296 /* Output assembler code to FILE to increment profiler label # LABELNO
35297 for profiling a function entry. */
35299 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
35301 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
35306 #ifndef NO_PROFILE_COUNTERS
35307 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
35310 if (!TARGET_PECOFF && flag_pic)
35311 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
35313 fprintf (file, "\tcall\t%s\n", mcount_name);
35317 #ifndef NO_PROFILE_COUNTERS
35318 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
35321 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
35325 #ifndef NO_PROFILE_COUNTERS
35326 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
35329 fprintf (file, "\tcall\t%s\n", mcount_name);
35333 /* We don't have exact information about the insn sizes, but we may assume
35334 quite safely that we are informed about all 1 byte insns and memory
35335 address sizes. This is enough to eliminate unnecessary padding in the vast majority of cases. */
35339 min_insn_size (rtx insn)
35343 if (!INSN_P (insn) || !active_insn_p (insn))
35346 /* Discard alignments we've emitted, and jump instructions. */
35347 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
35348 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
35351 /* Important case - calls are always 5 bytes.
35352 It is common to have many calls in a row. */
35354 && symbolic_reference_mentioned_p (PATTERN (insn))
35355 && !SIBLING_CALL_P (insn))
35357 len = get_attr_length (insn);
35361 /* For normal instructions we rely on get_attr_length being exact,
35362 with a few exceptions. */
35363 if (!JUMP_P (insn))
35365 enum attr_type type = get_attr_type (insn);
35370 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
35371 || asm_noperands (PATTERN (insn)) >= 0)
35378 /* Otherwise trust get_attr_length. */
35382 l = get_attr_length_address (insn);
35383 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
35392 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35394 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
35398 ix86_avoid_jump_mispredicts (void)
35400 rtx insn, start = get_insns ();
35401 int nbytes = 0, njumps = 0;
35404 /* Look for all minimal intervals of instructions containing 4 jumps.
35405 The intervals are bounded by START and INSN. NBYTES is the total
35406 size of instructions in the interval including INSN and not including
35407 START. When NBYTES is smaller than 16 bytes, it is possible
35408 that the instructions from the end of START through INSN all fall in the same 16 byte window.
35410 The smallest offset at which INSN can start within that window is when START
35411 ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
35412 We emit a p2align for a 16 byte window with max skip of 15 - NBYTES + sizeof (INSN).
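/* Added worked example (illustrative numbers): suppose the interval
   already contains three jumps and NBYTES is 12, including the fourth
   jump INSN which is 3 bytes.  Then padsize = 15 - 12 + 3 = 6, and a pad
   of up to 6 bytes emitted before INSN pushes it out of the 16 byte
   window holding the earlier jumps.  */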
35414 for (insn = start; insn; insn = NEXT_INSN (insn))
35418 if (LABEL_P (insn))
35420 int align = label_to_alignment (insn);
35421 int max_skip = label_to_max_skip (insn);
35425 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
35426 already in the current 16 byte page, because otherwise
35427 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
35428 bytes to reach 16 byte boundary. */
35430 || (align <= 3 && max_skip != (1 << align) - 1))
35433 fprintf (dump_file, "Label %i with max_skip %i\n",
35434 INSN_UID (insn), max_skip);
35437 while (nbytes + max_skip >= 16)
35439 start = NEXT_INSN (start);
35440 if (JUMP_P (start) || CALL_P (start))
35441 njumps--, isjump = 1;
35444 nbytes -= min_insn_size (start);
35450 min_size = min_insn_size (insn);
35451 nbytes += min_size;
35453 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
35454 INSN_UID (insn), min_size);
35455 if (JUMP_P (insn) || CALL_P (insn))
35462 start = NEXT_INSN (start);
35463 if (JUMP_P (start) || CALL_P (start))
35464 njumps--, isjump = 1;
35467 nbytes -= min_insn_size (start);
35469 gcc_assert (njumps >= 0);
35471 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
35472 INSN_UID (start), INSN_UID (insn), nbytes);
35474 if (njumps == 3 && isjump && nbytes < 16)
35476 int padsize = 15 - nbytes + min_insn_size (insn);
35479 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
35480 INSN_UID (insn), padsize);
35481 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
35487 /* AMD Athlon works faster
35488 when RET is not the destination of a conditional jump or directly preceded
35489 by another jump instruction. We avoid the penalty by inserting a NOP just
35490 before the RET instruction in such cases. */
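/* Added illustrative example (simplified): a block whose last active
   insn before the return is a conditional jump, e.g.

       jne   .L3
       ret

   hits the mispredict penalty on these CPUs, so the pass replaces the
   bare return with a padded return sequence instead.  */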
35492 ix86_pad_returns (void)
35497 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
35499 basic_block bb = e->src;
35500 rtx ret = BB_END (bb);
35502 bool replace = false;
35504 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
35505 || optimize_bb_for_size_p (bb))
35507 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
35508 if (active_insn_p (prev) || LABEL_P (prev))
35510 if (prev && LABEL_P (prev))
35515 FOR_EACH_EDGE (e, ei, bb->preds)
35516 if (EDGE_FREQUENCY (e) && e->src->index >= 0
35517 && !(e->flags & EDGE_FALLTHRU))
35522 prev = prev_active_insn (ret);
35524 && ((JUMP_P (prev) && any_condjump_p (prev))
35527 /* Empty functions get branch mispredict even when
35528 the jump destination is not visible to us. */
35529 if (!prev && !optimize_function_for_size_p (cfun))
35534 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
35540 /* Count the minimum number of instructions in BB. Return 4 if the
35541 number of instructions >= 4. */
35544 ix86_count_insn_bb (basic_block bb)
35547 int insn_count = 0;
35549 /* Count number of instructions in this block. Return 4 if the number
35550 of instructions >= 4. */
35551 FOR_BB_INSNS (bb, insn)
35553 /* This only happens in exit blocks. */
35555 && ANY_RETURN_P (PATTERN (insn)))
35558 if (NONDEBUG_INSN_P (insn)
35559 && GET_CODE (PATTERN (insn)) != USE
35560 && GET_CODE (PATTERN (insn)) != CLOBBER)
35563 if (insn_count >= 4)
35572 /* Count the minimum number of instructions in a code path ending in BB.
35573 Return 4 if the number of instructions >= 4. */
35576 ix86_count_insn (basic_block bb)
35580 int min_prev_count;
35582 /* Only bother counting instructions along paths with no
35583 more than 2 basic blocks between entry and exit. Given
35584 that BB has an edge to exit, determine if a predecessor
35585 of BB has an edge from entry. If so, compute the number
35586 of instructions in the predecessor block. If there
35587 happen to be multiple such blocks, compute the minimum. */
35588 min_prev_count = 4;
35589 FOR_EACH_EDGE (e, ei, bb->preds)
35592 edge_iterator prev_ei;
35594 if (e->src == ENTRY_BLOCK_PTR)
35596 min_prev_count = 0;
35599 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
35601 if (prev_e->src == ENTRY_BLOCK_PTR)
35603 int count = ix86_count_insn_bb (e->src);
35604 if (count < min_prev_count)
35605 min_prev_count = count;
35611 if (min_prev_count < 4)
35612 min_prev_count += ix86_count_insn_bb (bb);
35614 return min_prev_count;
35617 /* Pad short function to 4 instructions. */
35620 ix86_pad_short_function (void)
35625 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
35627 rtx ret = BB_END (e->src);
35628 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
35630 int insn_count = ix86_count_insn (e->src);
35632 /* Pad short function. */
35633 if (insn_count < 4)
35637 /* Find epilogue. */
35640 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
35641 insn = PREV_INSN (insn);
35646 /* Two NOPs count as one instruction. */
35647 insn_count = 2 * (4 - insn_count);
35648 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
35654 /* Fix up a Windows system unwinder issue. If an EH region falls through into
35655 the epilogue, the Windows system unwinder will apply epilogue logic and
35656 produce incorrect offsets. This can be avoided by adding a nop between
35657 the last insn that can throw and the first insn of the epilogue. */
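/* Added sketch (hypothetical layout): if the body ends with

       call  foo            ; may throw, EH region ends here
                            ; NOTE_INSN_EPILOGUE_BEG follows immediately

   a single nop is emitted after the call so the call's return address
   no longer points at the first instruction of the epilogue.  */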
35660 ix86_seh_fixup_eh_fallthru (void)
35665 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
35669 /* Find the beginning of the epilogue. */
35670 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
35671 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
35676 /* We only care about preceding insns that can throw. */
35677 insn = prev_active_insn (insn);
35678 if (insn == NULL || !can_throw_internal (insn))
35681 /* Do not separate calls from their debug information. */
35682 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
35684 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
35685 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
35690 emit_insn_after (gen_nops (const1_rtx), insn);
35694 /* Implement machine specific optimizations. We implement padding of returns
35695 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
35699 /* We are freeing block_for_insn in the toplev to keep compatibility
35700 with old MDEP_REORGS that are not CFG based. Recompute it now. */
35701 compute_bb_for_insn ();
35703 if (TARGET_SEH && current_function_has_exception_handlers ())
35704 ix86_seh_fixup_eh_fallthru ();
35706 if (optimize && optimize_function_for_speed_p (cfun))
35708 if (TARGET_PAD_SHORT_FUNCTION)
35709 ix86_pad_short_function ();
35710 else if (TARGET_PAD_RETURNS)
35711 ix86_pad_returns ();
35712 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
35713 if (TARGET_FOUR_JUMP_LIMIT)
35714 ix86_avoid_jump_mispredicts ();
35719 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
35722 x86_extended_QIreg_mentioned_p (rtx insn)
35725 extract_insn_cached (insn);
35726 for (i = 0; i < recog_data.n_operands; i++)
35727 if (GENERAL_REG_P (recog_data.operand[i])
35728 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
35733 /* Return nonzero when P points to a register encoded via a REX prefix.
35734 Called via for_each_rtx. */
35736 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
35738 unsigned int regno;
35741 regno = REGNO (*p);
35742 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
35745 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
35748 x86_extended_reg_mentioned_p (rtx insn)
35750 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
35751 extended_reg_mentioned_1, NULL);
35754 /* If profitable, negate (without causing overflow) integer constant
35755 of mode MODE at location LOC. Return true in this case. */
35757 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
35761 if (!CONST_INT_P (*loc))
35767 /* DImode x86_64 constants must fit in 32 bits. */
35768 gcc_assert (x86_64_immediate_operand (*loc, mode));
35779 gcc_unreachable ();
35782 /* Avoid overflows. */
35783 if (mode_signbit_p (mode, *loc))
35786 val = INTVAL (*loc);
35788 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
35789 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
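/* Added worked example (illustrative): a (plus reg (const_int -4)) is
   printed as `subl $4, %eax' rather than `addl $-4, %eax'.  The -128
   exception exists because -128 fits a sign-extended 8-bit immediate
   while +128 does not, so negating it would only make the insn longer.  */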
35790 if ((val < 0 && val != -128)
35793 *loc = GEN_INT (-val);
35800 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
35801 optabs would emit if we didn't have TFmode patterns. */
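/* Added sketch of the idea (not target-specific): for an input with the
   sign bit clear a plain signed conversion is used; otherwise

       i0  = (in >> 1) | (in & 1);   // halve, keeping the low bit for rounding
       f0  = (float_type) i0;        // signed conversion is now safe
       out = f0 + f0;                // double back to the full magnitude

   which matches the shift/AND/IOR sequence emitted below.  */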
35804 x86_emit_floatuns (rtx operands[2])
35806 rtx neglab, donelab, i0, i1, f0, in, out;
35807 enum machine_mode mode, inmode;
35809 inmode = GET_MODE (operands[1]);
35810 gcc_assert (inmode == SImode || inmode == DImode);
35813 in = force_reg (inmode, operands[1]);
35814 mode = GET_MODE (out);
35815 neglab = gen_label_rtx ();
35816 donelab = gen_label_rtx ();
35817 f0 = gen_reg_rtx (mode);
35819 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
35821 expand_float (out, in, 0);
35823 emit_jump_insn (gen_jump (donelab));
35826 emit_label (neglab);
35828 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
35830 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
35832 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
35834 expand_float (f0, i0, 0);
35836 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
35838 emit_label (donelab);
35841 /* AVX2 does support 32-byte integer vector operations,
35842 thus the longest vector we are faced with is V32QImode. */
35843 #define MAX_VECT_LEN 32
35845 struct expand_vec_perm_d
35847 rtx target, op0, op1;
35848 unsigned char perm[MAX_VECT_LEN];
35849 enum machine_mode vmode;
35850 unsigned char nelt;
35851 bool one_operand_p;
35855 static bool canonicalize_perm (struct expand_vec_perm_d *d);
35856 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
35857 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
35859 /* Get a vector mode of the same size as the original but with elements
35860 twice as wide. This is only guaranteed to apply to integral vectors. */
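/* Added note (per the asserts below): e.g. V16QImode is expected to map
   to V8HImode and V8HImode to V4SImode -- same total size, half as many
   elements, each twice as wide.  */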
35862 static inline enum machine_mode
35863 get_mode_wider_vector (enum machine_mode o)
35865 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
35866 enum machine_mode n = GET_MODE_WIDER_MODE (o);
35867 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
35868 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
35872 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
35873 with all elements equal to VAR. Return true if successful. */
35876 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
35877 rtx target, rtx val)
35900 /* First attempt to recognize VAL as-is. */
35901 dup = gen_rtx_VEC_DUPLICATE (mode, val);
35902 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
35903 if (recog_memoized (insn) < 0)
35906 /* If that fails, force VAL into a register. */
35909 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
35910 seq = get_insns ();
35913 emit_insn_before (seq, insn);
35915 ok = recog_memoized (insn) >= 0;
35924 if (TARGET_SSE || TARGET_3DNOW_A)
35928 val = gen_lowpart (SImode, val);
35929 x = gen_rtx_TRUNCATE (HImode, val);
35930 x = gen_rtx_VEC_DUPLICATE (mode, x);
35931 emit_insn (gen_rtx_SET (VOIDmode, target, x));
35944 struct expand_vec_perm_d dperm;
35948 memset (&dperm, 0, sizeof (dperm));
35949 dperm.target = target;
35950 dperm.vmode = mode;
35951 dperm.nelt = GET_MODE_NUNITS (mode);
35952 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
35953 dperm.one_operand_p = true;
35955 /* Extend to SImode using a paradoxical SUBREG. */
35956 tmp1 = gen_reg_rtx (SImode);
35957 emit_move_insn (tmp1, gen_lowpart (SImode, val));
35959 /* Insert the SImode value as low element of a V4SImode vector. */
35960 tmp2 = gen_lowpart (V4SImode, dperm.op0);
35961 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
35963 ok = (expand_vec_perm_1 (&dperm)
35964 || expand_vec_perm_broadcast_1 (&dperm));
35976 /* Replicate the value once into the next wider mode and recurse. */
35978 enum machine_mode smode, wsmode, wvmode;
35981 smode = GET_MODE_INNER (mode);
35982 wvmode = get_mode_wider_vector (mode);
35983 wsmode = GET_MODE_INNER (wvmode);
35985 val = convert_modes (wsmode, smode, val, true);
35986 x = expand_simple_binop (wsmode, ASHIFT, val,
35987 GEN_INT (GET_MODE_BITSIZE (smode)),
35988 NULL_RTX, 1, OPTAB_LIB_WIDEN);
35989 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
35991 x = gen_lowpart (wvmode, target);
35992 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
36000 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
36001 rtx x = gen_reg_rtx (hvmode);
36003 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
36006 x = gen_rtx_VEC_CONCAT (mode, x, x);
36007 emit_insn (gen_rtx_SET (VOIDmode, target, x));
36016 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36017 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
36021 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
36022 rtx target, rtx var, int one_var)
36024 enum machine_mode vsimode;
36027 bool use_vector_set = false;
36032 /* For SSE4.1, we normally use vector set. But if the second
36033 element is zero and inter-unit moves are OK, we use movq instead. */
36035 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
36036 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
36042 use_vector_set = TARGET_SSE4_1;
36045 use_vector_set = TARGET_SSE2;
36048 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
36055 use_vector_set = TARGET_AVX;
36058 /* Use ix86_expand_vector_set in 64bit mode only. */
36059 use_vector_set = TARGET_AVX && TARGET_64BIT;
36065 if (use_vector_set)
36067 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
36068 var = force_reg (GET_MODE_INNER (mode), var);
36069 ix86_expand_vector_set (mmx_ok, target, var, one_var);
36085 var = force_reg (GET_MODE_INNER (mode), var);
36086 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
36087 emit_insn (gen_rtx_SET (VOIDmode, target, x));
36092 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
36093 new_target = gen_reg_rtx (mode);
36095 new_target = target;
36096 var = force_reg (GET_MODE_INNER (mode), var);
36097 x = gen_rtx_VEC_DUPLICATE (mode, var);
36098 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
36099 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
36102 /* We need to shuffle the value to the correct position, so
36103 create a new pseudo to store the intermediate result. */
36105 /* With SSE2, we can use the integer shuffle insns. */
36106 if (mode != V4SFmode && TARGET_SSE2)
36108 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
36110 GEN_INT (one_var == 1 ? 0 : 1),
36111 GEN_INT (one_var == 2 ? 0 : 1),
36112 GEN_INT (one_var == 3 ? 0 : 1)));
36113 if (target != new_target)
36114 emit_move_insn (target, new_target);
36118 /* Otherwise convert the intermediate result to V4SFmode and
36119 use the SSE1 shuffle instructions. */
36120 if (mode != V4SFmode)
36122 tmp = gen_reg_rtx (V4SFmode);
36123 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
36128 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
36130 GEN_INT (one_var == 1 ? 0 : 1),
36131 GEN_INT (one_var == 2 ? 0+4 : 1+4),
36132 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
36134 if (mode != V4SFmode)
36135 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
36136 else if (tmp != target)
36137 emit_move_insn (target, tmp);
36139 else if (target != new_target)
36140 emit_move_insn (target, new_target);
36145 vsimode = V4SImode;
36151 vsimode = V2SImode;
36157 /* Zero extend the variable element to SImode and recurse. */
36158 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
36160 x = gen_reg_rtx (vsimode);
36161 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
36163 gcc_unreachable ();
36165 emit_move_insn (target, gen_lowpart (mode, x));
36173 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
36174 consisting of the values in VALS. It is known that all elements
36175 except ONE_VAR are constants. Return true if successful. */
36178 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
36179 rtx target, rtx vals, int one_var)
36181 rtx var = XVECEXP (vals, 0, one_var);
36182 enum machine_mode wmode;
36185 const_vec = copy_rtx (vals);
36186 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
36187 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
36195 /* For the two element vectors, it's just as easy to use
36196 the general case. */
36200 /* Use ix86_expand_vector_set in 64bit mode only. */
36223 /* There's no way to set one QImode entry easily. Combine
36224 the variable value with its adjacent constant value, and
36225 promote to an HImode set. */
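/* Added worked example (illustrative indices): to set element 5 of a
   V16QImode vector when element 4 holds the constant 0x12, build the
   HImode value (var << 8) | 0x12 and store it as HImode element 2
   (i.e. 5 >> 1), which is what the code below does.  */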
36226 x = XVECEXP (vals, 0, one_var ^ 1);
36229 var = convert_modes (HImode, QImode, var, true);
36230 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
36231 NULL_RTX, 1, OPTAB_LIB_WIDEN);
36232 x = GEN_INT (INTVAL (x) & 0xff);
36236 var = convert_modes (HImode, QImode, var, true);
36237 x = gen_int_mode (INTVAL (x) << 8, HImode);
36239 if (x != const0_rtx)
36240 var = expand_simple_binop (HImode, IOR, var, x, var,
36241 1, OPTAB_LIB_WIDEN);
36243 x = gen_reg_rtx (wmode);
36244 emit_move_insn (x, gen_lowpart (wmode, const_vec));
36245 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
36247 emit_move_insn (target, gen_lowpart (mode, x));
36254 emit_move_insn (target, const_vec);
36255 ix86_expand_vector_set (mmx_ok, target, var, one_var);
36259 /* A subroutine of ix86_expand_vector_init_general. Use vector
36260 concatenate to handle the most general case: all values variable,
36261 and none identical. */
36264 ix86_expand_vector_init_concat (enum machine_mode mode,
36265 rtx target, rtx *ops, int n)
36267 enum machine_mode cmode, hmode = VOIDmode;
36268 rtx first[8], second[4];
36308 gcc_unreachable ();
36311 if (!register_operand (ops[1], cmode))
36312 ops[1] = force_reg (cmode, ops[1]);
36313 if (!register_operand (ops[0], cmode))
36314 ops[0] = force_reg (cmode, ops[0]);
36315 emit_insn (gen_rtx_SET (VOIDmode, target,
36316 gen_rtx_VEC_CONCAT (mode, ops[0],
36336 gcc_unreachable ();
36352 gcc_unreachable ();
36357 /* FIXME: We process inputs backward to help RA. PR 36222. */
36360 for (; i > 0; i -= 2, j--)
36362 first[j] = gen_reg_rtx (cmode);
36363 v = gen_rtvec (2, ops[i - 1], ops[i]);
36364 ix86_expand_vector_init (false, first[j],
36365 gen_rtx_PARALLEL (cmode, v));
36371 gcc_assert (hmode != VOIDmode);
36372 for (i = j = 0; i < n; i += 2, j++)
36374 second[j] = gen_reg_rtx (hmode);
36375 ix86_expand_vector_init_concat (hmode, second [j],
36379 ix86_expand_vector_init_concat (mode, target, second, n);
36382 ix86_expand_vector_init_concat (mode, target, first, n);
36386 gcc_unreachable ();
36390 /* A subroutine of ix86_expand_vector_init_general. Use vector
36391 interleave to handle the most general case: all values variable,
36392 and none identical. */
36395 ix86_expand_vector_init_interleave (enum machine_mode mode,
36396 rtx target, rtx *ops, int n)
36398 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
36401 rtx (*gen_load_even) (rtx, rtx, rtx);
36402 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
36403 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
36408 gen_load_even = gen_vec_setv8hi;
36409 gen_interleave_first_low = gen_vec_interleave_lowv4si;
36410 gen_interleave_second_low = gen_vec_interleave_lowv2di;
36411 inner_mode = HImode;
36412 first_imode = V4SImode;
36413 second_imode = V2DImode;
36414 third_imode = VOIDmode;
36417 gen_load_even = gen_vec_setv16qi;
36418 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
36419 gen_interleave_second_low = gen_vec_interleave_lowv4si;
36420 inner_mode = QImode;
36421 first_imode = V8HImode;
36422 second_imode = V4SImode;
36423 third_imode = V2DImode;
36426 gcc_unreachable ();
36429 for (i = 0; i < n; i++)
36431 /* Extend the odd element to SImode using a paradoxical SUBREG. */
36432 op0 = gen_reg_rtx (SImode);
36433 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
36435 /* Insert the SImode value as low element of V4SImode vector. */
36436 op1 = gen_reg_rtx (V4SImode);
36437 op0 = gen_rtx_VEC_MERGE (V4SImode,
36438 gen_rtx_VEC_DUPLICATE (V4SImode,
36440 CONST0_RTX (V4SImode),
36442 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
36444 /* Cast the V4SImode vector back to a vector in the original mode. */
36445 op0 = gen_reg_rtx (mode);
36446 emit_move_insn (op0, gen_lowpart (mode, op1));
36448 /* Load even elements into the second position. */
36449 emit_insn (gen_load_even (op0,
36450 force_reg (inner_mode,
36454 /* Cast vector to FIRST_IMODE vector. */
36455 ops[i] = gen_reg_rtx (first_imode);
36456 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
36459 /* Interleave low FIRST_IMODE vectors. */
36460 for (i = j = 0; i < n; i += 2, j++)
36462 op0 = gen_reg_rtx (first_imode);
36463 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
36465 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
36466 ops[j] = gen_reg_rtx (second_imode);
36467 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
36470 /* Interleave low SECOND_IMODE vectors. */
36471 switch (second_imode)
36474 for (i = j = 0; i < n / 2; i += 2, j++)
36476 op0 = gen_reg_rtx (second_imode);
36477 emit_insn (gen_interleave_second_low (op0, ops[i],
36480 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
36482 ops[j] = gen_reg_rtx (third_imode);
36483 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
36485 second_imode = V2DImode;
36486 gen_interleave_second_low = gen_vec_interleave_lowv2di;
36490 op0 = gen_reg_rtx (second_imode);
36491 emit_insn (gen_interleave_second_low (op0, ops[0],
36494 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
36496 emit_insn (gen_rtx_SET (VOIDmode, target,
36497 gen_lowpart (mode, op0)));
36501 gcc_unreachable ();
36505 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
36506 all values variable, and none identical. */
36509 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
36510 rtx target, rtx vals)
36512 rtx ops[32], op0, op1;
36513 enum machine_mode half_mode = VOIDmode;
36520 if (!mmx_ok && !TARGET_SSE)
36532 n = GET_MODE_NUNITS (mode);
36533 for (i = 0; i < n; i++)
36534 ops[i] = XVECEXP (vals, 0, i);
36535 ix86_expand_vector_init_concat (mode, target, ops, n);
36539 half_mode = V16QImode;
36543 half_mode = V8HImode;
36547 n = GET_MODE_NUNITS (mode);
36548 for (i = 0; i < n; i++)
36549 ops[i] = XVECEXP (vals, 0, i);
36550 op0 = gen_reg_rtx (half_mode);
36551 op1 = gen_reg_rtx (half_mode);
36552 ix86_expand_vector_init_interleave (half_mode, op0, ops,
36554 ix86_expand_vector_init_interleave (half_mode, op1,
36555 &ops [n >> 1], n >> 2);
36556 emit_insn (gen_rtx_SET (VOIDmode, target,
36557 gen_rtx_VEC_CONCAT (mode, op0, op1)));
36561 if (!TARGET_SSE4_1)
36569 /* Don't use ix86_expand_vector_init_interleave if we can't
36570 move from GPR to SSE register directly. */
36571 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
36574 n = GET_MODE_NUNITS (mode);
36575 for (i = 0; i < n; i++)
36576 ops[i] = XVECEXP (vals, 0, i);
36577 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
36585 gcc_unreachable ();
36589 int i, j, n_elts, n_words, n_elt_per_word;
36590 enum machine_mode inner_mode;
36591 rtx words[4], shift;
36593 inner_mode = GET_MODE_INNER (mode);
36594 n_elts = GET_MODE_NUNITS (mode);
36595 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
36596 n_elt_per_word = n_elts / n_words;
36597 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
36599 for (i = 0; i < n_words; ++i)
36601 rtx word = NULL_RTX;
36603 for (j = 0; j < n_elt_per_word; ++j)
36605 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
36606 elt = convert_modes (word_mode, inner_mode, elt, true);
36612 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
36613 word, 1, OPTAB_LIB_WIDEN);
36614 word = expand_simple_binop (word_mode, IOR, word, elt,
36615 word, 1, OPTAB_LIB_WIDEN);
36623 emit_move_insn (target, gen_lowpart (mode, words[0]));
36624 else if (n_words == 2)
36626 rtx tmp = gen_reg_rtx (mode);
36627 emit_clobber (tmp);
36628 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
36629 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
36630 emit_move_insn (target, tmp);
36632 else if (n_words == 4)
36634 rtx tmp = gen_reg_rtx (V4SImode);
36635 gcc_assert (word_mode == SImode);
36636 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
36637 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
36638 emit_move_insn (target, gen_lowpart (mode, tmp));
36641 gcc_unreachable ();
36645 /* Initialize vector TARGET via VALS. Suppress the use of MMX
36646 instructions unless MMX_OK is true. */
36649 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
36651 enum machine_mode mode = GET_MODE (target);
36652 enum machine_mode inner_mode = GET_MODE_INNER (mode);
36653 int n_elts = GET_MODE_NUNITS (mode);
36654 int n_var = 0, one_var = -1;
36655 bool all_same = true, all_const_zero = true;
36659 for (i = 0; i < n_elts; ++i)
36661 x = XVECEXP (vals, 0, i);
36662 if (!(CONST_INT_P (x)
36663 || GET_CODE (x) == CONST_DOUBLE
36664 || GET_CODE (x) == CONST_FIXED))
36665 n_var++, one_var = i;
36666 else if (x != CONST0_RTX (inner_mode))
36667 all_const_zero = false;
36668 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
36672 /* Constants are best loaded from the constant pool. */
36675 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
36679 /* If all values are identical, broadcast the value. */
36681 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
36682 XVECEXP (vals, 0, 0)))
36685 /* Values where only one field is non-constant are best loaded from
36686 the pool and overwritten via move later. */
36690 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
36691 XVECEXP (vals, 0, one_var),
36695 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
36699 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
36703 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
36705 enum machine_mode mode = GET_MODE (target);
36706 enum machine_mode inner_mode = GET_MODE_INNER (mode);
36707 enum machine_mode half_mode;
36708 bool use_vec_merge = false;
36710 static rtx (*gen_extract[6][2]) (rtx, rtx)
36712 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
36713 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
36714 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
36715 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
36716 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
36717 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
36719 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
36721 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
36722 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
36723 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
36724 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
36725 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
36726 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
36736 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
36737 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
36739 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
36741 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
36742 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
36748 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
36752 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
36753 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
36755 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
36757 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
36758 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
36765 /* For the two element vectors, we implement a VEC_CONCAT with
36766 the extraction of the other element. */
36768 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
36769 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
36772 op0 = val, op1 = tmp;
36774 op0 = tmp, op1 = val;
36776 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
36777 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
36782 use_vec_merge = TARGET_SSE4_1;
36789 use_vec_merge = true;
36793 /* tmp = target = A B C D */
36794 tmp = copy_to_reg (target);
36795 /* target = A A B B */
36796 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
36797 /* target = X A B B */
36798 ix86_expand_vector_set (false, target, val, 0);
36799 /* target = A X C D */
36800 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
36801 const1_rtx, const0_rtx,
36802 GEN_INT (2+4), GEN_INT (3+4)));
36806 /* tmp = target = A B C D */
36807 tmp = copy_to_reg (target);
36808 /* tmp = X B C D */
36809 ix86_expand_vector_set (false, tmp, val, 0);
36810 /* target = A B X D */
36811 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
36812 const0_rtx, const1_rtx,
36813 GEN_INT (0+4), GEN_INT (3+4)));
36817 /* tmp = target = A B C D */
36818 tmp = copy_to_reg (target);
36819 /* tmp = X B C D */
36820 ix86_expand_vector_set (false, tmp, val, 0);
36821 /* target = A B X D */
36822 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
36823 const0_rtx, const1_rtx,
36824 GEN_INT (2+4), GEN_INT (0+4)));
36828 gcc_unreachable ();
36833 use_vec_merge = TARGET_SSE4_1;
36837 /* Element 0 handled by vec_merge below. */
36840 use_vec_merge = true;
36846 /* With SSE2, use integer shuffles to swap element 0 and ELT,
36847 store into element 0, then shuffle them back. */
36851 order[0] = GEN_INT (elt);
36852 order[1] = const1_rtx;
36853 order[2] = const2_rtx;
36854 order[3] = GEN_INT (3);
36855 order[elt] = const0_rtx;
36857 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
36858 order[1], order[2], order[3]));
36860 ix86_expand_vector_set (false, target, val, 0);
36862 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
36863 order[1], order[2], order[3]));
36867 /* For SSE1, we have to reuse the V4SF code. */
36868 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
36869 gen_lowpart (SFmode, val), elt);
36874 use_vec_merge = TARGET_SSE2;
36877 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
36881 use_vec_merge = TARGET_SSE4_1;
36888 half_mode = V16QImode;
36894 half_mode = V8HImode;
36900 half_mode = V4SImode;
36906 half_mode = V2DImode;
36912 half_mode = V4SFmode;
36918 half_mode = V2DFmode;
36924 /* Compute offset. */
36928 gcc_assert (i <= 1);
36930 /* Extract the half. */
36931 tmp = gen_reg_rtx (half_mode);
36932 emit_insn (gen_extract[j][i] (tmp, target));
36934 /* Put val in tmp at elt. */
36935 ix86_expand_vector_set (false, tmp, val, elt);
36938 emit_insn (gen_insert[j][i] (target, target, tmp));
36947 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
36948 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
36949 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
36953 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
36955 emit_move_insn (mem, target);
36957 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
36958 emit_move_insn (tmp, val);
36960 emit_move_insn (target, mem);
36965 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
36967 enum machine_mode mode = GET_MODE (vec);
36968 enum machine_mode inner_mode = GET_MODE_INNER (mode);
36969 bool use_vec_extr = false;
36982 use_vec_extr = true;
36986 use_vec_extr = TARGET_SSE4_1;
36998 tmp = gen_reg_rtx (mode);
36999 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
37000 GEN_INT (elt), GEN_INT (elt),
37001 GEN_INT (elt+4), GEN_INT (elt+4)));
37005 tmp = gen_reg_rtx (mode);
37006 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
37010 gcc_unreachable ();
37013 use_vec_extr = true;
37018 use_vec_extr = TARGET_SSE4_1;
37032 tmp = gen_reg_rtx (mode);
37033 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
37034 GEN_INT (elt), GEN_INT (elt),
37035 GEN_INT (elt), GEN_INT (elt)));
37039 tmp = gen_reg_rtx (mode);
37040 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
37044 gcc_unreachable ();
37047 use_vec_extr = true;
37052 /* For SSE1, we have to reuse the V4SF code. */
37053 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
37054 gen_lowpart (V4SFmode, vec), elt);
37060 use_vec_extr = TARGET_SSE2;
37063 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
37067 use_vec_extr = TARGET_SSE4_1;
37073 tmp = gen_reg_rtx (V4SFmode);
37075 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
37077 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
37078 ix86_expand_vector_extract (false, target, tmp, elt & 3);
37086 tmp = gen_reg_rtx (V2DFmode);
37088 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
37090 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
37091 ix86_expand_vector_extract (false, target, tmp, elt & 1);
37099 tmp = gen_reg_rtx (V16QImode);
37101 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
37103 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
37104 ix86_expand_vector_extract (false, target, tmp, elt & 15);
37112 tmp = gen_reg_rtx (V8HImode);
37114 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
37116 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
37117 ix86_expand_vector_extract (false, target, tmp, elt & 7);
37125 tmp = gen_reg_rtx (V4SImode);
37127 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
37129 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
37130 ix86_expand_vector_extract (false, target, tmp, elt & 3);
37138 tmp = gen_reg_rtx (V2DImode);
37140 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
37142 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
37143 ix86_expand_vector_extract (false, target, tmp, elt & 1);
37149 /* ??? Could extract the appropriate HImode element and shift. */
37156 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
37157 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
37159 /* Let the rtl optimizers know about the zero extension performed. */
37160 if (inner_mode == QImode || inner_mode == HImode)
37162 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
37163 target = gen_lowpart (SImode, target);
37166 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
37170 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37172 emit_move_insn (mem, vec);
37174 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
37175 emit_move_insn (target, tmp);
37179 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
37180 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
37181 The upper bits of DEST are undefined, though they shouldn't cause
37182 exceptions (some bits from src or all zeros are ok). */
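/* Added note (illustrative, V4SFmode): with I == 128 elements 2 and 3 of
   SRC are moved down into elements 0 and 1 (movhlps); with I == 64
   element 1 is shuffled down into element 0 (shufps).  */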
37185 emit_reduc_half (rtx dest, rtx src, int i)
37188 switch (GET_MODE (src))
37192 tem = gen_sse_movhlps (dest, src, src);
37194 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
37195 GEN_INT (1 + 4), GEN_INT (1 + 4));
37198 tem = gen_vec_interleave_highv2df (dest, src, src);
37204 tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
37205 gen_lowpart (V1TImode, src),
37210 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
37212 tem = gen_avx_shufps256 (dest, src, src,
37213 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
37217 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
37219 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
37226 tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
37227 gen_lowpart (V4DImode, src),
37228 gen_lowpart (V4DImode, src),
37231 tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
37232 gen_lowpart (V2TImode, src),
37236 gcc_unreachable ();
37241 /* Expand a vector reduction. FN is the binary pattern to reduce;
37242 DEST is the destination; IN is the input vector. */
37245 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
37247 rtx half, dst, vec = in;
37248 enum machine_mode mode = GET_MODE (in);
37251 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
37253 && mode == V8HImode
37254 && fn == gen_uminv8hi3)
37256 emit_insn (gen_sse4_1_phminposuw (dest, in));
37260 for (i = GET_MODE_BITSIZE (mode);
37261 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
37264 half = gen_reg_rtx (mode);
37265 emit_reduc_half (half, vec, i);
37266 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
37269 dst = gen_reg_rtx (mode);
37270 emit_insn (fn (dst, half, vec));
37275 /* Target hook for scalar_mode_supported_p. */
37277 ix86_scalar_mode_supported_p (enum machine_mode mode)
37279 if (DECIMAL_FLOAT_MODE_P (mode))
37280 return default_decimal_float_supported_p ();
37281 else if (mode == TFmode)
37284 return default_scalar_mode_supported_p (mode);
37287 /* Implements target hook vector_mode_supported_p. */
37289 ix86_vector_mode_supported_p (enum machine_mode mode)
37291 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37293 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37295 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37297 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
37299 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
37304 /* Target hook for c_mode_for_suffix. */
37305 static enum machine_mode
37306 ix86_c_mode_for_suffix (char suffix)
37316 /* Worker function for TARGET_MD_ASM_CLOBBERS.
37318 We do this in the new i386 backend to maintain source compatibility
37319 with the old cc0-based compiler. */
37322 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
37323 tree inputs ATTRIBUTE_UNUSED,
37326 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
37328 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
37333 /* Implements target vector targetm.asm.encode_section_info. */
37335 static void ATTRIBUTE_UNUSED
37336 ix86_encode_section_info (tree decl, rtx rtl, int first)
37338 default_encode_section_info (decl, rtl, first);
37340 if (TREE_CODE (decl) == VAR_DECL
37341 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
37342 && ix86_in_large_data_p (decl))
37343 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
37346 /* Worker function for REVERSE_CONDITION. */
37349 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
37351 return (mode != CCFPmode && mode != CCFPUmode
37352 ? reverse_condition (code)
37353 : reverse_condition_maybe_unordered (code));
37356 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
37360 output_387_reg_move (rtx insn, rtx *operands)
37362 if (REG_P (operands[0]))
37364 if (REG_P (operands[1])
37365 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
37367 if (REGNO (operands[0]) == FIRST_STACK_REG)
37368 return output_387_ffreep (operands, 0);
37369 return "fstp\t%y0";
37371 if (STACK_TOP_P (operands[0]))
37372 return "fld%Z1\t%y1";
37375 else if (MEM_P (operands[0]))
37377 gcc_assert (REG_P (operands[1]));
37378 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
37379 return "fstp%Z0\t%y0";
37382 /* There is no non-popping store to memory for XFmode.
37383 So if we need one, follow the store with a load. */
37384 if (GET_MODE (operands[0]) == XFmode)
37385 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
37387 return "fst%Z0\t%y0";
37394 /* Output code to perform a conditional jump to LABEL, if C2 flag in
37395 FP status register is set. */
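/* For reference: fnstsw stores the x87 status word in %ax, and C2 is
   bit 10 of that word, i.e. bit 2 of %ah.  With SAHF available, %ah is
   copied into EFLAGS (C2 lands in PF) and tested via the "unordered"
   condition; otherwise %ah is tested directly against the mask 0x04. */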
37398 ix86_emit_fp_unordered_jump (rtx label)
37400 rtx reg = gen_reg_rtx (HImode);
37403 emit_insn (gen_x86_fnstsw_1 (reg));
37405 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
37407 emit_insn (gen_x86_sahf_1 (reg));
37409 temp = gen_rtx_REG (CCmode, FLAGS_REG);
37410 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
37414 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
37416 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
37417 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
37420 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
37421 gen_rtx_LABEL_REF (VOIDmode, label),
37423 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
37425 emit_jump_insn (temp);
37426 predict_jump (REG_BR_PROB_BASE * 10 / 100);
37429 /* Output code to perform a log1p XFmode calculation. */
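/* Roughly, the expansion below computes ln(1 + x) as
   ln(2) * log2(1 + x) using the fldln2 constant together with fyl2xp1
   or fyl2x.  fyl2xp1 is only specified for arguments with
   |x| < 1 - sqrt(2)/2 ~= 0.2929 (the constant tested below); for
   larger |x| the code falls back to fyl2x on the explicitly formed
   1 + x, where the rounding error of that addition is tolerable. */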
37431 void ix86_emit_i387_log1p (rtx op0, rtx op1)
37433 rtx label1 = gen_label_rtx ();
37434 rtx label2 = gen_label_rtx ();
37436 rtx tmp = gen_reg_rtx (XFmode);
37437 rtx tmp2 = gen_reg_rtx (XFmode);
37440 emit_insn (gen_absxf2 (tmp, op1));
37441 test = gen_rtx_GE (VOIDmode, tmp,
37442 CONST_DOUBLE_FROM_REAL_VALUE (
37443 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
37445 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
37447 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
37448 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
37449 emit_jump (label2);
37451 emit_label (label1);
37452 emit_move_insn (tmp, CONST1_RTX (XFmode));
37453 emit_insn (gen_addxf3 (tmp, op1, tmp));
37454 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
37455 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
37457 emit_label (label2);
37460 /* Emit code for round calculation. */
37461 void ix86_emit_i387_round (rtx op0, rtx op1)
37463 enum machine_mode inmode = GET_MODE (op1);
37464 enum machine_mode outmode = GET_MODE (op0);
37465 rtx e1, e2, res, tmp, tmp1, half;
37466 rtx scratch = gen_reg_rtx (HImode);
37467 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
37468 rtx jump_label = gen_label_rtx ();
37470 rtx (*gen_abs) (rtx, rtx);
37471 rtx (*gen_neg) (rtx, rtx);
37476 gen_abs = gen_abssf2;
37479 gen_abs = gen_absdf2;
37482 gen_abs = gen_absxf2;
37485 gcc_unreachable ();
37491 gen_neg = gen_negsf2;
37494 gen_neg = gen_negdf2;
37497 gen_neg = gen_negxf2;
37500 gen_neg = gen_neghi2;
37503 gen_neg = gen_negsi2;
37506 gen_neg = gen_negdi2;
37509 gcc_unreachable ();
37512 e1 = gen_reg_rtx (inmode);
37513 e2 = gen_reg_rtx (inmode);
37514 res = gen_reg_rtx (outmode);
37516 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
37518 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
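/* E.g. 2.5 -> floor(2.5 + 0.5) = 3 and -2.5 -> -floor(2.5 + 0.5) = -3,
   i.e. halfway cases round away from zero as C round() requires. */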
37520 /* scratch = fxam(op1) */
37521 emit_insn (gen_rtx_SET (VOIDmode, scratch,
37522 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
37524 /* e1 = fabs(op1) */
37525 emit_insn (gen_abs (e1, op1));
37527 /* e2 = e1 + 0.5 */
37528 half = force_reg (inmode, half);
37529 emit_insn (gen_rtx_SET (VOIDmode, e2,
37530 gen_rtx_PLUS (inmode, e1, half)));
37532 /* res = floor(e2) */
37533 if (inmode != XFmode)
37535 tmp1 = gen_reg_rtx (XFmode);
37537 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
37538 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
37548 rtx tmp0 = gen_reg_rtx (XFmode);
37550 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
37552 emit_insn (gen_rtx_SET (VOIDmode, res,
37553 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
37554 UNSPEC_TRUNC_NOOP)));
37558 emit_insn (gen_frndintxf2_floor (res, tmp1));
37561 emit_insn (gen_lfloorxfhi2 (res, tmp1));
37564 emit_insn (gen_lfloorxfsi2 (res, tmp1));
37567 emit_insn (gen_lfloorxfdi2 (res, tmp1));
37570 gcc_unreachable ();
37573 /* flags = signbit(a) */
37574 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
37576 /* if (flags) then res = -res */
37577 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
37578 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
37579 gen_rtx_LABEL_REF (VOIDmode, jump_label),
37581 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
37582 predict_jump (REG_BR_PROB_BASE * 50 / 100);
37583 JUMP_LABEL (insn) = jump_label;
37585 emit_insn (gen_neg (res, res));
37587 emit_label (jump_label);
37588 LABEL_NUSES (jump_label) = 1;
37590 emit_move_insn (op0, res);
37593 /* Output code to perform a Newton-Raphson approximation of a single precision
37594 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
37596 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
37598 rtx x0, x1, e0, e1;
37600 x0 = gen_reg_rtx (mode);
37601 e0 = gen_reg_rtx (mode);
37602 e1 = gen_reg_rtx (mode);
37603 x1 = gen_reg_rtx (mode);
37605 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
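/* This is one Newton-Raphson step for the reciprocal: with
   x0 = rcp(b) ~ 1/b, the refined estimate is
   x1 = x0 * (2 - b * x0) = 2*x0 - b*x0*x0, the expression above.
   rcpss/rcpps provide roughly 12 bits of precision and one step
   roughly doubles that, close to full single precision. */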
37607 b = force_reg (mode, b);
37609 /* x0 = rcp(b) estimate */
37610 emit_insn (gen_rtx_SET (VOIDmode, x0,
37611 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
37614 emit_insn (gen_rtx_SET (VOIDmode, e0,
37615 gen_rtx_MULT (mode, x0, b)));
37618 emit_insn (gen_rtx_SET (VOIDmode, e0,
37619 gen_rtx_MULT (mode, x0, e0)));
37622 emit_insn (gen_rtx_SET (VOIDmode, e1,
37623 gen_rtx_PLUS (mode, x0, x0)));
37626 emit_insn (gen_rtx_SET (VOIDmode, x1,
37627 gen_rtx_MINUS (mode, e1, e0)));
37630 emit_insn (gen_rtx_SET (VOIDmode, res,
37631 gen_rtx_MULT (mode, a, x1)));
37634 /* Output code to perform a Newton-Raphson approximation of a
37635 single precision floating point [reciprocal] square root. */
37637 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
37640 rtx x0, e0, e1, e2, e3, mthree, mhalf;
37643 x0 = gen_reg_rtx (mode);
37644 e0 = gen_reg_rtx (mode);
37645 e1 = gen_reg_rtx (mode);
37646 e2 = gen_reg_rtx (mode);
37647 e3 = gen_reg_rtx (mode);
37649 real_from_integer (&r, VOIDmode, -3, -1, 0);
37650 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
37652 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
37653 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
37655 if (VECTOR_MODE_P (mode))
37657 mthree = ix86_build_const_vector (mode, true, mthree);
37658 mhalf = ix86_build_const_vector (mode, true, mhalf);
37661 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
37662 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
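/* These follow from one Newton-Raphson step for 1/sqrt(a): with
   x0 = rsqrtss(a), x1 = x0 * (3 - a*x0*x0) / 2
   = -0.5 * x0 * (a*x0*x0 - 3), and sqrt(a) = a * (1/sqrt(a)).
   As with rcpss, the estimate has roughly 12 bits of precision and
   one step brings it close to full single precision. */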
37664 a = force_reg (mode, a);
37666 /* x0 = rsqrt(a) estimate */
37667 emit_insn (gen_rtx_SET (VOIDmode, x0,
37668 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
37671 /* If a == 0.0, mask out the infinite rsqrt estimate to prevent NaN for sqrt(0.0). */
37676 zero = gen_reg_rtx (mode);
37677 mask = gen_reg_rtx (mode);
37679 zero = force_reg (mode, CONST0_RTX(mode));
37680 emit_insn (gen_rtx_SET (VOIDmode, mask,
37681 gen_rtx_NE (mode, zero, a)));
37683 emit_insn (gen_rtx_SET (VOIDmode, x0,
37684 gen_rtx_AND (mode, x0, mask)));
37688 emit_insn (gen_rtx_SET (VOIDmode, e0,
37689 gen_rtx_MULT (mode, x0, a)));
37691 emit_insn (gen_rtx_SET (VOIDmode, e1,
37692 gen_rtx_MULT (mode, e0, x0)));
37695 mthree = force_reg (mode, mthree);
37696 emit_insn (gen_rtx_SET (VOIDmode, e2,
37697 gen_rtx_PLUS (mode, e1, mthree)));
37699 mhalf = force_reg (mode, mhalf);
37701 /* e3 = -.5 * x0 */
37702 emit_insn (gen_rtx_SET (VOIDmode, e3,
37703 gen_rtx_MULT (mode, x0, mhalf)));
37705 /* e3 = -.5 * e0 */
37706 emit_insn (gen_rtx_SET (VOIDmode, e3,
37707 gen_rtx_MULT (mode, e0, mhalf)));
37708 /* ret = e2 * e3 */
37709 emit_insn (gen_rtx_SET (VOIDmode, res,
37710 gen_rtx_MULT (mode, e2, e3)));
37713 #ifdef TARGET_SOLARIS
37714 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
37717 i386_solaris_elf_named_section (const char *name, unsigned int flags,
37720 /* With Binutils 2.15, the "@unwind" marker must be specified on
37721 every occurrence of the ".eh_frame" section, not just the first one. */
37724 && strcmp (name, ".eh_frame") == 0)
37726 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
37727 flags & SECTION_WRITE ? "aw" : "a");
37732 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
37734 solaris_elf_asm_comdat_section (name, flags, decl);
37739 default_elf_asm_named_section (name, flags, decl);
37741 #endif /* TARGET_SOLARIS */
37743 /* Return the mangling of TYPE if it is an extended fundamental type. */
37745 static const char *
37746 ix86_mangle_type (const_tree type)
37748 type = TYPE_MAIN_VARIANT (type);
37750 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
37751 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
37754 switch (TYPE_MODE (type))
37757 /* __float128 is "g". */
37760 /* "long double" or __float80 is "e". */
37767 /* For 32-bit code we can save the PIC register setup by using
37768 the hidden function __stack_chk_fail_local instead of calling
37769 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
37770 register, so it is better to call __stack_chk_fail directly. */
37772 static tree ATTRIBUTE_UNUSED
37773 ix86_stack_protect_fail (void)
37775 return TARGET_64BIT
37776 ? default_external_stack_protect_fail ()
37777 : default_hidden_stack_protect_fail ();
37780 /* Select a format to encode pointers in exception handling data. CODE
37781 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
37782 true if the symbol may be affected by dynamic relocations.
37784 ??? All x86 object file formats are capable of representing this.
37785 After all, the relocation needed is the same as for the call insn.
37786 Whether or not a particular assembler allows us to enter such, I
37787 guess we'll have to see. */
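/* For example, for PIC code this typically yields
   DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b), with DW_EH_PE_indirect
   added (0x9b) when the symbol may be subject to dynamic relocation;
   these are the encodings commonly seen in .eh_frame sections on
   ELF/x86 targets. */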
37789 asm_preferred_eh_data_format (int code, int global)
37793 int type = DW_EH_PE_sdata8;
37795 || ix86_cmodel == CM_SMALL_PIC
37796 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
37797 type = DW_EH_PE_sdata4;
37798 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
37800 if (ix86_cmodel == CM_SMALL
37801 || (ix86_cmodel == CM_MEDIUM && code))
37802 return DW_EH_PE_udata4;
37803 return DW_EH_PE_absptr;
37806 /* Expand copysign from SIGN to the positive value ABS_VALUE
37807 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
37810 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
37812 enum machine_mode mode = GET_MODE (sign);
37813 rtx sgn = gen_reg_rtx (mode);
37814 if (mask == NULL_RTX)
37816 enum machine_mode vmode;
37818 if (mode == SFmode)
37820 else if (mode == DFmode)
37825 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
37826 if (!VECTOR_MODE_P (mode))
37828 /* We need to generate a scalar mode mask in this case. */
37829 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
37830 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
37831 mask = gen_reg_rtx (mode);
37832 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
37836 mask = gen_rtx_NOT (mode, mask);
37837 emit_insn (gen_rtx_SET (VOIDmode, sgn,
37838 gen_rtx_AND (mode, mask, sign)));
37839 emit_insn (gen_rtx_SET (VOIDmode, result,
37840 gen_rtx_IOR (mode, abs_value, sgn)));
37843 /* Expand fabs (OP0) and return a new rtx that holds the result. The
37844 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
37847 ix86_expand_sse_fabs (rtx op0, rtx *smask)
37849 enum machine_mode vmode, mode = GET_MODE (op0);
37852 xa = gen_reg_rtx (mode);
37853 if (mode == SFmode)
37855 else if (mode == DFmode)
37859 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
37860 if (!VECTOR_MODE_P (mode))
37862 /* We need to generate a scalar mode mask in this case. */
37863 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
37864 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
37865 mask = gen_reg_rtx (mode);
37866 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
37868 emit_insn (gen_rtx_SET (VOIDmode, xa,
37869 gen_rtx_AND (mode, op0, mask)));
37877 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
37878 swapping the operands if SWAP_OPERANDS is true. The expanded
37879 code is a forward jump to a newly created label in case the
37880 comparison is true. The generated label rtx is returned. */
37882 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
37883 bool swap_operands)
37894 label = gen_label_rtx ();
37895 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
37896 emit_insn (gen_rtx_SET (VOIDmode, tmp,
37897 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
37898 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
37899 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
37900 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
37901 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
37902 JUMP_LABEL (tmp) = label;
37907 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
37908 using comparison code CODE. Operands are swapped for the comparison if
37909 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
37911 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
37912 bool swap_operands)
37914 rtx (*insn)(rtx, rtx, rtx, rtx);
37915 enum machine_mode mode = GET_MODE (op0);
37916 rtx mask = gen_reg_rtx (mode);
37925 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
37927 emit_insn (insn (mask, op0, op1,
37928 gen_rtx_fmt_ee (code, mode, op0, op1)));
37932 /* Generate and return a rtx of mode MODE for 2**n where n is the number
37933 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
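/* Above 2**52 (2**23 for SFmode) every representable value is already
   an integer, so the rounding sequences below can round a value x with
   |x| < 2**52 to an integer in the current rounding mode simply by
   computing (x + 2**52) - 2**52: the addition discards the fractional
   bits and the subtraction restores the magnitude. */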
37935 ix86_gen_TWO52 (enum machine_mode mode)
37937 REAL_VALUE_TYPE TWO52r;
37940 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
37941 TWO52 = const_double_from_real_value (TWO52r, mode);
37942 TWO52 = force_reg (mode, TWO52);
37947 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
37950 ix86_expand_lround (rtx op0, rtx op1)
37952 /* C code for the stuff we're doing below:
37953 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
37956 enum machine_mode mode = GET_MODE (op1);
37957 const struct real_format *fmt;
37958 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
37961 /* load nextafter (0.5, 0.0) */
37962 fmt = REAL_MODE_FORMAT (mode);
37963 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
37964 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
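/* pred_half is 0.5 - 2**(-p-1), the largest value of the mode strictly
   below 0.5 (i.e. nextafter (0.5, 0.0) for a p-bit mantissa).  Adding
   plain 0.5 would be wrong for inputs just below 0.5: e.g. for the
   double just below 0.5, x + 0.5 rounds up to 1.0 and lround would
   return 1 instead of 0. */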
37966 /* adj = copysign (0.5, op1) */
37967 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
37968 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
37970 /* adj = op1 + adj */
37971 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
37973 /* op0 = (imode)adj */
37974 expand_fix (op0, adj, 0);
37977 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0. */
37980 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
37982 /* C code for the stuff we're doing below (for do_floor):
37984 xi -= (double)xi > op1 ? 1 : 0;
37987 enum machine_mode fmode = GET_MODE (op1);
37988 enum machine_mode imode = GET_MODE (op0);
37989 rtx ireg, freg, label, tmp;
37991 /* reg = (long)op1 */
37992 ireg = gen_reg_rtx (imode);
37993 expand_fix (ireg, op1, 0);
37995 /* freg = (double)reg */
37996 freg = gen_reg_rtx (fmode);
37997 expand_float (freg, ireg, 0);
37999 /* ireg = (freg > op1) ? ireg - 1 : ireg */
38000 label = ix86_expand_sse_compare_and_jump (UNLE,
38001 freg, op1, !do_floor);
38002 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
38003 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
38004 emit_move_insn (ireg, tmp);
38006 emit_label (label);
38007 LABEL_NUSES (label) = 1;
38009 emit_move_insn (op0, ireg);
38012 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
38013 result in OPERAND0. */
38015 ix86_expand_rint (rtx operand0, rtx operand1)
38017 /* C code for the stuff we're doing below:
38018 xa = fabs (operand1);
38019 if (!isless (xa, 2**52))
38021 xa = xa + 2**52 - 2**52;
38022 return copysign (xa, operand1);
38024 enum machine_mode mode = GET_MODE (operand0);
38025 rtx res, xa, label, TWO52, mask;
38027 res = gen_reg_rtx (mode);
38028 emit_move_insn (res, operand1);
38030 /* xa = abs (operand1) */
38031 xa = ix86_expand_sse_fabs (res, &mask);
38033 /* if (!isless (xa, TWO52)) goto label; */
38034 TWO52 = ix86_gen_TWO52 (mode);
38035 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38037 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
38038 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
38040 ix86_sse_copysign_to_positive (res, xa, res, mask);
38042 emit_label (label);
38043 LABEL_NUSES (label) = 1;
38045 emit_move_insn (operand0, res);
38048 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
38051 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
38053 /* C code for the stuff we expand below.
38054 double xa = fabs (x), x2;
38055 if (!isless (xa, TWO52))
38057 xa = xa + TWO52 - TWO52;
38058 x2 = copysign (xa, x);
38067 enum machine_mode mode = GET_MODE (operand0);
38068 rtx xa, TWO52, tmp, label, one, res, mask;
38070 TWO52 = ix86_gen_TWO52 (mode);
38072 /* Temporary for holding the result, initialized to the input
38073 operand to ease control flow. */
38074 res = gen_reg_rtx (mode);
38075 emit_move_insn (res, operand1);
38077 /* xa = abs (operand1) */
38078 xa = ix86_expand_sse_fabs (res, &mask);
38080 /* if (!isless (xa, TWO52)) goto label; */
38081 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38083 /* xa = xa + TWO52 - TWO52; */
38084 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
38085 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
38087 /* xa = copysign (xa, operand1) */
38088 ix86_sse_copysign_to_positive (xa, xa, res, mask);
38090 /* generate 1.0 or -1.0 */
38091 one = force_reg (mode,
38092 const_double_from_real_value (do_floor
38093 ? dconst1 : dconstm1, mode));
38095 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38096 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
38097 emit_insn (gen_rtx_SET (VOIDmode, tmp,
38098 gen_rtx_AND (mode, one, tmp)));
38099 /* We always need to subtract here to preserve signed zero. */
38100 tmp = expand_simple_binop (mode, MINUS,
38101 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
38102 emit_move_insn (res, tmp);
38104 emit_label (label);
38105 LABEL_NUSES (label) = 1;
38107 emit_move_insn (operand0, res);
38110 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
38113 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
38115 /* C code for the stuff we expand below.
38116 double xa = fabs (x), x2;
38117 if (!isless (xa, TWO52))
38119 x2 = (double)(long)x;
38126 if (HONOR_SIGNED_ZEROS (mode))
38127 return copysign (x2, x);
38130 enum machine_mode mode = GET_MODE (operand0);
38131 rtx xa, xi, TWO52, tmp, label, one, res, mask;
38133 TWO52 = ix86_gen_TWO52 (mode);
38135 /* Temporary for holding the result, initialized to the input
38136 operand to ease control flow. */
38137 res = gen_reg_rtx (mode);
38138 emit_move_insn (res, operand1);
38140 /* xa = abs (operand1) */
38141 xa = ix86_expand_sse_fabs (res, &mask);
38143 /* if (!isless (xa, TWO52)) goto label; */
38144 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38146 /* xa = (double)(long)x */
38147 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
38148 expand_fix (xi, res, 0);
38149 expand_float (xa, xi, 0);
38152 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
38154 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
38155 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
38156 emit_insn (gen_rtx_SET (VOIDmode, tmp,
38157 gen_rtx_AND (mode, one, tmp)));
38158 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
38159 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
38160 emit_move_insn (res, tmp);
38162 if (HONOR_SIGNED_ZEROS (mode))
38163 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
38165 emit_label (label);
38166 LABEL_NUSES (label) = 1;
38168 emit_move_insn (operand0, res);
38171 /* Expand SSE sequence for computing round from OPERAND1 storing
38172 into OPERAND0. Sequence that works without relying on DImode truncation
38173 via cvttsd2siq that is only available on 64bit targets. */
38175 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
38177 /* C code for the stuff we expand below.
38178 double xa = fabs (x), xa2, x2;
38179 if (!isless (xa, TWO52))
38181 Using the absolute value and copying back sign makes
38182 -0.0 -> -0.0 correct.
38183 xa2 = xa + TWO52 - TWO52;
38188 else if (dxa > 0.5)
38190 x2 = copysign (xa2, x);
38193 enum machine_mode mode = GET_MODE (operand0);
38194 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
38196 TWO52 = ix86_gen_TWO52 (mode);
38198 /* Temporary for holding the result, initialized to the input
38199 operand to ease control flow. */
38200 res = gen_reg_rtx (mode);
38201 emit_move_insn (res, operand1);
38203 /* xa = abs (operand1) */
38204 xa = ix86_expand_sse_fabs (res, &mask);
38206 /* if (!isless (xa, TWO52)) goto label; */
38207 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38209 /* xa2 = xa + TWO52 - TWO52; */
38210 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
38211 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
38213 /* dxa = xa2 - xa; */
38214 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
38216 /* generate 0.5, 1.0 and -0.5 */
38217 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
38218 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
38219 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
38223 tmp = gen_reg_rtx (mode);
38224 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
38225 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
38226 emit_insn (gen_rtx_SET (VOIDmode, tmp,
38227 gen_rtx_AND (mode, one, tmp)));
38228 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
38229 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
38230 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
38231 emit_insn (gen_rtx_SET (VOIDmode, tmp,
38232 gen_rtx_AND (mode, one, tmp)));
38233 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
38235 /* res = copysign (xa2, operand1) */
38236 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
38238 emit_label (label);
38239 LABEL_NUSES (label) = 1;
38241 emit_move_insn (operand0, res);
38244 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
38247 ix86_expand_trunc (rtx operand0, rtx operand1)
38249 /* C code for SSE variant we expand below.
38250 double xa = fabs (x), x2;
38251 if (!isless (xa, TWO52))
38253 x2 = (double)(long)x;
38254 if (HONOR_SIGNED_ZEROS (mode))
38255 return copysign (x2, x);
38258 enum machine_mode mode = GET_MODE (operand0);
38259 rtx xa, xi, TWO52, label, res, mask;
38261 TWO52 = ix86_gen_TWO52 (mode);
38263 /* Temporary for holding the result, initialized to the input
38264 operand to ease control flow. */
38265 res = gen_reg_rtx (mode);
38266 emit_move_insn (res, operand1);
38268 /* xa = abs (operand1) */
38269 xa = ix86_expand_sse_fabs (res, &mask);
38271 /* if (!isless (xa, TWO52)) goto label; */
38272 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38274 /* x = (double)(long)x */
38275 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
38276 expand_fix (xi, res, 0);
38277 expand_float (res, xi, 0);
38279 if (HONOR_SIGNED_ZEROS (mode))
38280 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
38282 emit_label (label);
38283 LABEL_NUSES (label) = 1;
38285 emit_move_insn (operand0, res);
38288 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
38291 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
38293 enum machine_mode mode = GET_MODE (operand0);
38294 rtx xa, mask, TWO52, label, one, res, smask, tmp;
38296 /* C code for SSE variant we expand below.
38297 double xa = fabs (x), x2;
38298 if (!isless (xa, TWO52))
38300 xa2 = xa + TWO52 - TWO52;
38304 x2 = copysign (xa2, x);
38308 TWO52 = ix86_gen_TWO52 (mode);
38310 /* Temporary for holding the result, initialized to the input
38311 operand to ease control flow. */
38312 res = gen_reg_rtx (mode);
38313 emit_move_insn (res, operand1);
38315 /* xa = abs (operand1) */
38316 xa = ix86_expand_sse_fabs (res, &smask);
38318 /* if (!isless (xa, TWO52)) goto label; */
38319 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38321 /* res = xa + TWO52 - TWO52; */
38322 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
38323 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
38324 emit_move_insn (res, tmp);
38327 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
38329 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
38330 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
38331 emit_insn (gen_rtx_SET (VOIDmode, mask,
38332 gen_rtx_AND (mode, mask, one)));
38333 tmp = expand_simple_binop (mode, MINUS,
38334 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
38335 emit_move_insn (res, tmp);
38337 /* res = copysign (res, operand1) */
38338 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
38340 emit_label (label);
38341 LABEL_NUSES (label) = 1;
38343 emit_move_insn (operand0, res);
38346 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0. */
38349 ix86_expand_round (rtx operand0, rtx operand1)
38351 /* C code for the stuff we're doing below:
38352 double xa = fabs (x);
38353 if (!isless (xa, TWO52))
38355 xa = (double)(long)(xa + nextafter (0.5, 0.0));
38356 return copysign (xa, x);
38358 enum machine_mode mode = GET_MODE (operand0);
38359 rtx res, TWO52, xa, label, xi, half, mask;
38360 const struct real_format *fmt;
38361 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
38363 /* Temporary for holding the result, initialized to the input
38364 operand to ease control flow. */
38365 res = gen_reg_rtx (mode);
38366 emit_move_insn (res, operand1);
38368 TWO52 = ix86_gen_TWO52 (mode);
38369 xa = ix86_expand_sse_fabs (res, &mask);
38370 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
38372 /* load nextafter (0.5, 0.0) */
38373 fmt = REAL_MODE_FORMAT (mode);
38374 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
38375 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
38377 /* xa = xa + 0.5 */
38378 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
38379 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
38381 /* xa = (double)(int64_t)xa */
38382 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
38383 expand_fix (xi, xa, 0);
38384 expand_float (xa, xi, 0);
38386 /* res = copysign (xa, operand1) */
38387 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
38389 emit_label (label);
38390 LABEL_NUSES (label) = 1;
38392 emit_move_insn (operand0, res);
38395 /* Expand SSE sequence for computing round
38396 from OP1 storing into OP0 using sse4 round insn. */
38398 ix86_expand_round_sse4 (rtx op0, rtx op1)
38400 enum machine_mode mode = GET_MODE (op0);
38401 rtx e1, e2, res, half;
38402 const struct real_format *fmt;
38403 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
38404 rtx (*gen_copysign) (rtx, rtx, rtx);
38405 rtx (*gen_round) (rtx, rtx, rtx);
38410 gen_copysign = gen_copysignsf3;
38411 gen_round = gen_sse4_1_roundsf2;
38414 gen_copysign = gen_copysigndf3;
38415 gen_round = gen_sse4_1_rounddf2;
38418 gcc_unreachable ();
38421 /* round (a) = trunc (a + copysign (0.5, a)) */
38423 /* load nextafter (0.5, 0.0) */
38424 fmt = REAL_MODE_FORMAT (mode);
38425 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
38426 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
38427 half = const_double_from_real_value (pred_half, mode);
38429 /* e1 = copysign (0.5, op1) */
38430 e1 = gen_reg_rtx (mode);
38431 emit_insn (gen_copysign (e1, half, op1));
38433 /* e2 = op1 + e1 */
38434 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
38436 /* res = trunc (e2) */
38437 res = gen_reg_rtx (mode);
38438 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
38440 emit_move_insn (op0, res);
38444 /* Table of valid machine attributes. */
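/* For illustration, these attributes appear in user code as e.g.
     int  __attribute__((stdcall))     f (int);
     int  __attribute__((regparm (3))) g (int, int, int);
     void __attribute__((ms_abi))      h (void);
   (hypothetical declarations, shown only as usage examples). */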
38445 static const struct attribute_spec ix86_attribute_table[] =
38447 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
38448 affects_type_identity } */
38449 /* Stdcall attribute says callee is responsible for popping arguments
38450 if they are not variable. */
38451 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38453 /* Fastcall attribute says callee is responsible for popping arguments
38454 if they are not variable. */
38455 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38457 /* Thiscall attribute says callee is responsible for popping arguments
38458 if they are not variable. */
38459 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38461 /* Cdecl attribute says the callee is a normal C declaration */
38462 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38464 /* Regparm attribute specifies how many integer arguments are to be
38465 passed in registers. */
38466 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
38468 /* Sseregparm attribute says we are using x86_64 calling conventions
38469 for FP arguments. */
38470 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
38472 /* The transactional memory builtins are implicitly regparm or fastcall
38473 depending on the ABI. Override the generic do-nothing attribute that
38474 these builtins were declared with. */
38475 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
38477 /* force_align_arg_pointer says this function realigns the stack at entry. */
38478 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
38479 false, true, true, ix86_handle_cconv_attribute, false },
38480 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
38481 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
38482 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
38483 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
38486 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
38488 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
38490 #ifdef SUBTARGET_ATTRIBUTE_TABLE
38491 SUBTARGET_ATTRIBUTE_TABLE,
38493 /* ms_abi and sysv_abi calling convention function attributes. */
38494 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
38495 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
38496 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
38498 { "callee_pop_aggregate_return", 1, 1, false, true, true,
38499 ix86_handle_callee_pop_aggregate_return, true },
38501 { NULL, 0, 0, false, false, false, NULL, false }
38504 /* Implement targetm.vectorize.builtin_vectorization_cost. */
38506 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
38508 int misalign ATTRIBUTE_UNUSED)
38512 switch (type_of_cost)
38515 return ix86_cost->scalar_stmt_cost;
38518 return ix86_cost->scalar_load_cost;
38521 return ix86_cost->scalar_store_cost;
38524 return ix86_cost->vec_stmt_cost;
38527 return ix86_cost->vec_align_load_cost;
38530 return ix86_cost->vec_store_cost;
38532 case vec_to_scalar:
38533 return ix86_cost->vec_to_scalar_cost;
38535 case scalar_to_vec:
38536 return ix86_cost->scalar_to_vec_cost;
38538 case unaligned_load:
38539 case unaligned_store:
38540 return ix86_cost->vec_unalign_load_cost;
38542 case cond_branch_taken:
38543 return ix86_cost->cond_taken_branch_cost;
38545 case cond_branch_not_taken:
38546 return ix86_cost->cond_not_taken_branch_cost;
38549 case vec_promote_demote:
38550 return ix86_cost->vec_stmt_cost;
38552 case vec_construct:
38553 elements = TYPE_VECTOR_SUBPARTS (vectype);
38554 return elements / 2 + 1;
38557 gcc_unreachable ();
38561 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
38562 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
38563 insn every time. */
38565 static GTY(()) rtx vselect_insn;
38567 /* Initialize vselect_insn. */
38570 init_vselect_insn (void)
38575 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
38576 for (i = 0; i < MAX_VECT_LEN; ++i)
38577 XVECEXP (x, 0, i) = const0_rtx;
38578 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
38580 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
38582 vselect_insn = emit_insn (x);
38586 /* Construct (set target (vec_select op0 (parallel perm))) and
38587 return true if that's a valid instruction in the active ISA. */
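/* For instance, a one-operand V4SI permutation {2,3,0,1} becomes
   (set target (vec_select:V4SI op0 (parallel [2 3 0 1]))), which
   recog can match as pshufd with immediate 0x4e; if no pattern
   matches, the function just reports failure. */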
38590 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
38591 unsigned nelt, bool testing_p)
38594 rtx x, save_vconcat;
38597 if (vselect_insn == NULL_RTX)
38598 init_vselect_insn ();
38600 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
38601 PUT_NUM_ELEM (XVEC (x, 0), nelt);
38602 for (i = 0; i < nelt; ++i)
38603 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
38604 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
38605 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
38606 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
38607 SET_DEST (PATTERN (vselect_insn)) = target;
38608 icode = recog_memoized (vselect_insn);
38610 if (icode >= 0 && !testing_p)
38611 emit_insn (copy_rtx (PATTERN (vselect_insn)));
38613 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
38614 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
38615 INSN_CODE (vselect_insn) = -1;
38620 /* Similar, but generate a vec_concat from op0 and op1 as well. */
38623 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
38624 const unsigned char *perm, unsigned nelt,
38627 enum machine_mode v2mode;
38631 if (vselect_insn == NULL_RTX)
38632 init_vselect_insn ();
38634 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
38635 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
38636 PUT_MODE (x, v2mode);
38639 ok = expand_vselect (target, x, perm, nelt, testing_p);
38640 XEXP (x, 0) = const0_rtx;
38641 XEXP (x, 1) = const0_rtx;
38645 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38646 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
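/* For example, on V4SI the permutation {0,5,2,7} takes elements 0 and
   2 from op0 and elements 1 and 3 from op1, so it is a blend with mask
   0b1010; a permutation such as {1,0,2,3} moves an element to a
   different position and cannot be expressed as a blend. */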
38649 expand_vec_perm_blend (struct expand_vec_perm_d *d)
38651 enum machine_mode vmode = d->vmode;
38652 unsigned i, mask, nelt = d->nelt;
38653 rtx target, op0, op1, x;
38654 rtx rperm[32], vperm;
38656 if (d->one_operand_p)
38658 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
38660 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
38662 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
38667 /* This is a blend, not a permute. Elements must stay in their
38668 respective lanes. */
38669 for (i = 0; i < nelt; ++i)
38671 unsigned e = d->perm[i];
38672 if (!(e == i || e == i + nelt))
38679 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
38680 decision should be extracted elsewhere, so that we only try that
38681 sequence once all budget==3 options have been tried. */
38682 target = d->target;
38695 for (i = 0; i < nelt; ++i)
38696 mask |= (d->perm[i] >= nelt) << i;
38700 for (i = 0; i < 2; ++i)
38701 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
38706 for (i = 0; i < 4; ++i)
38707 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
38712 /* See if bytes move in pairs so we can use pblendw with
38713 an immediate argument, rather than pblendvb with a vector
38715 for (i = 0; i < 16; i += 2)
38716 if (d->perm[i] + 1 != d->perm[i + 1])
38719 for (i = 0; i < nelt; ++i)
38720 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
38723 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
38724 vperm = force_reg (vmode, vperm);
38726 if (GET_MODE_SIZE (vmode) == 16)
38727 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
38729 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
38733 for (i = 0; i < 8; ++i)
38734 mask |= (d->perm[i * 2] >= 16) << i;
38739 target = gen_lowpart (vmode, target);
38740 op0 = gen_lowpart (vmode, op0);
38741 op1 = gen_lowpart (vmode, op1);
38745 /* See if bytes move in pairs. If not, vpblendvb must be used. */
38746 for (i = 0; i < 32; i += 2)
38747 if (d->perm[i] + 1 != d->perm[i + 1])
38749 /* See if bytes move in quadruplets. If yes, vpblendd
38750 with immediate can be used. */
38751 for (i = 0; i < 32; i += 4)
38752 if (d->perm[i] + 2 != d->perm[i + 2])
38756 /* See if bytes move the same in both lanes. If yes,
38757 vpblendw with immediate can be used. */
38758 for (i = 0; i < 16; i += 2)
38759 if (d->perm[i] + 16 != d->perm[i + 16])
38762 /* Use vpblendw. */
38763 for (i = 0; i < 16; ++i)
38764 mask |= (d->perm[i * 2] >= 32) << i;
38769 /* Use vpblendd. */
38770 for (i = 0; i < 8; ++i)
38771 mask |= (d->perm[i * 4] >= 32) << i;
38776 /* See if words move in pairs. If yes, vpblendd can be used. */
38777 for (i = 0; i < 16; i += 2)
38778 if (d->perm[i] + 1 != d->perm[i + 1])
38782 /* See if words move the same in both lanes. If not,
38783 vpblendvb must be used. */
38784 for (i = 0; i < 8; i++)
38785 if (d->perm[i] + 8 != d->perm[i + 8])
38787 /* Use vpblendvb. */
38788 for (i = 0; i < 32; ++i)
38789 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
38793 target = gen_lowpart (vmode, target);
38794 op0 = gen_lowpart (vmode, op0);
38795 op1 = gen_lowpart (vmode, op1);
38796 goto finish_pblendvb;
38799 /* Use vpblendw. */
38800 for (i = 0; i < 16; ++i)
38801 mask |= (d->perm[i] >= 16) << i;
38805 /* Use vpblendd. */
38806 for (i = 0; i < 8; ++i)
38807 mask |= (d->perm[i * 2] >= 16) << i;
38812 /* Use vpblendd. */
38813 for (i = 0; i < 4; ++i)
38814 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
38819 gcc_unreachable ();
38822 /* This matches five different patterns with the different modes. */
38823 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
38824 x = gen_rtx_SET (VOIDmode, target, x);
38830 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38831 in terms of the variable form of vpermilps.
38833 Note that we will have already failed the immediate input vpermilps,
38834 which requires that the high and low part shuffle be identical; the
38835 variable form doesn't require that. */
38838 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
38840 rtx rperm[8], vperm;
38843 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
38846 /* We can only permute within the 128-bit lane. */
38847 for (i = 0; i < 8; ++i)
38849 unsigned e = d->perm[i];
38850 if (i < 4 ? e >= 4 : e < 4)
38857 for (i = 0; i < 8; ++i)
38859 unsigned e = d->perm[i];
38861 /* Within each 128-bit lane, the elements of op0 are numbered
38862 from 0 and the elements of op1 are numbered from 4. */
38868 rperm[i] = GEN_INT (e);
38871 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
38872 vperm = force_reg (V8SImode, vperm);
38873 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
38878 /* Return true if permutation D can be performed as VMODE permutation
38882 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
38884 unsigned int i, j, chunk;
38886 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
38887 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
38888 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
38891 if (GET_MODE_NUNITS (vmode) >= d->nelt)
38894 chunk = d->nelt / GET_MODE_NUNITS (vmode);
38895 for (i = 0; i < d->nelt; i += chunk)
38896 if (d->perm[i] & (chunk - 1))
38899 for (j = 1; j < chunk; ++j)
38900 if (d->perm[i] + j != d->perm[i + j])
38906 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
38907 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
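/* As an illustration of the byte-level selector built below: a
   one-operand V4SI permutation {2,3,0,1} would map to the pshufb
   control {8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7}, i.e. eltsz
   consecutive byte indices per destination element (in practice such
   simple shuffles are caught earlier by cheaper patterns). */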
38910 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
38912 unsigned i, nelt, eltsz, mask;
38913 unsigned char perm[32];
38914 enum machine_mode vmode = V16QImode;
38915 rtx rperm[32], vperm, target, op0, op1;
38919 if (!d->one_operand_p)
38921 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
38924 && valid_perm_using_mode_p (V2TImode, d))
38929 /* Use vperm2i128 insn. The pattern uses
38930 V4DImode instead of V2TImode. */
38931 target = gen_lowpart (V4DImode, d->target);
38932 op0 = gen_lowpart (V4DImode, d->op0);
38933 op1 = gen_lowpart (V4DImode, d->op1);
38935 = GEN_INT ((d->perm[0] / (nelt / 2))
38936 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
38937 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
38945 if (GET_MODE_SIZE (d->vmode) == 16)
38950 else if (GET_MODE_SIZE (d->vmode) == 32)
38955 /* V4DImode should be already handled through
38956 expand_vselect by vpermq instruction. */
38957 gcc_assert (d->vmode != V4DImode);
38960 if (d->vmode == V8SImode
38961 || d->vmode == V16HImode
38962 || d->vmode == V32QImode)
38964 /* First see if vpermq can be used for
38965 V8SImode/V16HImode/V32QImode. */
38966 if (valid_perm_using_mode_p (V4DImode, d))
38968 for (i = 0; i < 4; i++)
38969 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
38972 return expand_vselect (gen_lowpart (V4DImode, d->target),
38973 gen_lowpart (V4DImode, d->op0),
38977 /* Next see if vpermd can be used. */
38978 if (valid_perm_using_mode_p (V8SImode, d))
38981 /* Or if vpermps can be used. */
38982 else if (d->vmode == V8SFmode)
38985 if (vmode == V32QImode)
38987 /* vpshufb only works within 128-bit lanes; it is not
38988 possible to shuffle bytes across lanes. */
38989 for (i = 0; i < nelt; ++i)
38990 if ((d->perm[i] ^ i) & (nelt / 2))
39001 if (vmode == V8SImode)
39002 for (i = 0; i < 8; ++i)
39003 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
39006 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
39007 if (!d->one_operand_p)
39008 mask = 2 * nelt - 1;
39009 else if (vmode == V16QImode)
39012 mask = nelt / 2 - 1;
39014 for (i = 0; i < nelt; ++i)
39016 unsigned j, e = d->perm[i] & mask;
39017 for (j = 0; j < eltsz; ++j)
39018 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
39022 vperm = gen_rtx_CONST_VECTOR (vmode,
39023 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
39024 vperm = force_reg (vmode, vperm);
39026 target = gen_lowpart (vmode, d->target);
39027 op0 = gen_lowpart (vmode, d->op0);
39028 if (d->one_operand_p)
39030 if (vmode == V16QImode)
39031 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
39032 else if (vmode == V32QImode)
39033 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
39034 else if (vmode == V8SFmode)
39035 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
39037 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
39041 op1 = gen_lowpart (vmode, d->op1);
39042 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
39048 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
39049 in a single instruction. */
39052 expand_vec_perm_1 (struct expand_vec_perm_d *d)
39054 unsigned i, nelt = d->nelt;
39055 unsigned char perm2[MAX_VECT_LEN];
39057 /* Check plain VEC_SELECT first, because AVX has instructions that could
39058 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
39059 input where SEL+CONCAT may not. */
39060 if (d->one_operand_p)
39062 int mask = nelt - 1;
39063 bool identity_perm = true;
39064 bool broadcast_perm = true;
39066 for (i = 0; i < nelt; i++)
39068 perm2[i] = d->perm[i] & mask;
39070 identity_perm = false;
39072 broadcast_perm = false;
39078 emit_move_insn (d->target, d->op0);
39081 else if (broadcast_perm && TARGET_AVX2)
39083 /* Use vpbroadcast{b,w,d}. */
39084 rtx (*gen) (rtx, rtx) = NULL;
39088 gen = gen_avx2_pbroadcastv32qi_1;
39091 gen = gen_avx2_pbroadcastv16hi_1;
39094 gen = gen_avx2_pbroadcastv8si_1;
39097 gen = gen_avx2_pbroadcastv16qi;
39100 gen = gen_avx2_pbroadcastv8hi;
39103 gen = gen_avx2_vec_dupv8sf_1;
39105 /* For other modes prefer other shuffles this function creates. */
39111 emit_insn (gen (d->target, d->op0));
39116 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
39119 /* There are plenty of patterns in sse.md that are written for
39120 SEL+CONCAT and are not replicated for a single op. Perhaps
39121 that should be changed, to avoid the nastiness here. */
39123 /* Recognize interleave style patterns, which means incrementing
39124 every other permutation operand. */
39125 for (i = 0; i < nelt; i += 2)
39127 perm2[i] = d->perm[i] & mask;
39128 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
39130 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
39134 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
39137 for (i = 0; i < nelt; i += 4)
39139 perm2[i + 0] = d->perm[i + 0] & mask;
39140 perm2[i + 1] = d->perm[i + 1] & mask;
39141 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
39142 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
39145 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
39151 /* Finally, try the fully general two operand permute. */
39152 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
39156 /* Recognize interleave style patterns with reversed operands. */
39157 if (!d->one_operand_p)
39159 for (i = 0; i < nelt; ++i)
39161 unsigned e = d->perm[i];
39169 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
39174 /* Try the SSE4.1 blend variable merge instructions. */
39175 if (expand_vec_perm_blend (d))
39178 /* Try one of the AVX vpermil variable permutations. */
39179 if (expand_vec_perm_vpermil (d))
39182 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
39183 vpshufb, vpermd, vpermps or vpermq variable permutation. */
39184 if (expand_vec_perm_pshufb (d))
39190 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
39191 in terms of a pair of pshuflw + pshufhw instructions. */
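/* E.g. the V8HI permutation {3,2,1,0,7,6,5,4} keeps the low four
   words in the low half and the high four in the high half, so it
   can be done as pshuflw reversing the low words followed by
   pshufhw reversing the high words on the same register. */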
39194 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
39196 unsigned char perm2[MAX_VECT_LEN];
39200 if (d->vmode != V8HImode || !d->one_operand_p)
39203 /* The two permutations only operate in 64-bit lanes. */
39204 for (i = 0; i < 4; ++i)
39205 if (d->perm[i] >= 4)
39207 for (i = 4; i < 8; ++i)
39208 if (d->perm[i] < 4)
39214 /* Emit the pshuflw. */
39215 memcpy (perm2, d->perm, 4);
39216 for (i = 4; i < 8; ++i)
39218 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
39221 /* Emit the pshufhw. */
39222 memcpy (perm2 + 4, d->perm + 4, 4);
39223 for (i = 0; i < 4; ++i)
39225 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
39231 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39232 the permutation using the SSSE3 palignr instruction. This succeeds
39233 when all of the elements in PERM fit within one vector and we merely
39234 need to shift them down so that a single vector permutation has a
39235 chance to succeed. */
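/* For example, with V8HI and perm = {3,4,5,6,7,8,9,10} all selected
   elements lie within a window of nelt consecutive elements starting
   at min = 3, so a palignr by 3*16 bits shifts them into one vector;
   the remaining permutation is then the identity {0,...,7}, which is
   the degenerate case tested below. */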
39238 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
39240 unsigned i, nelt = d->nelt;
39245 /* Even with AVX, palignr only operates on 128-bit vectors. */
39246 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
39249 min = nelt, max = 0;
39250 for (i = 0; i < nelt; ++i)
39252 unsigned e = d->perm[i];
39258 if (min == 0 || max - min >= nelt)
39261 /* Given that we have SSSE3, we know we'll be able to implement the
39262 single operand permutation after the palignr with pshufb. */
39266 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
39267 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
39268 gen_lowpart (TImode, d->op1),
39269 gen_lowpart (TImode, d->op0), shift));
39271 d->op0 = d->op1 = d->target;
39272 d->one_operand_p = true;
39275 for (i = 0; i < nelt; ++i)
39277 unsigned e = d->perm[i] - min;
39283 /* Test for the degenerate case where the alignment by itself
39284 produces the desired permutation. */
39288 ok = expand_vec_perm_1 (d);
39294 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
39296 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39297 a two vector permutation into a single vector permutation by using
39298 an interleave operation to merge the vectors. */
39301 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
39303 struct expand_vec_perm_d dremap, dfinal;
39304 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
39305 unsigned HOST_WIDE_INT contents;
39306 unsigned char remap[2 * MAX_VECT_LEN];
39308 bool ok, same_halves = false;
39310 if (GET_MODE_SIZE (d->vmode) == 16)
39312 if (d->one_operand_p)
39315 else if (GET_MODE_SIZE (d->vmode) == 32)
39319 /* For 32-byte modes allow even d->one_operand_p.
39320 The lack of cross-lane shuffling in some instructions
39321 might prevent a single insn shuffle. */
39323 dfinal.testing_p = true;
39324 /* If expand_vec_perm_interleave3 can expand this into
39325 a 3 insn sequence, give up and let it be expanded that
39326 way instead. While that is one insn longer, it doesn't
39327 need a memory operand, and in the common case where the
39328 interleave low and interleave high permutations with the
39329 same operands are adjacent, the pair needs only 4 insns
39330 in total after CSE. */
39331 if (expand_vec_perm_interleave3 (&dfinal))
39337 /* Examine from whence the elements come. */
39339 for (i = 0; i < nelt; ++i)
39340 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
39342 memset (remap, 0xff, sizeof (remap));
39345 if (GET_MODE_SIZE (d->vmode) == 16)
39347 unsigned HOST_WIDE_INT h1, h2, h3, h4;
39349 /* Split the two input vectors into 4 halves. */
39350 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
39355 /* If the elements are from the low halves, use interleave low; similarly
39356 use interleave high for the high halves. If the elements are from
39357 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
39358 if ((contents & (h1 | h3)) == contents)
39361 for (i = 0; i < nelt2; ++i)
39364 remap[i + nelt] = i * 2 + 1;
39365 dremap.perm[i * 2] = i;
39366 dremap.perm[i * 2 + 1] = i + nelt;
39368 if (!TARGET_SSE2 && d->vmode == V4SImode)
39369 dremap.vmode = V4SFmode;
39371 else if ((contents & (h2 | h4)) == contents)
39374 for (i = 0; i < nelt2; ++i)
39376 remap[i + nelt2] = i * 2;
39377 remap[i + nelt + nelt2] = i * 2 + 1;
39378 dremap.perm[i * 2] = i + nelt2;
39379 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
39381 if (!TARGET_SSE2 && d->vmode == V4SImode)
39382 dremap.vmode = V4SFmode;
39384 else if ((contents & (h1 | h4)) == contents)
39387 for (i = 0; i < nelt2; ++i)
39390 remap[i + nelt + nelt2] = i + nelt2;
39391 dremap.perm[i] = i;
39392 dremap.perm[i + nelt2] = i + nelt + nelt2;
39397 dremap.vmode = V2DImode;
39399 dremap.perm[0] = 0;
39400 dremap.perm[1] = 3;
39403 else if ((contents & (h2 | h3)) == contents)
39406 for (i = 0; i < nelt2; ++i)
39408 remap[i + nelt2] = i;
39409 remap[i + nelt] = i + nelt2;
39410 dremap.perm[i] = i + nelt2;
39411 dremap.perm[i + nelt2] = i + nelt;
39416 dremap.vmode = V2DImode;
39418 dremap.perm[0] = 1;
39419 dremap.perm[1] = 2;
39427 unsigned int nelt4 = nelt / 4, nzcnt = 0;
39428 unsigned HOST_WIDE_INT q[8];
39429 unsigned int nonzero_halves[4];
39431 /* Split the two input vectors into 8 quarters. */
39432 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
39433 for (i = 1; i < 8; ++i)
39434 q[i] = q[0] << (nelt4 * i);
39435 for (i = 0; i < 4; ++i)
39436 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
39438 nonzero_halves[nzcnt] = i;
39444 gcc_assert (d->one_operand_p);
39445 nonzero_halves[1] = nonzero_halves[0];
39446 same_halves = true;
39448 else if (d->one_operand_p)
39450 gcc_assert (nonzero_halves[0] == 0);
39451 gcc_assert (nonzero_halves[1] == 1);
39456 if (d->perm[0] / nelt2 == nonzero_halves[1])
39458 /* Attempt to increase the likelihood that dfinal
39459 shuffle will be intra-lane. */
39460 char tmph = nonzero_halves[0];
39461 nonzero_halves[0] = nonzero_halves[1];
39462 nonzero_halves[1] = tmph;
39465 /* vperm2f128 or vperm2i128. */
39466 for (i = 0; i < nelt2; ++i)
39468 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
39469 remap[i + nonzero_halves[0] * nelt2] = i;
39470 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
39471 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
39474 if (d->vmode != V8SFmode
39475 && d->vmode != V4DFmode
39476 && d->vmode != V8SImode)
39478 dremap.vmode = V8SImode;
39480 for (i = 0; i < 4; ++i)
39482 dremap.perm[i] = i + nonzero_halves[0] * 4;
39483 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
39487 else if (d->one_operand_p)
39489 else if (TARGET_AVX2
39490 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
39493 for (i = 0; i < nelt4; ++i)
39496 remap[i + nelt] = i * 2 + 1;
39497 remap[i + nelt2] = i * 2 + nelt2;
39498 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
39499 dremap.perm[i * 2] = i;
39500 dremap.perm[i * 2 + 1] = i + nelt;
39501 dremap.perm[i * 2 + nelt2] = i + nelt2;
39502 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
39505 else if (TARGET_AVX2
39506 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
39509 for (i = 0; i < nelt4; ++i)
39511 remap[i + nelt4] = i * 2;
39512 remap[i + nelt + nelt4] = i * 2 + 1;
39513 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
39514 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
39515 dremap.perm[i * 2] = i + nelt4;
39516 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
39517 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
39518 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
39525 /* Use the remapping array set up above to move the elements from their
39526 swizzled locations into their final destinations. */
39528 for (i = 0; i < nelt; ++i)
39530 unsigned e = remap[d->perm[i]];
39531 gcc_assert (e < nelt);
39532 /* If same_halves is true, both halves of the remapped vector are the
39533 same. Avoid cross-lane accesses if possible. */
39534 if (same_halves && i >= nelt2)
39536 gcc_assert (e < nelt2);
39537 dfinal.perm[i] = e + nelt2;
39540 dfinal.perm[i] = e;
39542 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
39543 dfinal.op1 = dfinal.op0;
39544 dfinal.one_operand_p = true;
39545 dremap.target = dfinal.op0;
39547 /* Test if the final remap can be done with a single insn. For V4SFmode or
39548 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
39550 ok = expand_vec_perm_1 (&dfinal);
39551 seq = get_insns ();
39560 if (dremap.vmode != dfinal.vmode)
39562 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
39563 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
39564 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
39567 ok = expand_vec_perm_1 (&dremap);
39574 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39575 a single vector cross-lane permutation into vpermq followed
39576 by any of the single insn permutations. */
39579 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
39581 struct expand_vec_perm_d dremap, dfinal;
39582 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
39583 unsigned contents[2];
39587 && (d->vmode == V32QImode || d->vmode == V16HImode)
39588 && d->one_operand_p))
39593 for (i = 0; i < nelt2; ++i)
39595 contents[0] |= 1u << (d->perm[i] / nelt4);
39596 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
39599 for (i = 0; i < 2; ++i)
39601 unsigned int cnt = 0;
39602 for (j = 0; j < 4; ++j)
39603 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
39611 dremap.vmode = V4DImode;
39613 dremap.target = gen_reg_rtx (V4DImode);
39614 dremap.op0 = gen_lowpart (V4DImode, d->op0);
39615 dremap.op1 = dremap.op0;
39616 dremap.one_operand_p = true;
39617 for (i = 0; i < 2; ++i)
39619 unsigned int cnt = 0;
39620 for (j = 0; j < 4; ++j)
39621 if ((contents[i] & (1u << j)) != 0)
39622 dremap.perm[2 * i + cnt++] = j;
39623 for (; cnt < 2; ++cnt)
39624 dremap.perm[2 * i + cnt] = 0;
39628 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
39629 dfinal.op1 = dfinal.op0;
39630 dfinal.one_operand_p = true;
39631 for (i = 0, j = 0; i < nelt; ++i)
39635 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
39636 if ((d->perm[i] / nelt4) == dremap.perm[j])
39638 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
39639 dfinal.perm[i] |= nelt4;
39641 gcc_unreachable ();
39644 ok = expand_vec_perm_1 (&dremap);
39647 ok = expand_vec_perm_1 (&dfinal);
39653 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
39654 a vector permutation using two instructions, vperm2f128 resp.
39655 vperm2i128 followed by any single in-lane permutation. */
39658 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
39660 struct expand_vec_perm_d dfirst, dsecond;
39661 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
39665 || GET_MODE_SIZE (d->vmode) != 32
39666 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
39670 dsecond.one_operand_p = false;
39671 dsecond.testing_p = true;
39673 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
39674 immediate. For perm < 16 the second permutation uses
39675 d->op0 as first operand, for perm >= 16 it uses d->op1
39676 as first operand. The second operand is the result of vperm2[fi]128. */
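/* For illustration, taking perm == 0x6: the low-lane selector is 2 and
   the high-lane selector is 1, so the immediate is
   ((0x6 << 2) | 0x6) & 0x33 == 0x12, i.e. the vperm2[fi]128 result takes
   its low 128 bits from the low lane of the second source and its high
   128 bits from the high lane of the first source. */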
39678 for (perm = 0; perm < 32; perm++)
39680 /* Ignore permutations which do not move anything cross-lane. */
39683 /* The second shuffle for e.g. V4DFmode has
39684 0123 and ABCD operands.
39685 Ignore AB23, as 23 is already in the second lane
39686 of the first operand. */
39687 if ((perm & 0xc) == (1 << 2)) continue;
39688 /* And 01CD, as 01 is in the first lane of the first operand. */
39690 if ((perm & 3) == 0) continue;
39691 /* And 4567, as then the vperm2[fi]128 doesn't change
39692 anything on the original 4567 second operand. */
39693 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
39697 /* The second shuffle for e.g. V4DFmode has
39698 4567 and ABCD operands.
39699 Ignore AB67, as 67 is already in the second lane
39700 of the first operand. */
39701 if ((perm & 0xc) == (3 << 2)) continue;
39702 /* And 45CD, as 45 is in the first lane of the first operand. */
39704 if ((perm & 3) == 2) continue;
39705 /* And 0123, as then the vperm2[fi]128 doesn't change
39706 anything on the original 0123 first operand. */
39707 if ((perm & 0xf) == (1 << 2)) continue;
39710 for (i = 0; i < nelt; i++)
39712 j = d->perm[i] / nelt2;
39713 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
39714 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
39715 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
39716 dsecond.perm[i] = d->perm[i] & (nelt - 1);
39724 ok = expand_vec_perm_1 (&dsecond);
39735 /* Found a usable second shuffle. dfirst will be
39736 vperm2f128 on d->op0 and d->op1. */
39737 dsecond.testing_p = false;
39739 dfirst.target = gen_reg_rtx (d->vmode);
39740 for (i = 0; i < nelt; i++)
39741 dfirst.perm[i] = (i & (nelt2 - 1))
39742 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
39744 ok = expand_vec_perm_1 (&dfirst);
39747 /* And dsecond is some single insn shuffle, taking
39748 d->op0 and result of vperm2f128 (if perm < 16) or
39749 d->op1 and result of vperm2f128 (otherwise). */
39750 dsecond.op1 = dfirst.target;
39752 dsecond.op0 = dfirst.op1;
39754 ok = expand_vec_perm_1 (&dsecond);
39760 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
39761 if (d->one_operand_p)
39768 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
39769 a two vector permutation using 2 intra-lane interleave insns
39770 and cross-lane shuffle for 32-byte vectors. */
39773 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
39776 rtx (*gen) (rtx, rtx, rtx);
39778 if (d->one_operand_p)
39780 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
39782 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
39788 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
39790 for (i = 0; i < nelt; i += 2)
39791 if (d->perm[i] != d->perm[0] + i / 2
39792 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
39802 gen = gen_vec_interleave_highv32qi;
39804 gen = gen_vec_interleave_lowv32qi;
39808 gen = gen_vec_interleave_highv16hi;
39810 gen = gen_vec_interleave_lowv16hi;
39814 gen = gen_vec_interleave_highv8si;
39816 gen = gen_vec_interleave_lowv8si;
39820 gen = gen_vec_interleave_highv4di;
39822 gen = gen_vec_interleave_lowv4di;
39826 gen = gen_vec_interleave_highv8sf;
39828 gen = gen_vec_interleave_lowv8sf;
39832 gen = gen_vec_interleave_highv4df;
39834 gen = gen_vec_interleave_lowv4df;
39837 gcc_unreachable ();
39840 emit_insn (gen (d->target, d->op0, d->op1));
39844 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
39845 a single vector permutation using a single intra-lane vector
39846 permutation, vperm2f128 swapping the lanes and vblend* insn blending
39847 the non-swapped and swapped vectors together. */
39850 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
39852 struct expand_vec_perm_d dfirst, dsecond;
39853 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
39856 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
39860 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
39861 || !d->one_operand_p)
39865 for (i = 0; i < nelt; i++)
39866 dfirst.perm[i] = 0xff;
39867 for (i = 0, msk = 0; i < nelt; i++)
39869 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
39870 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
39872 dfirst.perm[j] = d->perm[i];
39876 for (i = 0; i < nelt; i++)
39877 if (dfirst.perm[i] == 0xff)
39878 dfirst.perm[i] = i;
39881 dfirst.target = gen_reg_rtx (dfirst.vmode);
39884 ok = expand_vec_perm_1 (&dfirst);
39885 seq = get_insns ();
39897 dsecond.op0 = dfirst.target;
39898 dsecond.op1 = dfirst.target;
39899 dsecond.one_operand_p = true;
39900 dsecond.target = gen_reg_rtx (dsecond.vmode);
39901 for (i = 0; i < nelt; i++)
39902 dsecond.perm[i] = i ^ nelt2;
39904 ok = expand_vec_perm_1 (&dsecond);
39907 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
39908 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
39912 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
39913 permutation using two vperm2f128, followed by a vshufpd insn blending
39914 the two vectors together. */
39917 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
39919 struct expand_vec_perm_d dfirst, dsecond, dthird;
39922 if (!TARGET_AVX || (d->vmode != V4DFmode))
39932 dfirst.perm[0] = (d->perm[0] & ~1);
39933 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
39934 dfirst.perm[2] = (d->perm[2] & ~1);
39935 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
39936 dsecond.perm[0] = (d->perm[1] & ~1);
39937 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
39938 dsecond.perm[2] = (d->perm[3] & ~1);
39939 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
39940 dthird.perm[0] = (d->perm[0] % 2);
39941 dthird.perm[1] = (d->perm[1] % 2) + 4;
39942 dthird.perm[2] = (d->perm[2] % 2) + 2;
39943 dthird.perm[3] = (d->perm[3] % 2) + 6;
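/* A worked example: for d->perm == { 2, 7, 4, 1 } the assignments above
   give dfirst.perm == { 2, 3, 4, 5 } and dsecond.perm == { 6, 7, 0, 1 }
   (each a single vperm2f128 lane selection), and dthird.perm ==
   { 0, 5, 2, 7 }, which the final vshufpd can realize from the two
   intermediate results. */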
39945 dfirst.target = gen_reg_rtx (dfirst.vmode);
39946 dsecond.target = gen_reg_rtx (dsecond.vmode);
39947 dthird.op0 = dfirst.target;
39948 dthird.op1 = dsecond.target;
39949 dthird.one_operand_p = false;
39951 canonicalize_perm (&dfirst);
39952 canonicalize_perm (&dsecond);
39954 ok = expand_vec_perm_1 (&dfirst)
39955 && expand_vec_perm_1 (&dsecond)
39956 && expand_vec_perm_1 (&dthird);
39963 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
39964 permutation with two pshufb insns and an ior. We should have already
39965 failed all two instruction sequences. */
39968 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
39970 rtx rperm[2][16], vperm, l, h, op, m128;
39971 unsigned int i, nelt, eltsz;
39973 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
39975 gcc_assert (!d->one_operand_p);
39978 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
39980 /* Generate two permutation masks. If the required element is within
39981 the given vector it is shuffled into the proper lane. If the required
39982 element is in the other vector, force a zero into the lane by setting
39983 bit 7 in the permutation mask. */
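/* A small example (V16QImode, eltsz == 1): if d->perm[3] == 20, the
   element lives in op1 (20 - nelt == 4), so the mask applied to op1 gets
   byte index 4 at position 3 while the mask applied to op0 gets -128
   there; the final por then merges the two pshufb results. */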
39984 m128 = GEN_INT (-128);
39985 for (i = 0; i < nelt; ++i)
39987 unsigned j, e = d->perm[i];
39988 unsigned which = (e >= nelt);
39992 for (j = 0; j < eltsz; ++j)
39994 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
39995 rperm[1-which][i*eltsz + j] = m128;
39999 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
40000 vperm = force_reg (V16QImode, vperm);
40002 l = gen_reg_rtx (V16QImode);
40003 op = gen_lowpart (V16QImode, d->op0);
40004 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
40006 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
40007 vperm = force_reg (V16QImode, vperm);
40009 h = gen_reg_rtx (V16QImode);
40010 op = gen_lowpart (V16QImode, d->op1);
40011 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
40013 op = gen_lowpart (V16QImode, d->target);
40014 emit_insn (gen_iorv16qi3 (op, l, h));
40019 /* Implement arbitrary permutation of one V32QImode or V16QImode operand
40020 with two vpshufb insns, vpermq and vpor. We should have already failed
40021 all two or three instruction sequences. */
40024 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
40026 rtx rperm[2][32], vperm, l, h, hp, op, m128;
40027 unsigned int i, nelt, eltsz;
40030 || !d->one_operand_p
40031 || (d->vmode != V32QImode && d->vmode != V16HImode))
40038 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
40040 /* Generate two permutation masks. If the required element is within
40041 the same lane, it is shuffled in. If the required element is from the
40042 other lane, force a zero by setting bit 7 in the permutation mask.
40043 In the other mask the elements are non-negative where the element is
40044 requested from the other lane but is also moved to the other lane,
40045 so that the result of vpshufb can have the two V2TImode halves swapped. */
40047 m128 = GEN_INT (-128);
40048 for (i = 0; i < nelt; ++i)
40050 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
40051 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
40053 for (j = 0; j < eltsz; ++j)
40055 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
40056 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
40060 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
40061 vperm = force_reg (V32QImode, vperm);
40063 h = gen_reg_rtx (V32QImode);
40064 op = gen_lowpart (V32QImode, d->op0);
40065 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
40067 /* Swap the 128-bit lanes of h into hp. */
40068 hp = gen_reg_rtx (V4DImode);
40069 op = gen_lowpart (V4DImode, h);
40070 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
40073 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
40074 vperm = force_reg (V32QImode, vperm);
40076 l = gen_reg_rtx (V32QImode);
40077 op = gen_lowpart (V32QImode, d->op0);
40078 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
40080 op = gen_lowpart (V32QImode, d->target);
40081 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
40086 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
40087 and extract-odd permutations of two V32QImode or V16QImode operands
40088 with two vpshufb insns, vpor and vpermq. We should have already
40089 failed all two or three instruction sequences. */
40092 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
40094 rtx rperm[2][32], vperm, l, h, ior, op, m128;
40095 unsigned int i, nelt, eltsz;
40098 || d->one_operand_p
40099 || (d->vmode != V32QImode && d->vmode != V16HImode))
40102 for (i = 0; i < d->nelt; ++i)
40103 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
40110 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
40112 /* Generate two permutation masks. In the first permutation mask
40113 the first quarter will contain indexes for the first half
40114 of the op0, the second quarter will contain bit 7 set, third quarter
40115 will contain indexes for the second half of the op0 and the
40116 last quarter bit 7 set. In the second permutation mask
40117 the first quarter will contain bit 7 set, the second quarter
40118 indexes for the first half of the op1, the third quarter bit 7 set
40119 and last quarter indexes for the second half of the op1.
40120 I.e. the first mask e.g. for V32QImode extract even will be:
40121 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
40122 (all values masked with 0xf except for -128) and second mask
40123 for extract even will be
40124 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
40125 m128 = GEN_INT (-128);
40126 for (i = 0; i < nelt; ++i)
40128 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
40129 unsigned which = d->perm[i] >= nelt;
40130 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
40132 for (j = 0; j < eltsz; ++j)
40134 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
40135 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
40139 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
40140 vperm = force_reg (V32QImode, vperm);
40142 l = gen_reg_rtx (V32QImode);
40143 op = gen_lowpart (V32QImode, d->op0);
40144 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
40146 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
40147 vperm = force_reg (V32QImode, vperm);
40149 h = gen_reg_rtx (V32QImode);
40150 op = gen_lowpart (V32QImode, d->op1);
40151 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
40153 ior = gen_reg_rtx (V32QImode);
40154 emit_insn (gen_iorv32qi3 (ior, l, h));
40156 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
40157 op = gen_lowpart (V4DImode, d->target);
40158 ior = gen_lowpart (V4DImode, ior);
40159 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
40160 const1_rtx, GEN_INT (3)));
40165 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
40166 and extract-odd permutations. */
40169 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
40176 t1 = gen_reg_rtx (V4DFmode);
40177 t2 = gen_reg_rtx (V4DFmode);
40179 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40180 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
40181 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
40183 /* Now an unpck[lh]pd will produce the result required. */
40185 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
40187 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
40193 int mask = odd ? 0xdd : 0x88;
40195 t1 = gen_reg_rtx (V8SFmode);
40196 t2 = gen_reg_rtx (V8SFmode);
40197 t3 = gen_reg_rtx (V8SFmode);
40199 /* Shuffle within the 128-bit lanes to produce:
40200 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
40201 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
40204 /* Shuffle the lanes around to produce:
40205 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
40206 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
40209 /* Shuffle within the 128-bit lanes to produce:
40210 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
40211 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
40213 /* Shuffle within the 128-bit lanes to produce:
40214 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
40215 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
40217 /* Shuffle the lanes around to produce:
40218 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
40219 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
40228 /* These are always directly implementable by expand_vec_perm_1. */
40229 gcc_unreachable ();
40233 return expand_vec_perm_pshufb2 (d);
40236 /* We need 2*log2(N)-1 operations to achieve odd/even
40237 with interleave. */
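/* For example, for V8HImode N == 8, so 2 * log2 (8) - 1 == 5 interleave
   insns are needed: the four emitted unconditionally below plus the
   final high or low interleave selected by ODD. */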
40238 t1 = gen_reg_rtx (V8HImode);
40239 t2 = gen_reg_rtx (V8HImode);
40240 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
40241 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
40242 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
40243 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
40245 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
40247 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
40254 return expand_vec_perm_pshufb2 (d);
40257 t1 = gen_reg_rtx (V16QImode);
40258 t2 = gen_reg_rtx (V16QImode);
40259 t3 = gen_reg_rtx (V16QImode);
40260 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
40261 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
40262 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
40263 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
40264 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
40265 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
40267 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
40269 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
40276 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
40281 struct expand_vec_perm_d d_copy = *d;
40282 d_copy.vmode = V4DFmode;
40283 d_copy.target = gen_lowpart (V4DFmode, d->target);
40284 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
40285 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
40286 return expand_vec_perm_even_odd_1 (&d_copy, odd);
40289 t1 = gen_reg_rtx (V4DImode);
40290 t2 = gen_reg_rtx (V4DImode);
40292 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
40293 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
40294 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
40296 /* Now an vpunpck[lh]qdq will produce the result required. */
40298 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
40300 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
40307 struct expand_vec_perm_d d_copy = *d;
40308 d_copy.vmode = V8SFmode;
40309 d_copy.target = gen_lowpart (V8SFmode, d->target);
40310 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
40311 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
40312 return expand_vec_perm_even_odd_1 (&d_copy, odd);
40315 t1 = gen_reg_rtx (V8SImode);
40316 t2 = gen_reg_rtx (V8SImode);
40318 /* Shuffle the lanes around into
40319 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
40320 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
40321 gen_lowpart (V4DImode, d->op0),
40322 gen_lowpart (V4DImode, d->op1),
40324 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
40325 gen_lowpart (V4DImode, d->op0),
40326 gen_lowpart (V4DImode, d->op1),
40329 /* Swap the 2nd and 3rd position in each lane into
40330 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
40331 emit_insn (gen_avx2_pshufdv3 (t1, t1,
40332 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40333 emit_insn (gen_avx2_pshufdv3 (t2, t2,
40334 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
40336 /* Now an vpunpck[lh]qdq will produce
40337 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
40339 t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
40340 gen_lowpart (V4DImode, t1),
40341 gen_lowpart (V4DImode, t2));
40343 t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
40344 gen_lowpart (V4DImode, t1),
40345 gen_lowpart (V4DImode, t2));
40350 gcc_unreachable ();
40356 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40357 extract-even and extract-odd permutations. */
40360 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
40362 unsigned i, odd, nelt = d->nelt;
40365 if (odd != 0 && odd != 1)
40368 for (i = 1; i < nelt; ++i)
40369 if (d->perm[i] != 2 * i + odd)
40372 return expand_vec_perm_even_odd_1 (d, odd);
40375 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
40376 permutations. We assume that expand_vec_perm_1 has already failed. */
40379 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
40381 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
40382 enum machine_mode vmode = d->vmode;
40383 unsigned char perm2[4];
40391 /* These are special-cased in sse.md so that we can optionally
40392 use the vbroadcast instruction. They expand to two insns
40393 if the input happens to be in a register. */
40394 gcc_unreachable ();
40400 /* These are always implementable using standard shuffle patterns. */
40401 gcc_unreachable ();
40405 /* These can be implemented via interleave. We save one insn by
40406 stopping once we have promoted to V4SImode and then use pshufd. */
40410 rtx (*gen) (rtx, rtx, rtx)
40411 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
40412 : gen_vec_interleave_lowv8hi;
40416 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
40417 : gen_vec_interleave_highv8hi;
40422 dest = gen_reg_rtx (vmode);
40423 emit_insn (gen (dest, op0, op0));
40424 vmode = get_mode_wider_vector (vmode);
40425 op0 = gen_lowpart (vmode, dest);
40427 while (vmode != V4SImode);
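/* A concrete walk-through: broadcasting byte 5 of a V16QImode vector
   takes a low byte interleave (promoting to V8HImode) followed by a
   high word interleave (promoting to V4SImode), after which four copies
   of byte 5 sit in SImode element 1 and the final pshufd replicates
   that element. */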
40429 memset (perm2, elt, 4);
40430 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
40439 /* For AVX2 broadcasts of the first element vpbroadcast* or
40440 vpermq should be used by expand_vec_perm_1. */
40441 gcc_assert (!TARGET_AVX2 || d->perm[0]);
40445 gcc_unreachable ();
40449 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
40450 broadcast permutations. */
40453 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
40455 unsigned i, elt, nelt = d->nelt;
40457 if (!d->one_operand_p)
40461 for (i = 1; i < nelt; ++i)
40462 if (d->perm[i] != elt)
40465 return expand_vec_perm_broadcast_1 (d);
40468 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
40469 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
40470 all the shorter instruction sequences. */
40473 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
40475 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
40476 unsigned int i, nelt, eltsz;
40480 || d->one_operand_p
40481 || (d->vmode != V32QImode && d->vmode != V16HImode))
40488 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
40490 /* Generate 4 permutation masks. If the required element is within
40491 the same lane, it is shuffled in. If the required element is from the
40492 other lane, force a zero by setting bit 7 in the permutation mask.
40493 In the other mask the elements are non-negative where the element is
40494 requested from the other lane but is also moved to the other lane,
40495 so that the result of vpshufb can have the two V2TImode halves swapped. */
40497 m128 = GEN_INT (-128);
40498 for (i = 0; i < 32; ++i)
40500 rperm[0][i] = m128;
40501 rperm[1][i] = m128;
40502 rperm[2][i] = m128;
40503 rperm[3][i] = m128;
40509 for (i = 0; i < nelt; ++i)
40511 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
40512 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
40513 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
40515 for (j = 0; j < eltsz; ++j)
40516 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
40517 used[which] = true;
40520 for (i = 0; i < 2; ++i)
40522 if (!used[2 * i + 1])
40527 vperm = gen_rtx_CONST_VECTOR (V32QImode,
40528 gen_rtvec_v (32, rperm[2 * i + 1]));
40529 vperm = force_reg (V32QImode, vperm);
40530 h[i] = gen_reg_rtx (V32QImode);
40531 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
40532 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
40535 /* Swap the 128-bit lanes of h[X]. */
40536 for (i = 0; i < 2; ++i)
40538 if (h[i] == NULL_RTX)
40540 op = gen_reg_rtx (V4DImode);
40541 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
40542 const2_rtx, GEN_INT (3), const0_rtx,
40544 h[i] = gen_lowpart (V32QImode, op);
40547 for (i = 0; i < 2; ++i)
40554 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
40555 vperm = force_reg (V32QImode, vperm);
40556 l[i] = gen_reg_rtx (V32QImode);
40557 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
40558 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
40561 for (i = 0; i < 2; ++i)
40565 op = gen_reg_rtx (V32QImode);
40566 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
40573 gcc_assert (l[0] && l[1]);
40574 op = gen_lowpart (V32QImode, d->target);
40575 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
40579 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
40580 With all of the interface bits taken care of, perform the expansion
40581 in D and return true on success. */
40584 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
40586 /* Try a single instruction expansion. */
40587 if (expand_vec_perm_1 (d))
40590 /* Try sequences of two instructions. */
40592 if (expand_vec_perm_pshuflw_pshufhw (d))
40595 if (expand_vec_perm_palignr (d))
40598 if (expand_vec_perm_interleave2 (d))
40601 if (expand_vec_perm_broadcast (d))
40604 if (expand_vec_perm_vpermq_perm_1 (d))
40607 if (expand_vec_perm_vperm2f128 (d))
40610 /* Try sequences of three instructions. */
40612 if (expand_vec_perm_2vperm2f128_vshuf (d))
40615 if (expand_vec_perm_pshufb2 (d))
40618 if (expand_vec_perm_interleave3 (d))
40621 if (expand_vec_perm_vperm2f128_vblend (d))
40624 /* Try sequences of four instructions. */
40626 if (expand_vec_perm_vpshufb2_vpermq (d))
40629 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
40632 /* ??? Look for narrow permutations whose element orderings would
40633 allow the promotion to a wider mode. */
40635 /* ??? Look for sequences of interleave or a wider permute that place
40636 the data into the correct lanes for a half-vector shuffle like
40637 pshuf[lh]w or vpermilps. */
40639 /* ??? Look for sequences of interleave that produce the desired results.
40640 The combinatorics of punpck[lh] get pretty ugly... */
40642 if (expand_vec_perm_even_odd (d))
40645 /* Even longer sequences. */
40646 if (expand_vec_perm_vpshufb4_vpermq2 (d))
40652 /* If a permutation only uses one operand, make it clear. Returns true
40653 if the permutation references both operands. */
40656 canonicalize_perm (struct expand_vec_perm_d *d)
40658 int i, which, nelt = d->nelt;
40660 for (i = which = 0; i < nelt; ++i)
40661 which |= (d->perm[i] < nelt ? 1 : 2);
40663 d->one_operand_p = true;
40670 if (!rtx_equal_p (d->op0, d->op1))
40672 d->one_operand_p = false;
40675 /* The elements of PERM do not suggest that only the first operand
40676 is used, but both operands are identical. Allow easier matching
40677 of the permutation by folding the permutation into the single operand itself. */
40682 for (i = 0; i < nelt; ++i)
40683 d->perm[i] &= nelt - 1;
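/* E.g. with nelt == 4 and identical operands, a selector of
   { 2, 7, 0, 5 } is folded to { 2, 3, 0, 1 } so the single-operand
   matchers can recognize it. */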
40692 return (which == 3);
40696 ix86_expand_vec_perm_const (rtx operands[4])
40698 struct expand_vec_perm_d d;
40699 unsigned char perm[MAX_VECT_LEN];
40704 d.target = operands[0];
40705 d.op0 = operands[1];
40706 d.op1 = operands[2];
40709 d.vmode = GET_MODE (d.target);
40710 gcc_assert (VECTOR_MODE_P (d.vmode));
40711 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
40712 d.testing_p = false;
40714 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
40715 gcc_assert (XVECLEN (sel, 0) == nelt);
40716 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
40718 for (i = 0; i < nelt; ++i)
40720 rtx e = XVECEXP (sel, 0, i);
40721 int ei = INTVAL (e) & (2 * nelt - 1);
40726 two_args = canonicalize_perm (&d);
40728 if (ix86_expand_vec_perm_const_1 (&d))
40731 /* If the selector says both arguments are needed, but the operands are the
40732 same, the above tried to expand with one_operand_p and flattened selector.
40733 If that didn't work, retry without one_operand_p; we succeeded with that during testing. */
40735 if (two_args && d.one_operand_p)
40737 d.one_operand_p = false;
40738 memcpy (d.perm, perm, sizeof (perm));
40739 return ix86_expand_vec_perm_const_1 (&d);
40745 /* Implement targetm.vectorize.vec_perm_const_ok. */
40748 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
40749 const unsigned char *sel)
40751 struct expand_vec_perm_d d;
40752 unsigned int i, nelt, which;
40756 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
40757 d.testing_p = true;
40759 /* Given sufficient ISA support we can just return true here
40760 for selected vector modes. */
40761 if (GET_MODE_SIZE (d.vmode) == 16)
40763 /* All implementable with a single vpperm insn. */
40766 /* All implementable with 2 pshufb + 1 ior. */
40769 /* All implementable with shufpd or unpck[lh]pd. */
40774 /* Extract the values from the vector CST into the permutation array in D. */
40776 memcpy (d.perm, sel, nelt);
40777 for (i = which = 0; i < nelt; ++i)
40779 unsigned char e = d.perm[i];
40780 gcc_assert (e < 2 * nelt);
40781 which |= (e < nelt ? 1 : 2);
40784 /* For all elements from the second vector, fold the elements to the first. */
40786 for (i = 0; i < nelt; ++i)
40789 /* Check whether the mask can be applied to the vector type. */
40790 d.one_operand_p = (which != 3);
40792 /* Implementable with shufps or pshufd. */
40793 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
40796 /* Otherwise we have to go through the motions and see if we can
40797 figure out how to generate the requested permutation. */
40798 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
40799 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
40800 if (!d.one_operand_p)
40801 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
40804 ret = ix86_expand_vec_perm_const_1 (&d);
40811 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
40813 struct expand_vec_perm_d d;
40819 d.vmode = GET_MODE (targ);
40820 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
40821 d.one_operand_p = false;
40822 d.testing_p = false;
40824 for (i = 0; i < nelt; ++i)
40825 d.perm[i] = i * 2 + odd;
40827 /* We'll either be able to implement the permutation directly... */
40828 if (expand_vec_perm_1 (&d))
40831 /* ... or we use the special-case patterns. */
40832 expand_vec_perm_even_odd_1 (&d, odd);
40836 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
40838 struct expand_vec_perm_d d;
40839 unsigned i, nelt, base;
40845 d.vmode = GET_MODE (targ);
40846 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
40847 d.one_operand_p = false;
40848 d.testing_p = false;
40850 base = high_p ? nelt / 2 : 0;
40851 for (i = 0; i < nelt / 2; ++i)
40853 d.perm[i * 2] = i + base;
40854 d.perm[i * 2 + 1] = i + base + nelt;
40857 /* Note that for AVX this isn't one instruction. */
40858 ok = ix86_expand_vec_perm_const_1 (&d);
40863 /* Expand a vector operation CODE for a V*QImode in terms of the
40864 same operation on V*HImode. */
40867 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
40869 enum machine_mode qimode = GET_MODE (dest);
40870 enum machine_mode himode;
40871 rtx (*gen_il) (rtx, rtx, rtx);
40872 rtx (*gen_ih) (rtx, rtx, rtx);
40873 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
40874 struct expand_vec_perm_d d;
40875 bool ok, full_interleave;
40876 bool uns_p = false;
40883 gen_il = gen_vec_interleave_lowv16qi;
40884 gen_ih = gen_vec_interleave_highv16qi;
40887 himode = V16HImode;
40888 gen_il = gen_avx2_interleave_lowv32qi;
40889 gen_ih = gen_avx2_interleave_highv32qi;
40892 gcc_unreachable ();
40895 op2_l = op2_h = op2;
40899 /* Unpack data such that we've got a source byte in each low byte of
40900 each word. We don't care what goes into the high byte of each word.
40901 Rather than trying to get zero in there, most convenient is to let
40902 it be a copy of the low byte. */
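/* As an example, interleaving a V16QImode operand { b0, b1, ..., b15 }
   with itself gives { b0, b0, b1, b1, ... } (low) and
   { b8, b8, b9, b9, ... } (high); viewed as words, every low byte is the
   wanted source byte and the high byte is just a duplicate of it. */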
40903 op2_l = gen_reg_rtx (qimode);
40904 op2_h = gen_reg_rtx (qimode);
40905 emit_insn (gen_il (op2_l, op2, op2));
40906 emit_insn (gen_ih (op2_h, op2, op2));
40909 op1_l = gen_reg_rtx (qimode);
40910 op1_h = gen_reg_rtx (qimode);
40911 emit_insn (gen_il (op1_l, op1, op1));
40912 emit_insn (gen_ih (op1_h, op1, op1));
40913 full_interleave = qimode == V16QImode;
40921 op1_l = gen_reg_rtx (himode);
40922 op1_h = gen_reg_rtx (himode);
40923 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
40924 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
40925 full_interleave = true;
40928 gcc_unreachable ();
40931 /* Perform the operation. */
40932 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
40934 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
40936 gcc_assert (res_l && res_h);
40938 /* Merge the data back into the right place. */
40940 d.op0 = gen_lowpart (qimode, res_l);
40941 d.op1 = gen_lowpart (qimode, res_h);
40943 d.nelt = GET_MODE_NUNITS (qimode);
40944 d.one_operand_p = false;
40945 d.testing_p = false;
40947 if (full_interleave)
40949 /* For SSE2, we used a full interleave, so the desired
40950 results are in the even elements. */
40951 for (i = 0; i < 32; ++i)
40956 /* For AVX, the interleave used above was not cross-lane. So the
40957 extraction is evens but with the second and third quarter swapped.
40958 Happily, that is even one insn shorter than even extraction. */
40959 for (i = 0; i < 32; ++i)
40960 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
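/* For instance, i == 8 gives 8 * 2 + 16 == 32, i.e. even element 0 of
   d.op1 (res_h), while i == 16 gives 16 * 2 - 16 == 16, i.e. element 16
   of d.op0 (res_l), which is the quarter swap described above. */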
40963 ok = ix86_expand_vec_perm_const_1 (&d);
40966 set_unique_reg_note (get_last_insn (), REG_EQUAL,
40967 gen_rtx_fmt_ee (code, qimode, op1, op2));
40970 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
40971 if op is CONST_VECTOR with all odd elements equal to their
40972 preceding element. */
40975 const_vector_equal_evenodd_p (rtx op)
40977 enum machine_mode mode = GET_MODE (op);
40978 int i, nunits = GET_MODE_NUNITS (mode);
40979 if (GET_CODE (op) != CONST_VECTOR
40980 || nunits != CONST_VECTOR_NUNITS (op))
40982 for (i = 0; i < nunits; i += 2)
40983 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
40989 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
40990 bool uns_p, bool odd_p)
40992 enum machine_mode mode = GET_MODE (op1);
40993 enum machine_mode wmode = GET_MODE (dest);
40995 rtx orig_op1 = op1, orig_op2 = op2;
40997 if (!nonimmediate_operand (op1, mode))
40998 op1 = force_reg (mode, op1);
40999 if (!nonimmediate_operand (op2, mode))
41000 op2 = force_reg (mode, op2);
41002 /* We only play even/odd games with vectors of SImode. */
41003 gcc_assert (mode == V4SImode || mode == V8SImode);
41005 /* If we're looking for the odd results, shift those members down to
41006 the even slots. For some cpus this is faster than a PSHUFD. */
41009 /* For XOP use vpmacsdqh, but only for smult, as it is only signed. */
41011 if (TARGET_XOP && mode == V4SImode && !uns_p)
41013 x = force_reg (wmode, CONST0_RTX (wmode));
41014 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
41018 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
41019 if (!const_vector_equal_evenodd_p (orig_op1))
41020 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
41021 x, NULL, 1, OPTAB_DIRECT);
41022 if (!const_vector_equal_evenodd_p (orig_op2))
41023 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
41024 x, NULL, 1, OPTAB_DIRECT);
41025 op1 = gen_lowpart (mode, op1);
41026 op2 = gen_lowpart (mode, op2);
41029 if (mode == V8SImode)
41032 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
41034 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
41037 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
41038 else if (TARGET_SSE4_1)
41039 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
41042 rtx s1, s2, t0, t1, t2;
41044 /* The easiest way to implement this without PMULDQ is to go through
41045 the motions as if we are performing a full 64-bit multiply, except
41046 that we need to do less shuffling of the elements. */
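/* The identity being used (per element, mod 2^64):
     (s64) a * (s64) b
       == (u64) a * (u64) b - (((a < 0 ? b : 0) + (b < 0 ? a : 0)) << 32)
   so the sign masks s1 and s2 computed below supply the two extra
   unsigned widening multiplies that correct the high parts. */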
41048 /* Compute the sign-extension, aka highparts, of the two operands. */
41049 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
41050 op1, pc_rtx, pc_rtx);
41051 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
41052 op2, pc_rtx, pc_rtx);
41054 /* Multiply LO(A) * HI(B), and vice-versa. */
41055 t1 = gen_reg_rtx (wmode);
41056 t2 = gen_reg_rtx (wmode);
41057 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
41058 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
41060 /* Multiply LO(A) * LO(B). */
41061 t0 = gen_reg_rtx (wmode);
41062 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
41064 /* Combine and shift the highparts into place. */
41065 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
41066 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
41069 /* Combine high and low parts. */
41070 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
41077 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
41078 bool uns_p, bool high_p)
41080 enum machine_mode wmode = GET_MODE (dest);
41081 enum machine_mode mode = GET_MODE (op1);
41082 rtx t1, t2, t3, t4, mask;
41087 t1 = gen_reg_rtx (mode);
41088 t2 = gen_reg_rtx (mode);
41089 if (TARGET_XOP && !uns_p)
41091 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
41092 shuffle the elements once so that all elements are in the right
41093 place for immediate use: { A C B D }. */
41094 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
41095 const1_rtx, GEN_INT (3)));
41096 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
41097 const1_rtx, GEN_INT (3)));
41101 /* Put the elements into place for the multiply. */
41102 ix86_expand_vec_interleave (t1, op1, op1, high_p);
41103 ix86_expand_vec_interleave (t2, op2, op2, high_p);
41106 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
41110 /* Shuffle the elements between the lanes. After this we
41111 have { A B E F | C D G H } for each operand. */
41112 t1 = gen_reg_rtx (V4DImode);
41113 t2 = gen_reg_rtx (V4DImode);
41114 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
41115 const0_rtx, const2_rtx,
41116 const1_rtx, GEN_INT (3)));
41117 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
41118 const0_rtx, const2_rtx,
41119 const1_rtx, GEN_INT (3)));
41121 /* Shuffle the elements within the lanes. After this we
41122 have { A A B B | C C D D } or { E E F F | G G H H }. */
41123 t3 = gen_reg_rtx (V8SImode);
41124 t4 = gen_reg_rtx (V8SImode);
41125 mask = GEN_INT (high_p
41126 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
41127 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
41128 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
41129 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
41131 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
41136 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
41137 uns_p, OPTAB_DIRECT);
41138 t2 = expand_binop (mode,
41139 uns_p ? umul_highpart_optab : smul_highpart_optab,
41140 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
41141 gcc_assert (t1 && t2);
41143 ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
41148 t1 = gen_reg_rtx (wmode);
41149 t2 = gen_reg_rtx (wmode);
41150 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
41151 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
41153 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
41157 gcc_unreachable ();
41162 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
41166 res_1 = gen_reg_rtx (V4SImode);
41167 res_2 = gen_reg_rtx (V4SImode);
41168 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
41169 op1, op2, true, false);
41170 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
41171 op1, op2, true, true);
41173 /* Move the results in element 2 down to element 1; we don't care
41174 what goes in elements 2 and 3. Then we can merge the parts
41175 back together with an interleave.
41177 Note that two other sequences were tried:
41178 (1) Use interleaves at the start instead of psrldq, which allows
41179 us to use a single shufps to merge things back at the end.
41180 (2) Use shufps here to combine the two vectors, then pshufd to
41181 put the elements in the correct order.
41182 In both cases the cost of the reformatting stall was too high
41183 and the overall sequence slower. */
41185 emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
41186 const0_rtx, const0_rtx));
41187 emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
41188 const0_rtx, const0_rtx));
41189 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
41191 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
41195 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
41197 enum machine_mode mode = GET_MODE (op0);
41198 rtx t1, t2, t3, t4, t5, t6;
41200 if (TARGET_XOP && mode == V2DImode)
41202 /* op1: A,B,C,D, op2: E,F,G,H */
41203 op1 = gen_lowpart (V4SImode, op1);
41204 op2 = gen_lowpart (V4SImode, op2);
41206 t1 = gen_reg_rtx (V4SImode);
41207 t2 = gen_reg_rtx (V4SImode);
41208 t3 = gen_reg_rtx (V2DImode);
41209 t4 = gen_reg_rtx (V2DImode);
41212 emit_insn (gen_sse2_pshufd_1 (t1, op1,
41218 /* t2: (B*E),(A*F),(D*G),(C*H) */
41219 emit_insn (gen_mulv4si3 (t2, t1, op2));
41221 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
41222 emit_insn (gen_xop_phadddq (t3, t2));
41224 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
41225 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
41227 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
41228 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
41232 enum machine_mode nmode;
41233 rtx (*umul) (rtx, rtx, rtx);
41235 if (mode == V2DImode)
41237 umul = gen_vec_widen_umult_even_v4si;
41240 else if (mode == V4DImode)
41242 umul = gen_vec_widen_umult_even_v8si;
41246 gcc_unreachable ();
41249 /* Multiply low parts. */
41250 t1 = gen_reg_rtx (mode);
41251 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
41253 /* Shift input vectors right 32 bits so we can multiply high parts. */
41255 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
41256 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
41258 /* Multiply high parts by low parts. */
41259 t4 = gen_reg_rtx (mode);
41260 t5 = gen_reg_rtx (mode);
41261 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
41262 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
41264 /* Combine and shift the highparts back. */
41265 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
41266 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
41268 /* Combine high and low parts. */
41269 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
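/* The sequence above computes, per element and mod 2^64,
     a * b == a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << 32)
   where the low halves are multiplied by the unsigned widening multiply
   and the high halves come from the 32-bit right shifts. */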
41272 set_unique_reg_note (get_last_insn (), REG_EQUAL,
41273 gen_rtx_MULT (mode, op1, op2));
41276 /* Expand an insert into a vector register through pinsr insn.
41277 Return true if successful. */
41280 ix86_expand_pinsr (rtx *operands)
41282 rtx dst = operands[0];
41283 rtx src = operands[3];
41285 unsigned int size = INTVAL (operands[1]);
41286 unsigned int pos = INTVAL (operands[2]);
41288 if (GET_CODE (dst) == SUBREG)
41290 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
41291 dst = SUBREG_REG (dst);
41294 if (GET_CODE (src) == SUBREG)
41295 src = SUBREG_REG (src);
41297 switch (GET_MODE (dst))
41304 enum machine_mode srcmode, dstmode;
41305 rtx (*pinsr)(rtx, rtx, rtx, rtx);
41307 srcmode = mode_for_size (size, MODE_INT, 0);
41312 if (!TARGET_SSE4_1)
41314 dstmode = V16QImode;
41315 pinsr = gen_sse4_1_pinsrb;
41321 dstmode = V8HImode;
41322 pinsr = gen_sse2_pinsrw;
41326 if (!TARGET_SSE4_1)
41328 dstmode = V4SImode;
41329 pinsr = gen_sse4_1_pinsrd;
41333 gcc_assert (TARGET_64BIT);
41334 if (!TARGET_SSE4_1)
41336 dstmode = V2DImode;
41337 pinsr = gen_sse4_1_pinsrq;
41344 dst = gen_lowpart (dstmode, dst);
41345 src = gen_lowpart (srcmode, src);
41349 emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
41358 /* This function returns the calling-ABI-specific va_list type node.
41359 It returns the FNDECL-specific va_list type. */
41362 ix86_fn_abi_va_list (tree fndecl)
41365 return va_list_type_node;
41366 gcc_assert (fndecl != NULL_TREE);
41368 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
41369 return ms_va_list_type_node;
41371 return sysv_va_list_type_node;
41374 /* Returns the canonical va_list type specified by TYPE. If there
41375 is no valid TYPE provided, it returns NULL_TREE. */
41378 ix86_canonical_va_list_type (tree type)
41382 /* Resolve references and pointers to va_list type. */
41383 if (TREE_CODE (type) == MEM_REF)
41384 type = TREE_TYPE (type);
41385 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
41386 type = TREE_TYPE (type);
41387 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
41388 type = TREE_TYPE (type);
41390 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
41392 wtype = va_list_type_node;
41393 gcc_assert (wtype != NULL_TREE);
41395 if (TREE_CODE (wtype) == ARRAY_TYPE)
41397 /* If va_list is an array type, the argument may have decayed
41398 to a pointer type, e.g. by being passed to another function.
41399 In that case, unwrap both types so that we can compare the
41400 underlying records. */
41401 if (TREE_CODE (htype) == ARRAY_TYPE
41402 || POINTER_TYPE_P (htype))
41404 wtype = TREE_TYPE (wtype);
41405 htype = TREE_TYPE (htype);
41408 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
41409 return va_list_type_node;
41410 wtype = sysv_va_list_type_node;
41411 gcc_assert (wtype != NULL_TREE);
41413 if (TREE_CODE (wtype) == ARRAY_TYPE)
41415 /* If va_list is an array type, the argument may have decayed
41416 to a pointer type, e.g. by being passed to another function.
41417 In that case, unwrap both types so that we can compare the
41418 underlying records. */
41419 if (TREE_CODE (htype) == ARRAY_TYPE
41420 || POINTER_TYPE_P (htype))
41422 wtype = TREE_TYPE (wtype);
41423 htype = TREE_TYPE (htype);
41426 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
41427 return sysv_va_list_type_node;
41428 wtype = ms_va_list_type_node;
41429 gcc_assert (wtype != NULL_TREE);
41431 if (TREE_CODE (wtype) == ARRAY_TYPE)
41433 /* If va_list is an array type, the argument may have decayed
41434 to a pointer type, e.g. by being passed to another function.
41435 In that case, unwrap both types so that we can compare the
41436 underlying records. */
41437 if (TREE_CODE (htype) == ARRAY_TYPE
41438 || POINTER_TYPE_P (htype))
41440 wtype = TREE_TYPE (wtype);
41441 htype = TREE_TYPE (htype);
41444 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
41445 return ms_va_list_type_node;
41448 return std_canonical_va_list_type (type);
41451 /* Iterate through the target-specific builtin types for va_list.
41452 IDX denotes the iterator, *PTREE is set to the result type of
41453 the va_list builtin, and *PNAME to its internal type.
41454 Returns zero if there is no element for this index, otherwise
41455 IDX should be increased upon the next call.
41456 Note, do not iterate a base builtin's name like __builtin_va_list.
41457 Used from c_common_nodes_and_builtins. */
41460 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
41470 *ptree = ms_va_list_type_node;
41471 *pname = "__builtin_ms_va_list";
41475 *ptree = sysv_va_list_type_node;
41476 *pname = "__builtin_sysv_va_list";
41484 #undef TARGET_SCHED_DISPATCH
41485 #define TARGET_SCHED_DISPATCH has_dispatch
41486 #undef TARGET_SCHED_DISPATCH_DO
41487 #define TARGET_SCHED_DISPATCH_DO do_dispatch
41488 #undef TARGET_SCHED_REASSOCIATION_WIDTH
41489 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
41490 #undef TARGET_SCHED_REORDER
41491 #define TARGET_SCHED_REORDER ix86_sched_reorder
41492 #undef TARGET_SCHED_ADJUST_PRIORITY
41493 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
41494 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
41495 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
41496 ix86_dependencies_evaluation_hook
41498 /* The size of the dispatch window is the total number of bytes of
41499 object code allowed in a window. */
41500 #define DISPATCH_WINDOW_SIZE 16
41502 /* Number of dispatch windows considered for scheduling. */
41503 #define MAX_DISPATCH_WINDOWS 3
41505 /* Maximum number of instructions in a window. */
41508 /* Maximum number of immediate operands in a window. */
41511 /* Maximum number of immediate bits allowed in a window. */
41512 #define MAX_IMM_SIZE 128
41514 /* Maximum number of 32 bit immediates allowed in a window. */
41515 #define MAX_IMM_32 4
41517 /* Maximum number of 64 bit immediates allowed in a window. */
41518 #define MAX_IMM_64 2
41520 /* Maximum total of loads or prefetches allowed in a window. */
41523 /* Maximum total of stores allowed in a window. */
41524 #define MAX_STORE 1
41530 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
41531 enum dispatch_group {
41546 /* Number of allowable groups in a dispatch window. It is an array
41547 indexed by dispatch_group enum. 100 is used as a big number,
41548 because the number of these kinds of operations does not have any
41549 effect in a dispatch window, but we need them for other reasons in the table. */
41551 static unsigned int num_allowable_groups[disp_last] = {
41552 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
41555 char group_name[disp_last + 1][16] = {
41556 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
41557 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
41558 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
41561 /* Instruction path. */
41564 path_single, /* Single micro op. */
41565 path_double, /* Double micro op. */
41566 path_multi, /* Instructions with more than 2 micro ops. */
41570 /* sched_insn_info defines a window to the instructions scheduled in
41571 the basic block. It contains a pointer to the insn_info table and
41572 the instruction scheduled.
41574 Windows are allocated for each basic block and are linked with each other. */
41576 typedef struct sched_insn_info_s {
41578 enum dispatch_group group;
41579 enum insn_path path;
41584 /* Linked list of dispatch windows. This is a two-way list of
41585 dispatch windows of a basic block. It contains information about
41586 the number of uops in the window and the total number of
41587 instructions and of bytes in the object code for this dispatch window. */
41589 typedef struct dispatch_windows_s {
41590 int num_insn; /* Number of insn in the window. */
41591 int num_uops; /* Number of uops in the window. */
41592 int window_size; /* Number of bytes in the window. */
41593 int window_num; /* Window number, 0 or 1. */
41594 int num_imm; /* Number of immediates in an insn. */
41595 int num_imm_32; /* Number of 32 bit immediates in an insn. */
41596 int num_imm_64; /* Number of 64 bit immediates in an insn. */
41597 int imm_size; /* Total immediates in the window. */
41598 int num_loads; /* Total memory loads in the window. */
41599 int num_stores; /* Total memory stores in the window. */
41600 int violation; /* Violation exists in window. */
41601 sched_insn_info *window; /* Pointer to the window. */
41602 struct dispatch_windows_s *next;
41603 struct dispatch_windows_s *prev;
41604 } dispatch_windows;
41606 /* Immediate values used in an insn. */
41607 typedef struct imm_info_s
41614 static dispatch_windows *dispatch_window_list;
41615 static dispatch_windows *dispatch_window_list1;
41617 /* Get dispatch group of insn. */
41619 static enum dispatch_group
41620 get_mem_group (rtx insn)
41622 enum attr_memory memory;
41624 if (INSN_CODE (insn) < 0)
41625 return disp_no_group;
41626 memory = get_attr_memory (insn);
41627 if (memory == MEMORY_STORE)
41630 if (memory == MEMORY_LOAD)
41633 if (memory == MEMORY_BOTH)
41634 return disp_load_store;
41636 return disp_no_group;
41639 /* Return true if insn is a compare instruction. */
41644 enum attr_type type;
41646 type = get_attr_type (insn);
41647 return (type == TYPE_TEST
41648 || type == TYPE_ICMP
41649 || type == TYPE_FCMP
41650 || GET_CODE (PATTERN (insn)) == COMPARE);
41653 /* Return true if a dispatch violation was encountered. */
41656 dispatch_violation (void)
41658 if (dispatch_window_list->next)
41659 return dispatch_window_list->next->violation;
41660 return dispatch_window_list->violation;
41663 /* Return true if insn is a branch instruction. */
41666 is_branch (rtx insn)
41668 return (CALL_P (insn) || JUMP_P (insn));
41671 /* Return true if insn is a prefetch instruction. */
41674 is_prefetch (rtx insn)
41676 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
41679 /* This function initializes a dispatch window and the list container holding a
41680 pointer to the window. */
41683 init_window (int window_num)
41686 dispatch_windows *new_list;
41688 if (window_num == 0)
41689 new_list = dispatch_window_list;
41691 new_list = dispatch_window_list1;
41693 new_list->num_insn = 0;
41694 new_list->num_uops = 0;
41695 new_list->window_size = 0;
41696 new_list->next = NULL;
41697 new_list->prev = NULL;
41698 new_list->window_num = window_num;
41699 new_list->num_imm = 0;
41700 new_list->num_imm_32 = 0;
41701 new_list->num_imm_64 = 0;
41702 new_list->imm_size = 0;
41703 new_list->num_loads = 0;
41704 new_list->num_stores = 0;
41705 new_list->violation = false;
41707 for (i = 0; i < MAX_INSN; i++)
41709 new_list->window[i].insn = NULL;
41710 new_list->window[i].group = disp_no_group;
41711 new_list->window[i].path = no_path;
41712 new_list->window[i].byte_len = 0;
41713 new_list->window[i].imm_bytes = 0;
41718 /* This function allocates and initializes a dispatch window and the
41719 list container holding a pointer to the window. */
41721 static dispatch_windows *
41722 allocate_window (void)
41724 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
41725 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
41730 /* This routine initializes the dispatch scheduling information. It
41731 initiates building dispatch scheduler tables and constructs the
41732 first dispatch window. */
41735 init_dispatch_sched (void)
41737 /* Allocate a dispatch list and a window. */
41738 dispatch_window_list = allocate_window ();
41739 dispatch_window_list1 = allocate_window ();
41744 /* This function returns true if a branch is detected. End of a basic block
41745 does not have to be a branch, but here we assume only branches end a
41749 is_end_basic_block (enum dispatch_group group)
41751 return group == disp_branch;
41754 /* This function is called when the end of a window processing is reached. */
41757 process_end_window (void)
41759 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
41760 if (dispatch_window_list->next)
41762 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
41763 gcc_assert (dispatch_window_list->window_size
41764 + dispatch_window_list1->window_size <= 48);
41770 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
41771 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
41772 for 48 bytes of instructions. Note that these windows are not dispatch
41773 windows whose size is DISPATCH_WINDOW_SIZE. */
41775 static dispatch_windows *
41776 allocate_next_window (int window_num)
41778 if (window_num == 0)
41780 if (dispatch_window_list->next)
41783 return dispatch_window_list;
41786 dispatch_window_list->next = dispatch_window_list1;
41787 dispatch_window_list1->prev = dispatch_window_list;
41789 return dispatch_window_list1;
41792 /* Increment the number of immediate operands of an instruction. */
41795 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
41800 switch ( GET_CODE (*in_rtx))
41805 (imm_values->imm)++;
41806 if (x86_64_immediate_operand (*in_rtx, SImode))
41807 (imm_values->imm32)++;
41809 (imm_values->imm64)++;
41813 (imm_values->imm)++;
41814 (imm_values->imm64)++;
41818 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
41820 (imm_values->imm)++;
41821 (imm_values->imm32)++;
41832 /* Compute number of immediate operands of an instruction. */
41835 find_constant (rtx in_rtx, imm_info *imm_values)
41837 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
41838 (rtx_function) find_constant_1, (void *) imm_values);
41841 /* Return total size of immediate operands of an instruction along with number
41842 of corresponding immediate-operands. It initializes its parameters to zero
41843 before calling FIND_CONSTANT.
41844 INSN is the input instruction. IMM is the total of immediates.
41845 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
41849 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
41851 imm_info imm_values = {0, 0, 0};
41853 find_constant (insn, &imm_values);
41854 *imm = imm_values.imm;
41855 *imm32 = imm_values.imm32;
41856 *imm64 = imm_values.imm64;
41857 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
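/* For example, an insn carrying one 32-bit and one 64-bit immediate
   reports *imm == 2, *imm32 == 1, *imm64 == 1 and returns a total of
   1 * 4 + 1 * 8 == 12 bytes. */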
41860 /* This function indicates if an operand of an instruction is an immediate. */
41864 has_immediate (rtx insn)
41866 int num_imm_operand;
41867 int num_imm32_operand;
41868 int num_imm64_operand;
41871 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
41872 &num_imm64_operand);
41876 /* Return single or double path for instructions. */
41878 static enum insn_path
41879 get_insn_path (rtx insn)
41881 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
41883 if ((int)path == 0)
41884 return path_single;
41886 if ((int)path == 1)
41887 return path_double;
41892 /* Return insn dispatch group. */
41894 static enum dispatch_group
41895 get_insn_group (rtx insn)
41897 enum dispatch_group group = get_mem_group (insn);
41901 if (is_branch (insn))
41902 return disp_branch;
41907 if (has_immediate (insn))
41910 if (is_prefetch (insn))
41911 return disp_prefetch;
41913 return disp_no_group;
41916 /* Count number of GROUP restricted instructions in a dispatch
41917 window WINDOW_LIST. */
41920 count_num_restricted (rtx insn, dispatch_windows *window_list)
41922 enum dispatch_group group = get_insn_group (insn);
41924 int num_imm_operand;
41925 int num_imm32_operand;
41926 int num_imm64_operand;
41928 if (group == disp_no_group)
41931 if (group == disp_imm)
41933 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
41934 &num_imm64_operand);
41935 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
41936 || num_imm_operand + window_list->num_imm > MAX_IMM
41937 || (num_imm32_operand > 0
41938 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
41939 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
41940 || (num_imm64_operand > 0
41941 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
41942 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
41943 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
41944 && num_imm64_operand > 0
41945 && ((window_list->num_imm_64 > 0
41946 && window_list->num_insn >= 2)
41947 || window_list->num_insn >= 3)))
41953 if ((group == disp_load_store
41954 && (window_list->num_loads >= MAX_LOAD
41955 || window_list->num_stores >= MAX_STORE))
41956 || ((group == disp_load
41957 || group == disp_prefetch)
41958 && window_list->num_loads >= MAX_LOAD)
41959 || (group == disp_store
41960 && window_list->num_stores >= MAX_STORE))
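/* A minimal standalone sketch of one rule used in the immediate checks
   above: a 64-bit immediate is treated as occupying two 32-bit immediate
   slots when testing against MAX_IMM_32.  The names and the limit value
   below are hypothetical; the block is guarded out of compilation.  */
#if 0
#include <stdbool.h>

#define EXAMPLE_MAX_IMM_32 4   /* Hypothetical 32-bit immediate slot limit.  */

static bool
example_imm32_slots_exceeded (int num_imm32, int num_imm64)
{
  /* Each 64-bit immediate uses two 32-bit slots.  */
  return num_imm32 + num_imm64 * 2 > EXAMPLE_MAX_IMM_32;
}
#endif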
41966 /* This function returns true if insn satisfies dispatch rules on the
41967 last window scheduled. */
41970 fits_dispatch_window (rtx insn)
41972 dispatch_windows *window_list = dispatch_window_list;
41973 dispatch_windows *window_list_next = dispatch_window_list->next;
41974 unsigned int num_restrict;
41975 enum dispatch_group group = get_insn_group (insn);
41976 enum insn_path path = get_insn_path (insn);
41979 /* Make disp_cmp and disp_jcc get scheduled as late as possible. These
41980 instructions should be given the lowest priority in the
41981 scheduling process in the Haifa scheduler to make sure they will be
41982 scheduled in the same dispatch window as the reference to them. */
41983 if (group == disp_jcc || group == disp_cmp)
41986 /* Check nonrestricted. */
41987 if (group == disp_no_group || group == disp_branch)
41990 /* Get last dispatch window. */
41991 if (window_list_next)
41992 window_list = window_list_next;
41994 if (window_list->window_num == 1)
41996 sum = window_list->prev->window_size + window_list->window_size;
41999 || (min_insn_size (insn) + sum) >= 48)
42000 /* Window 1 is full. Go for next window. */
42004 num_restrict = count_num_restricted (insn, window_list);
42006 if (num_restrict > num_allowable_groups[group])
42009 /* See if it fits in the first window. */
42010 if (window_list->window_num == 0)
42012 /* The first window should have only single- and double-path uops. */
42014 if (path == path_double
42015 && (window_list->num_uops + 2) > MAX_INSN)
42017 else if (path != path_single)
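/* A minimal standalone sketch of the window-1 capacity rule used above:
   window 1 counts as full when the bytes already in windows 0 and 1 reach
   32, or when adding the new instruction would bring the pair to 48 bytes
   or more.  The names are hypothetical; the block is guarded out of
   compilation.  */
#if 0
#include <stdbool.h>

static bool
example_window1_full (int window0_bytes, int window1_bytes, int insn_bytes)
{
  int sum = window0_bytes + window1_bytes;

  /* Mirrors the "sum == 32 || insn_bytes + sum >= 48" style of check.  */
  return sum == 32 || insn_bytes + sum >= 48;
}
#endif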
42023 /* Add an instruction INSN with NUM_UOPS micro-operations to the
42024 dispatch window WINDOW_LIST. */
42027 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
42029 int byte_len = min_insn_size (insn);
42030 int num_insn = window_list->num_insn;
42032 sched_insn_info *window = window_list->window;
42033 enum dispatch_group group = get_insn_group (insn);
42034 enum insn_path path = get_insn_path (insn);
42035 int num_imm_operand;
42036 int num_imm32_operand;
42037 int num_imm64_operand;
42039 if (!window_list->violation && group != disp_cmp
42040 && !fits_dispatch_window (insn))
42041 window_list->violation = true;
42043 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
42044 &num_imm64_operand);
42046 /* Initialize window with new instruction. */
42047 window[num_insn].insn = insn;
42048 window[num_insn].byte_len = byte_len;
42049 window[num_insn].group = group;
42050 window[num_insn].path = path;
42051 window[num_insn].imm_bytes = imm_size;
42053 window_list->window_size += byte_len;
42054 window_list->num_insn = num_insn + 1;
42055 window_list->num_uops = window_list->num_uops + num_uops;
42056 window_list->imm_size += imm_size;
42057 window_list->num_imm += num_imm_operand;
42058 window_list->num_imm_32 += num_imm32_operand;
42059 window_list->num_imm_64 += num_imm64_operand;
42061 if (group == disp_store)
42062 window_list->num_stores += 1;
42063 else if (group == disp_load
42064 || group == disp_prefetch)
42065 window_list->num_loads += 1;
42066 else if (group == disp_load_store)
42068 window_list->num_stores += 1;
42069 window_list->num_loads += 1;
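/* A minimal standalone sketch of the bookkeeping performed above: adding an
   instruction to a window bumps running totals for instructions, uops,
   bytes and immediate bytes.  The struct and function names below are
   hypothetical simplifications; the block is guarded out of compilation.  */
#if 0
struct example_window_totals
{
  int num_insn;
  int num_uops;
  int window_size;   /* Bytes of instructions in the window.  */
  int imm_size;      /* Bytes of immediate operands.  */
};

static void
example_account_insn (struct example_window_totals *w,
                      int byte_len, int num_uops, int imm_size)
{
  w->num_insn += 1;
  w->num_uops += num_uops;
  w->window_size += byte_len;
  w->imm_size += imm_size;
}
#endif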
42073 /* Adds a scheduled instruction, INSN, to the current dispatch window.
42074 If the total bytes of instructions or the number of instructions in
42075 the window exceeds the allowable limit, it allocates a new window. */
42078 add_to_dispatch_window (rtx insn)
42081 dispatch_windows *window_list;
42082 dispatch_windows *next_list;
42083 dispatch_windows *window0_list;
42084 enum insn_path path;
42085 enum dispatch_group insn_group;
42093 if (INSN_CODE (insn) < 0)
42096 byte_len = min_insn_size (insn);
42097 window_list = dispatch_window_list;
42098 next_list = window_list->next;
42099 path = get_insn_path (insn);
42100 insn_group = get_insn_group (insn);
42102 /* Get the last dispatch window. */
42104 window_list = dispatch_window_list->next;
42106 if (path == path_single)
42108 else if (path == path_double)
42111 insn_num_uops = (int) path;
42113 /* If the current window is full, get a new window.
42114 Window number zero is full if MAX_INSN uops are scheduled in it.
42115 Window number one is full if the sum of window zero's bytes and
42116 window one's bytes is 32, if adding the new instruction's bytes
42117 to that sum makes it greater than 48, or if it already has MAX_INSN
42118 instructions in it. */
42119 num_insn = window_list->num_insn;
42120 num_uops = window_list->num_uops;
42121 window_num = window_list->window_num;
42122 insn_fits = fits_dispatch_window (insn);
42124 if (num_insn >= MAX_INSN
42125 || num_uops + insn_num_uops > MAX_INSN
42128 window_num = ~window_num & 1;
42129 window_list = allocate_next_window (window_num);
42132 if (window_num == 0)
42134 add_insn_window (insn, window_list, insn_num_uops);
42135 if (window_list->num_insn >= MAX_INSN
42136 && insn_group == disp_branch)
42138 process_end_window ();
42142 else if (window_num == 1)
42144 window0_list = window_list->prev;
42145 sum = window0_list->window_size + window_list->window_size;
42147 || (byte_len + sum) >= 48)
42149 process_end_window ();
42150 window_list = dispatch_window_list;
42153 add_insn_window (insn, window_list, insn_num_uops);
42156 gcc_unreachable ();
42158 if (is_end_basic_block (insn_group))
42160 /* End of basic block is reached; do end-of-basic-block processing. */
42161 process_end_window ();
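/* A minimal standalone sketch of the window selection above: when the
   current window is full, the code flips between window 0 and window 1
   with "~window_num & 1".  The block below just demonstrates that toggle
   and is guarded out of compilation.  */
#if 0
#include <assert.h>

static void
example_window_toggle (void)
{
  int window_num = 0;

  window_num = ~window_num & 1;   /* 0 -> 1 */
  assert (window_num == 1);

  window_num = ~window_num & 1;   /* 1 -> 0 */
  assert (window_num == 0);
}
#endif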
42166 /* Print the dispatch window, WINDOW_NUM, to FILE. */
42168 DEBUG_FUNCTION static void
42169 debug_dispatch_window_file (FILE *file, int window_num)
42171 dispatch_windows *list;
42174 if (window_num == 0)
42175 list = dispatch_window_list;
42177 list = dispatch_window_list1;
42179 fprintf (file, "Window #%d:\n", list->window_num);
42180 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
42181 list->num_insn, list->num_uops, list->window_size);
42182 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
42183 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
42185 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
42187 fprintf (file, " insn info:\n");
42189 for (i = 0; i < MAX_INSN; i++)
42191 if (!list->window[i].insn)
42193 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
42194 i, group_name[list->window[i].group],
42195 i, (void *)list->window[i].insn,
42196 i, list->window[i].path,
42197 i, list->window[i].byte_len,
42198 i, list->window[i].imm_bytes);
42202 /* Print to stdout a dispatch window. */
42204 DEBUG_FUNCTION void
42205 debug_dispatch_window (int window_num)
42207 debug_dispatch_window_file (stdout, window_num);
42210 /* Print INSN dispatch information to FILE. */
42212 DEBUG_FUNCTION static void
42213 debug_insn_dispatch_info_file (FILE *file, rtx insn)
42216 enum insn_path path;
42217 enum dispatch_group group;
42219 int num_imm_operand;
42220 int num_imm32_operand;
42221 int num_imm64_operand;
42223 if (INSN_CODE (insn) < 0)
42226 byte_len = min_insn_size (insn);
42227 path = get_insn_path (insn);
42228 group = get_insn_group (insn);
42229 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
42230 &num_imm64_operand);
42232 fprintf (file, " insn info:\n");
42233 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
42234 group_name[group], path, byte_len);
42235 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
42236 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
42239 /* Print to STDOUT the status of the ready list with respect to
42240 dispatch windows. */
42242 DEBUG_FUNCTION void
42243 debug_ready_dispatch (void)
42246 int no_ready = number_in_ready ();
42248 fprintf (stdout, "Number of ready: %d\n", no_ready);
42250 for (i = 0; i < no_ready; i++)
42251 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
42254 /* This routine is the driver of the dispatch scheduler. */
42257 do_dispatch (rtx insn, int mode)
42259 if (mode == DISPATCH_INIT)
42260 init_dispatch_sched ();
42261 else if (mode == ADD_TO_DISPATCH_WINDOW)
42262 add_to_dispatch_window (insn);
42265 /* Return TRUE if Dispatch Scheduling is supported. */
42268 has_dispatch (rtx insn, int action)
42270 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3)
42271 && flag_dispatch_scheduler)
42277 case IS_DISPATCH_ON:
42282 return is_cmp (insn);
42284 case DISPATCH_VIOLATION:
42285 return dispatch_violation ();
42287 case FITS_DISPATCH_WINDOW:
42288 return fits_dispatch_window (insn);
42294 /* Implementation of reassociation_width target hook used by
42295 reassoc phase to identify parallelism level in reassociated
42296 tree. The statement's tree_code is passed in OPC; the arguments' type is passed in MODE.
42299 Currently parallel reassociation is enabled for Atom
42300 processors only and we set reassociation width to be 2
42301 because Atom may issue up to 2 instructions per cycle.
42303 Return value should be fixed if parallel reassociation is
42304 enabled for other processors. */
42307 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
42308 enum machine_mode mode)
42312 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
42314 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
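/* A minimal standalone sketch of the decision made by the hook above: a
   reassociation width of 2 when parallel reassociation is enabled for the
   mode, and 1 otherwise.  The flag parameter and function name are
   hypothetical; the block is guarded out of compilation.  */
#if 0
static int
example_reassociation_width (int parallel_reassoc_enabled)
{
  /* Width 2 matches a core that can issue two such operations per cycle.  */
  return parallel_reassoc_enabled ? 2 : 1;
}
#endif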
42320 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
42321 place emms and femms instructions. */
42323 static enum machine_mode
42324 ix86_preferred_simd_mode (enum machine_mode mode)
42332 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
42334 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
42336 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
42338 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
42341 if (TARGET_AVX && !TARGET_PREFER_AVX128)
42347 if (!TARGET_VECTORIZE_DOUBLE)
42349 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
42351 else if (TARGET_SSE2)
42360 /* If AVX is enabled then try vectorizing with both 256-bit and 128-bit vectors. */
42363 static unsigned int
42364 ix86_autovectorize_vector_sizes (void)
42366 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
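/* A minimal standalone sketch of the return value above: the hook reports a
   bitmask of vector sizes in bytes, so "32 | 16" advertises both 256-bit and
   128-bit vectors.  Guarded out of compilation.  */
#if 0
#include <assert.h>

static void
example_vector_size_mask (void)
{
  unsigned int sizes = 32 | 16;   /* 256-bit and 128-bit vectors.  */

  assert ((sizes & 32) != 0);
  assert ((sizes & 16) != 0);
  assert (sizes == 48);
}
#endif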
42371 /* Return class of registers which could be used for pseudo of MODE
42372 and of class RCLASS for spilling instead of memory. Return NO_REGS
42373 if it is not possible or not profitable. */
42375 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
42377 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
42378 && (mode == SImode || (TARGET_64BIT && mode == DImode))
42379 && INTEGER_CLASS_P (rclass))
42384 /* Implement targetm.vectorize.init_cost. */
42387 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
42389 unsigned *cost = XNEWVEC (unsigned, 3);
42390 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
42394 /* Implement targetm.vectorize.add_stmt_cost. */
42397 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
42398 struct _stmt_vec_info *stmt_info, int misalign,
42399 enum vect_cost_model_location where)
42401 unsigned *cost = (unsigned *) data;
42402 unsigned retval = 0;
42404 if (flag_vect_cost_model)
42406 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
42407 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
42409 /* Statements in an inner loop relative to the loop being
42410 vectorized are weighted more heavily. The value here is
42411 arbitrary and could potentially be improved with analysis. */
42412 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
42413 count *= 50; /* FIXME. */
42415 retval = (unsigned) (count * stmt_cost);
42416 cost[where] += retval;
42422 /* Implement targetm.vectorize.finish_cost. */
42425 ix86_finish_cost (void *data, unsigned *prologue_cost,
42426 unsigned *body_cost, unsigned *epilogue_cost)
42428 unsigned *cost = (unsigned *) data;
42429 *prologue_cost = cost[vect_prologue];
42430 *body_cost = cost[vect_body];
42431 *epilogue_cost = cost[vect_epilogue];
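/* A minimal standalone sketch of the vectorizer cost hooks above: one
   unsigned accumulator per location (prologue, body, epilogue), with
   count * per-statement cost added into the matching bucket.  The enum,
   struct and function names below are hypothetical; the block is guarded
   out of compilation.  */
#if 0
enum example_cost_where { EXAMPLE_PROLOGUE, EXAMPLE_BODY, EXAMPLE_EPILOGUE };

struct example_cost_data
{
  unsigned cost[3];   /* One accumulator per location.  */
};

static unsigned
example_add_stmt_cost (struct example_cost_data *data,
                       int count, unsigned stmt_cost,
                       enum example_cost_where where)
{
  unsigned retval = (unsigned) count * stmt_cost;
  data->cost[where] += retval;
  return retval;
}
#endif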
42434 /* Implement targetm.vectorize.destroy_cost_data. */
42437 ix86_destroy_cost_data (void *data)
42442 /* Validate target specific memory model bits in VAL. */
42444 static unsigned HOST_WIDE_INT
42445 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
42447 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
42450 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
42452 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
42454 warning (OPT_Winvalid_memory_model,
42455 "Unknown architecture specific memory model");
42456 return MEMMODEL_SEQ_CST;
42458 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
42459 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
42461 warning (OPT_Winvalid_memory_model,
42462 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
42463 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
42465 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
42467 warning (OPT_Winvalid_memory_model,
42468 "HLE_RELEASE not used with RELEASE or stronger memory model");
42469 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
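/* A minimal standalone sketch of the first rejection rule above: a memory
   model that sets both the HLE acquire and HLE release bits is invalid.
   The bit positions and names below are hypothetical; the block is guarded
   out of compilation.  */
#if 0
#include <stdbool.h>

#define EXAMPLE_HLE_ACQUIRE (1u << 16)   /* Hypothetical bit positions.  */
#define EXAMPLE_HLE_RELEASE (1u << 17)

static bool
example_hle_bits_conflict (unsigned int val)
{
  return (val & EXAMPLE_HLE_ACQUIRE) && (val & EXAMPLE_HLE_RELEASE);
}
#endif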
42474 /* Initialize the GCC target structure. */
42475 #undef TARGET_RETURN_IN_MEMORY
42476 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
42478 #undef TARGET_LEGITIMIZE_ADDRESS
42479 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
42481 #undef TARGET_ATTRIBUTE_TABLE
42482 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
42483 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42484 # undef TARGET_MERGE_DECL_ATTRIBUTES
42485 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
42488 #undef TARGET_COMP_TYPE_ATTRIBUTES
42489 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
42491 #undef TARGET_INIT_BUILTINS
42492 #define TARGET_INIT_BUILTINS ix86_init_builtins
42493 #undef TARGET_BUILTIN_DECL
42494 #define TARGET_BUILTIN_DECL ix86_builtin_decl
42495 #undef TARGET_EXPAND_BUILTIN
42496 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
42498 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
42499 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
42500 ix86_builtin_vectorized_function
42502 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
42503 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
42505 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
42506 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
42508 #undef TARGET_VECTORIZE_BUILTIN_GATHER
42509 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
42511 #undef TARGET_BUILTIN_RECIPROCAL
42512 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
42514 #undef TARGET_ASM_FUNCTION_EPILOGUE
42515 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
42517 #undef TARGET_ENCODE_SECTION_INFO
42518 #ifndef SUBTARGET_ENCODE_SECTION_INFO
42519 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
42521 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
42524 #undef TARGET_ASM_OPEN_PAREN
42525 #define TARGET_ASM_OPEN_PAREN ""
42526 #undef TARGET_ASM_CLOSE_PAREN
42527 #define TARGET_ASM_CLOSE_PAREN ""
42529 #undef TARGET_ASM_BYTE_OP
42530 #define TARGET_ASM_BYTE_OP ASM_BYTE
42532 #undef TARGET_ASM_ALIGNED_HI_OP
42533 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
42534 #undef TARGET_ASM_ALIGNED_SI_OP
42535 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
42537 #undef TARGET_ASM_ALIGNED_DI_OP
42538 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
42541 #undef TARGET_PROFILE_BEFORE_PROLOGUE
42542 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
42544 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
42545 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
42547 #undef TARGET_ASM_UNALIGNED_HI_OP
42548 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
42549 #undef TARGET_ASM_UNALIGNED_SI_OP
42550 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
42551 #undef TARGET_ASM_UNALIGNED_DI_OP
42552 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
42554 #undef TARGET_PRINT_OPERAND
42555 #define TARGET_PRINT_OPERAND ix86_print_operand
42556 #undef TARGET_PRINT_OPERAND_ADDRESS
42557 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
42558 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
42559 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
42560 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
42561 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
42563 #undef TARGET_SCHED_INIT_GLOBAL
42564 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
42565 #undef TARGET_SCHED_ADJUST_COST
42566 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
42567 #undef TARGET_SCHED_ISSUE_RATE
42568 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
42569 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
42570 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
42571 ia32_multipass_dfa_lookahead
42573 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
42574 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
42576 #undef TARGET_MEMMODEL_CHECK
42577 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
42580 #undef TARGET_HAVE_TLS
42581 #define TARGET_HAVE_TLS true
42583 #undef TARGET_CANNOT_FORCE_CONST_MEM
42584 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
42585 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
42586 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
42588 #undef TARGET_DELEGITIMIZE_ADDRESS
42589 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
42591 #undef TARGET_MS_BITFIELD_LAYOUT_P
42592 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
42595 #undef TARGET_BINDS_LOCAL_P
42596 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
42598 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42599 #undef TARGET_BINDS_LOCAL_P
42600 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
42603 #undef TARGET_ASM_OUTPUT_MI_THUNK
42604 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
42605 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
42606 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
42608 #undef TARGET_ASM_FILE_START
42609 #define TARGET_ASM_FILE_START x86_file_start
42611 #undef TARGET_OPTION_OVERRIDE
42612 #define TARGET_OPTION_OVERRIDE ix86_option_override
42614 #undef TARGET_REGISTER_MOVE_COST
42615 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
42616 #undef TARGET_MEMORY_MOVE_COST
42617 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
42618 #undef TARGET_RTX_COSTS
42619 #define TARGET_RTX_COSTS ix86_rtx_costs
42620 #undef TARGET_ADDRESS_COST
42621 #define TARGET_ADDRESS_COST ix86_address_cost
42623 #undef TARGET_FIXED_CONDITION_CODE_REGS
42624 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
42625 #undef TARGET_CC_MODES_COMPATIBLE
42626 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
42628 #undef TARGET_MACHINE_DEPENDENT_REORG
42629 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
42631 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
42632 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
42634 #undef TARGET_BUILD_BUILTIN_VA_LIST
42635 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
42637 #undef TARGET_FOLD_BUILTIN
42638 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
42640 #undef TARGET_COMPARE_VERSION_PRIORITY
42641 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
42643 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
42644 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
42645 ix86_generate_version_dispatcher_body
42647 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
42648 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
42649 ix86_get_function_versions_dispatcher
42651 #undef TARGET_ENUM_VA_LIST_P
42652 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
42654 #undef TARGET_FN_ABI_VA_LIST
42655 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
42657 #undef TARGET_CANONICAL_VA_LIST_TYPE
42658 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
42660 #undef TARGET_EXPAND_BUILTIN_VA_START
42661 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
42663 #undef TARGET_MD_ASM_CLOBBERS
42664 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
42666 #undef TARGET_PROMOTE_PROTOTYPES
42667 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
42668 #undef TARGET_STRUCT_VALUE_RTX
42669 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
42670 #undef TARGET_SETUP_INCOMING_VARARGS
42671 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
42672 #undef TARGET_MUST_PASS_IN_STACK
42673 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
42674 #undef TARGET_FUNCTION_ARG_ADVANCE
42675 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
42676 #undef TARGET_FUNCTION_ARG
42677 #define TARGET_FUNCTION_ARG ix86_function_arg
42678 #undef TARGET_FUNCTION_ARG_BOUNDARY
42679 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
42680 #undef TARGET_PASS_BY_REFERENCE
42681 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
42682 #undef TARGET_INTERNAL_ARG_POINTER
42683 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
42684 #undef TARGET_UPDATE_STACK_BOUNDARY
42685 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
42686 #undef TARGET_GET_DRAP_RTX
42687 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
42688 #undef TARGET_STRICT_ARGUMENT_NAMING
42689 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
42690 #undef TARGET_STATIC_CHAIN
42691 #define TARGET_STATIC_CHAIN ix86_static_chain
42692 #undef TARGET_TRAMPOLINE_INIT
42693 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
42694 #undef TARGET_RETURN_POPS_ARGS
42695 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
42697 #undef TARGET_LEGITIMATE_COMBINED_INSN
42698 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
42700 #undef TARGET_ASAN_SHADOW_OFFSET
42701 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
42703 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
42704 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
42706 #undef TARGET_SCALAR_MODE_SUPPORTED_P
42707 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
42709 #undef TARGET_VECTOR_MODE_SUPPORTED_P
42710 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
42712 #undef TARGET_C_MODE_FOR_SUFFIX
42713 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
42716 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
42717 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
42720 #ifdef SUBTARGET_INSERT_ATTRIBUTES
42721 #undef TARGET_INSERT_ATTRIBUTES
42722 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
42725 #undef TARGET_MANGLE_TYPE
42726 #define TARGET_MANGLE_TYPE ix86_mangle_type
42729 #undef TARGET_STACK_PROTECT_FAIL
42730 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
42733 #undef TARGET_FUNCTION_VALUE
42734 #define TARGET_FUNCTION_VALUE ix86_function_value
42736 #undef TARGET_FUNCTION_VALUE_REGNO_P
42737 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
42739 #undef TARGET_PROMOTE_FUNCTION_MODE
42740 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
42742 #undef TARGET_MEMBER_TYPE_FORCES_BLK
42743 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
42745 #undef TARGET_INSTANTIATE_DECLS
42746 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
42748 #undef TARGET_SECONDARY_RELOAD
42749 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
42751 #undef TARGET_CLASS_MAX_NREGS
42752 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
42754 #undef TARGET_PREFERRED_RELOAD_CLASS
42755 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
42756 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
42757 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
42758 #undef TARGET_CLASS_LIKELY_SPILLED_P
42759 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
42761 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
42762 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
42763 ix86_builtin_vectorization_cost
42764 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
42765 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
42766 ix86_vectorize_vec_perm_const_ok
42767 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
42768 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
42769 ix86_preferred_simd_mode
42770 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
42771 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
42772 ix86_autovectorize_vector_sizes
42773 #undef TARGET_VECTORIZE_INIT_COST
42774 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
42775 #undef TARGET_VECTORIZE_ADD_STMT_COST
42776 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
42777 #undef TARGET_VECTORIZE_FINISH_COST
42778 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
42779 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
42780 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
42782 #undef TARGET_SET_CURRENT_FUNCTION
42783 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
42785 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
42786 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
42788 #undef TARGET_OPTION_SAVE
42789 #define TARGET_OPTION_SAVE ix86_function_specific_save
42791 #undef TARGET_OPTION_RESTORE
42792 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
42794 #undef TARGET_OPTION_PRINT
42795 #define TARGET_OPTION_PRINT ix86_function_specific_print
42797 #undef TARGET_OPTION_FUNCTION_VERSIONS
42798 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
42800 #undef TARGET_CAN_INLINE_P
42801 #define TARGET_CAN_INLINE_P ix86_can_inline_p
42803 #undef TARGET_EXPAND_TO_RTL_HOOK
42804 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
42806 #undef TARGET_LEGITIMATE_ADDRESS_P
42807 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
42809 #undef TARGET_LRA_P
42810 #define TARGET_LRA_P hook_bool_void_true
42812 #undef TARGET_REGISTER_PRIORITY
42813 #define TARGET_REGISTER_PRIORITY ix86_register_priority
42815 #undef TARGET_LEGITIMATE_CONSTANT_P
42816 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
42818 #undef TARGET_FRAME_POINTER_REQUIRED
42819 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
42821 #undef TARGET_CAN_ELIMINATE
42822 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
42824 #undef TARGET_EXTRA_LIVE_ON_ENTRY
42825 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
42827 #undef TARGET_ASM_CODE_END
42828 #define TARGET_ASM_CODE_END ix86_code_end
42830 #undef TARGET_CONDITIONAL_REGISTER_USAGE
42831 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
42834 #undef TARGET_INIT_LIBFUNCS
42835 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
42838 #undef TARGET_SPILL_CLASS
42839 #define TARGET_SPILL_CLASS ix86_spill_class
42841 struct gcc_target targetm = TARGET_INITIALIZER;
42843 #include "gt-i386.h"