1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
31 #include "insn-config.h"
32 #include "conditions.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
42 #include "diagnostic-core.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "common/common-target.h"
49 #include "langhooks.h"
55 #include "tm-constrs.h"
59 #include "sched-int.h"
63 #include "diagnostic.h"
/* Per-basic-block bookkeeping for the vzeroupper optimization pass
   below; a block_info_def is attached to each basic block's aux field
   and accessed through the BLOCK_INFO macro.
   NOTE(review): this extraction is missing interior lines relative to
   the embedded original numbering (67-72, 74, 78-81, 83, 86-87): the
   enum body of upper_128bits_state, the unchanged/processed/scanned
   members (which later code reads via BLOCK_INFO) and the closing
   "} *block_info;" typedef are not visible -- verify against the
   original source before relying on this text.  */
66 enum upper_128bits_state
73 typedef struct block_info_def
75 /* State of the upper 128bits of AVX registers at exit. */
76 enum upper_128bits_state state
;
77 /* TRUE if state of the upper 128bits of AVX registers is unchanged
80 /* TRUE if block has been processed. */
82 /* TRUE if block has been scanned. */
84 /* Previous state of the upper 128bits of AVX registers at entry. */
85 enum upper_128bits_state prev
;
/* Retrieve the block_info_def stored in basic block B's aux field.  */
88 #define BLOCK_INFO(B) ((block_info) (B)->aux)
/* Values encoded in the operand of the vzeroupper intrinsic,
   describing how the callee of the adjacent call interacts with 256bit
   AVX registers (compared against callee_return_avx256,
   callee_return_pass_avx256 and callee_pass_avx256 in
   move_or_delete_vzeroupper_2 below).
   NOTE(review): the embedded original numbering has gaps (91, 97, 100,
   102-104), so the enum's opening brace and several enumerators are
   not visible in this extraction -- verify against the original.  */
90 enum call_avx256_state
92 /* Callee returns 256bit AVX register. */
93 callee_return_avx256
= -1,
94 /* Callee returns and passes 256bit AVX register. */
95 callee_return_pass_avx256
,
96 /* Callee passes 256bit AVX register. */
98 /* Callee doesn't return nor pass 256bit AVX register, or no
99 256bit AVX register in function return. */
101 /* vzeroupper intrinsic. */
105 /* Check if a 256bit AVX register is referenced in stores. */
/* Callback passed to note_stores (see move_or_delete_vzeroupper_2):
   DEST is the stored-to location, SET the enclosing SET (or NULL),
   DATA points at the caller's upper_128bits_state.  When DEST or
   SET_SRC (SET) is a register in a 256bit AVX mode, the state pointed
   to by DATA is presumably updated to "used" -- the assignment line is
   not visible in this extraction (the embedded numbering jumps
   105->108->111 and 117->122), so the return type, opening condition
   and body statement are missing; confirm against the original.  */
108 check_avx256_stores (rtx dest
, const_rtx set
, void *data
)
111 && VALID_AVX256_REG_MODE (GET_MODE (dest
)))
112 || (GET_CODE (set
) == SET
113 && REG_P (SET_SRC (set
))
114 && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set
)))))
116 enum upper_128bits_state
*state
117 = (enum upper_128bits_state
*) data
;
122 /* Helper function for move_or_delete_vzeroupper_1. Look for vzeroupper
123 in basic block BB. Delete it if upper 128bit AVX registers are
124 unused. If it isn't deleted, move it to just before a jump insn.
126 STATE is state of the upper 128bits of AVX registers at entry. */
/* NOTE(review): this extraction is missing many interior lines (the
   embedded numbering has gaps such as 127-128, 131-137, 139-140,
   142-147, 160-163, 166, 168, 170, 172-173, 176, 178-183, 188,
   190-195, 197, 201-203, 207-210, 215-218, 220-221, 224-225, 227,
   229-234, 237-248, 250-252, 255-264, 267, 269-278, 280-284, 288-289,
   292-294): the function's return type, opening/closing braces,
   several local declarations (insn, bb_end, pat, avx256, unchanged)
   and a number of statements are not visible.  Do not treat the text
   below as complete; verify against the original source.  */
129 move_or_delete_vzeroupper_2 (basic_block bb
,
130 enum upper_128bits_state state
)
133 rtx vzeroupper_insn
= NULL_RTX
;
138 if (BLOCK_INFO (bb
)->unchanged
)
141 fprintf (dump_file
, "  [bb %i] unchanged: upper 128bits: %d\n",
144 BLOCK_INFO (bb
)->state
= state
;
148 if (BLOCK_INFO (bb
)->scanned
&& BLOCK_INFO (bb
)->prev
== state
)
151 fprintf (dump_file
, "  [bb %i] scanned: upper 128bits: %d\n",
152 bb
->index
, BLOCK_INFO (bb
)->state
);
156 BLOCK_INFO (bb
)->prev
= state
;
159 fprintf (dump_file
, "  [bb %i] entry: upper 128bits: %d\n",
164 /* BB_END changes when it is deleted. */
165 bb_end
= BB_END (bb
);
167 while (insn
!= bb_end
)
169 insn
= NEXT_INSN (insn
);
171 if (!NONDEBUG_INSN_P (insn
))
174 /* Move vzeroupper before jump/call. */
175 if (JUMP_P (insn
) || CALL_P (insn
))
177 if (!vzeroupper_insn
)
180 if (PREV_INSN (insn
) != vzeroupper_insn
)
184 fprintf (dump_file
, "Move vzeroupper after:\n");
185 print_rtl_single (dump_file
, PREV_INSN (insn
));
186 fprintf (dump_file
, "before:\n");
187 print_rtl_single (dump_file
, insn
);
189 reorder_insns_nobb (vzeroupper_insn
, vzeroupper_insn
,
192 vzeroupper_insn
= NULL_RTX
;
196 pat
= PATTERN (insn
);
198 /* Check insn for vzeroupper intrinsic. */
199 if (GET_CODE (pat
) == UNSPEC_VOLATILE
200 && XINT (pat
, 1) == UNSPECV_VZEROUPPER
)
204 /* Found vzeroupper intrinsic. */
205 fprintf (dump_file
, "Found vzeroupper:\n");
206 print_rtl_single (dump_file
, insn
);
211 /* Check insn for vzeroall intrinsic. */
212 if (GET_CODE (pat
) == PARALLEL
213 && GET_CODE (XVECEXP (pat
, 0, 0)) == UNSPEC_VOLATILE
214 && XINT (XVECEXP (pat
, 0, 0), 1) == UNSPECV_VZEROALL
)
219 /* Delete pending vzeroupper insertion. */
222 delete_insn (vzeroupper_insn
);
223 vzeroupper_insn
= NULL_RTX
;
226 else if (state
!= used
)
228 note_stores (pat
, check_avx256_stores
, &state
);
235 /* Process vzeroupper intrinsic. */
236 avx256
= INTVAL (XVECEXP (pat
, 0, 0));
240 /* Since the upper 128bits are cleared, callee must not pass
241 256bit AVX register. We only need to check if callee
242 returns 256bit AVX register. */
243 if (avx256
== callee_return_avx256
)
249 /* Remove unnecessary vzeroupper since upper 128bits are
253 fprintf (dump_file
, "Delete redundant vzeroupper:\n");
254 print_rtl_single (dump_file
, insn
);
260 /* Set state to UNUSED if callee doesn't return 256bit AVX
262 if (avx256
!= callee_return_pass_avx256
)
265 if (avx256
== callee_return_pass_avx256
266 || avx256
== callee_pass_avx256
)
268 /* Must remove vzeroupper since callee passes in 256bit
272 fprintf (dump_file
, "Delete callee pass vzeroupper:\n");
273 print_rtl_single (dump_file
, insn
);
279 vzeroupper_insn
= insn
;
/* Record the scan results on the block so later passes over the CFG
   (move_or_delete_vzeroupper_1) can reuse them.  */
285 BLOCK_INFO (bb
)->state
= state
;
286 BLOCK_INFO (bb
)->unchanged
= unchanged
;
287 BLOCK_INFO (bb
)->scanned
= true;
290 fprintf (dump_file
, "  [bb %i] exit: %s: upper 128bits: %d\n",
291 bb
->index
, unchanged
? "unchanged" : "changed",
295 /* Helper function for move_or_delete_vzeroupper. Process vzeroupper
296 in BLOCK and check its predecessor blocks. Treat UNKNOWN state
297 as USED if UNKNOWN_IS_UNUSED is true. Return TRUE if the exit
/* NOTE(review): interior lines are missing from this extraction (the
   embedded numbering has gaps such as 298-300, 302-304, 306-308, 311,
   313-316, 320-322, 324-325, 327-339, 343, 346, 348, 350, 353-358):
   the return type, braces, the edge/edge_iterator declarations, most
   of the switch over predecessor states, and the final return are not
   visible.  Verify against the original source.  */
301 move_or_delete_vzeroupper_1 (basic_block block
, bool unknown_is_unused
)
305 enum upper_128bits_state state
, old_state
, new_state
;
309 fprintf (dump_file
, " Process [bb %i]: status: %d\n",
310 block
->index
, BLOCK_INFO (block
)->processed
);
/* Already settled; nothing more to do for this block.  */
312 if (BLOCK_INFO (block
)->processed
)
317 /* Check all predecessor edges of this block. */
318 seen_unknown
= false;
319 FOR_EACH_EDGE (e
, ei
, block
->preds
)
323 switch (BLOCK_INFO (e
->src
)->state
)
326 if (!unknown_is_unused
)
/* Scan the block with the state merged from its predecessors and see
   whether its recorded exit state changed as a result.  */
340 old_state
= BLOCK_INFO (block
)->state
;
341 move_or_delete_vzeroupper_2 (block
, state
);
342 new_state
= BLOCK_INFO (block
)->state
;
344 if (state
!= unknown
|| new_state
== used
)
345 BLOCK_INFO (block
)->processed
= true;
347 /* Need to rescan if the upper 128bits of AVX registers are changed
349 if (new_state
!= old_state
)
351 if (new_state
== used
)
352 cfun
->machine
->rescan_vzeroupper_p
= 1;
359 /* Go through the instruction stream looking for vzeroupper. Delete
360 it if upper 128bit AVX registers are unused. If it isn't deleted,
361 move it to just before a jump insn. */
/* Driver of the pass: seeds the entry-point successors, then runs an
   iterative worklist data-flow (two fibonacci heaps, "worklist" for
   the current round and "pending" for the next, ordered by reverse
   completion order) until no block requests a rescan, and finally
   makes one pass treating remaining unknown states as unused.
   NOTE(review): this extraction is missing many interior lines (the
   embedded numbering has gaps such as 362-363, 365-368, 371-374, 377,
   379, 381, 383, 386, 388-389, 397-398, 405, 408, 411-412, 415-417,
   419, 421, 423, 428, 430, 432, 434, 439-441, 443, 446, 449-450, 452,
   454, 459-461, 463, 467-472, 474-477, 483-484, 486-487, 489,
   491-492): the return type, braces, declarations of bb/e/ei/i/
   rc_order/bb_order, several FOR_EACH_BB loops, the free of rc_order/
   bb_order and more are not visible.  Verify against the original.  */
364 move_or_delete_vzeroupper (void)
369 fibheap_t worklist
, pending
, fibheap_swap
;
370 sbitmap visited
, in_worklist
, in_pending
, sbitmap_swap
;
375 /* Set up block info for each basic block. */
376 alloc_aux_for_blocks (sizeof (struct block_info_def
));
378 /* Process outgoing edges of entry point. */
380 fprintf (dump_file
, "Process outgoing edges of entry point\n");
382 FOR_EACH_EDGE (e
, ei
, ENTRY_BLOCK_PTR
->succs
)
384 move_or_delete_vzeroupper_2 (e
->dest
,
385 cfun
->machine
->caller_pass_avx256_p
387 BLOCK_INFO (e
->dest
)->processed
= true;
390 /* Compute reverse completion order of depth first search of the CFG
391 so that the data-flow runs faster. */
392 rc_order
= XNEWVEC (int, n_basic_blocks
- NUM_FIXED_BLOCKS
);
393 bb_order
= XNEWVEC (int, last_basic_block
);
394 pre_and_rev_post_order_compute (NULL
, rc_order
, false);
395 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
396 bb_order
[rc_order
[i
]] = i
;
399 worklist
= fibheap_new ();
400 pending
= fibheap_new ();
401 visited
= sbitmap_alloc (last_basic_block
);
402 in_worklist
= sbitmap_alloc (last_basic_block
);
403 in_pending
= sbitmap_alloc (last_basic_block
);
404 sbitmap_zero (in_worklist
);
406 /* Don't check outgoing edges of entry point. */
407 sbitmap_ones (in_pending
);
409 if (BLOCK_INFO (bb
)->processed
)
410 RESET_BIT (in_pending
, bb
->index
);
413 move_or_delete_vzeroupper_1 (bb
, false);
414 fibheap_insert (pending
, bb_order
[bb
->index
], bb
);
418 fprintf (dump_file
, "Check remaining basic blocks\n");
/* Outer loop: one data-flow round per iteration; swap the pending
   heap/bitmap in as the current worklist.  */
420 while (!fibheap_empty (pending
))
422 fibheap_swap
= pending
;
424 worklist
= fibheap_swap
;
425 sbitmap_swap
= in_pending
;
426 in_pending
= in_worklist
;
427 in_worklist
= sbitmap_swap
;
429 sbitmap_zero (visited
);
431 cfun
->machine
->rescan_vzeroupper_p
= 0;
433 while (!fibheap_empty (worklist
))
435 bb
= (basic_block
) fibheap_extract_min (worklist
);
436 RESET_BIT (in_worklist
, bb
->index
);
437 gcc_assert (!TEST_BIT (visited
, bb
->index
));
438 if (!TEST_BIT (visited
, bb
->index
))
442 SET_BIT (visited
, bb
->index
);
444 if (move_or_delete_vzeroupper_1 (bb
, false))
445 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
447 if (e
->dest
== EXIT_BLOCK_PTR
448 || BLOCK_INFO (e
->dest
)->processed
)
451 if (TEST_BIT (visited
, e
->dest
->index
))
453 if (!TEST_BIT (in_pending
, e
->dest
->index
))
455 /* Send E->DEST to next round. */
456 SET_BIT (in_pending
, e
->dest
->index
);
457 fibheap_insert (pending
,
458 bb_order
[e
->dest
->index
],
462 else if (!TEST_BIT (in_worklist
, e
->dest
->index
))
464 /* Add E->DEST to current round. */
465 SET_BIT (in_worklist
, e
->dest
->index
);
466 fibheap_insert (worklist
, bb_order
[e
->dest
->index
],
473 if (!cfun
->machine
->rescan_vzeroupper_p
)
478 fibheap_delete (worklist
);
479 fibheap_delete (pending
);
480 sbitmap_free (visited
);
481 sbitmap_free (in_worklist
);
482 sbitmap_free (in_pending
);
485 fprintf (dump_file
, "Process remaining basic blocks\n");
488 move_or_delete_vzeroupper_1 (bb
, true);
490 free_aux_for_blocks ();
/* Forward declaration; the definition appears later in the file.  */
493 static rtx
legitimize_dllimport_symbol (rtx
, bool);
495 #ifndef CHECK_STACK_LIMIT
496 #define CHECK_STACK_LIMIT (-1)
/* NOTE(review): the matching #endif (original line 497) and the final
   ": 4)" arm of the MODE_INDEX macro below (original line 505) are
   not visible in this extraction; the embedded numbering jumps
   496->499 and 504->507.  Verify against the original source.  */
499 /* Return index of given mode in mult and division cost tables. */
500 #define MODE_INDEX(mode) \
501 ((mode) == QImode ? 0 \
502 : (mode) == HImode ? 1 \
503 : (mode) == SImode ? 2 \
504 : (mode) == DImode ? 3 \
507 /* Processor costs (relative to an add) */
508 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
509 #define COSTS_N_BYTES(N) ((N) * 2)
/* Placeholder stringop strategy table entry (always use a libcall).  */
511 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* Cost table used when tuning for code size rather than speed.
   NOTE(review): the embedded original numbering has gaps (533, 559,
   581-582), so at least one initializer field after the "large" insn
   entry, one after the prefetch entries, and the closing "};" are not
   visible in this extraction; a leading qualifier on the declaration
   may also be missing.  Verify against the original source.  */
514 struct processor_costs ix86_size_cost
= {/* costs for tuning for size */
515 COSTS_N_BYTES (2), /* cost of an add instruction */
516 COSTS_N_BYTES (3), /* cost of a lea instruction */
517 COSTS_N_BYTES (2), /* variable shift costs */
518 COSTS_N_BYTES (3), /* constant shift costs */
519 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
520 COSTS_N_BYTES (3), /* HI */
521 COSTS_N_BYTES (3), /* SI */
522 COSTS_N_BYTES (3), /* DI */
523 COSTS_N_BYTES (5)}, /* other */
524 0, /* cost of multiply per each bit set */
525 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
526 COSTS_N_BYTES (3), /* HI */
527 COSTS_N_BYTES (3), /* SI */
528 COSTS_N_BYTES (3), /* DI */
529 COSTS_N_BYTES (5)}, /* other */
530 COSTS_N_BYTES (3), /* cost of movsx */
531 COSTS_N_BYTES (3), /* cost of movzx */
532 0, /* "large" insn */
534 2, /* cost for loading QImode using movzbl */
535 {2, 2, 2}, /* cost of loading integer registers
536 in QImode, HImode and SImode.
537 Relative to reg-reg move (2). */
538 {2, 2, 2}, /* cost of storing integer registers */
539 2, /* cost of reg,reg fld/fst */
540 {2, 2, 2}, /* cost of loading fp registers
541 in SFmode, DFmode and XFmode */
542 {2, 2, 2}, /* cost of storing fp registers
543 in SFmode, DFmode and XFmode */
544 3, /* cost of moving MMX register */
545 {3, 3}, /* cost of loading MMX registers
546 in SImode and DImode */
547 {3, 3}, /* cost of storing MMX registers
548 in SImode and DImode */
549 3, /* cost of moving SSE register */
550 {3, 3, 3}, /* cost of loading SSE registers
551 in SImode, DImode and TImode */
552 {3, 3, 3}, /* cost of storing SSE registers
553 in SImode, DImode and TImode */
554 3, /* MMX or SSE register to integer */
555 0, /* size of l1 cache */
556 0, /* size of l2 cache */
557 0, /* size of prefetch block */
558 0, /* number of parallel prefetches */
560 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
562 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
563 COSTS_N_BYTES (2), /* cost of FABS instruction. */
564 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
565 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
566 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
567 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
568 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
569 {rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}}},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 1, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 1, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
583 /* Processor costs (relative to an add) */
/* Cost model for the original Intel 386.
   NOTE(review): the embedded original numbering has gaps (584, 604,
   630, 652-654), so at least one initializer field after the "large"
   insn entry, one after the prefetch entries, and the closing "};"
   are not visible in this extraction.  Verify against the original.  */
585 struct processor_costs i386_cost
= { /* 386 specific costs */
586 COSTS_N_INSNS (1), /* cost of an add instruction */
587 COSTS_N_INSNS (1), /* cost of a lea instruction */
588 COSTS_N_INSNS (3), /* variable shift costs */
589 COSTS_N_INSNS (2), /* constant shift costs */
590 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
591 COSTS_N_INSNS (6), /* HI */
592 COSTS_N_INSNS (6), /* SI */
593 COSTS_N_INSNS (6), /* DI */
594 COSTS_N_INSNS (6)}, /* other */
595 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
596 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
597 COSTS_N_INSNS (23), /* HI */
598 COSTS_N_INSNS (23), /* SI */
599 COSTS_N_INSNS (23), /* DI */
600 COSTS_N_INSNS (23)}, /* other */
601 COSTS_N_INSNS (3), /* cost of movsx */
602 COSTS_N_INSNS (2), /* cost of movzx */
603 15, /* "large" insn */
605 4, /* cost for loading QImode using movzbl */
606 {2, 4, 2}, /* cost of loading integer registers
607 in QImode, HImode and SImode.
608 Relative to reg-reg move (2). */
609 {2, 4, 2}, /* cost of storing integer registers */
610 2, /* cost of reg,reg fld/fst */
611 {8, 8, 8}, /* cost of loading fp registers
612 in SFmode, DFmode and XFmode */
613 {8, 8, 8}, /* cost of storing fp registers
614 in SFmode, DFmode and XFmode */
615 2, /* cost of moving MMX register */
616 {4, 8}, /* cost of loading MMX registers
617 in SImode and DImode */
618 {4, 8}, /* cost of storing MMX registers
619 in SImode and DImode */
620 2, /* cost of moving SSE register */
621 {4, 8, 16}, /* cost of loading SSE registers
622 in SImode, DImode and TImode */
623 {4, 8, 16}, /* cost of storing SSE registers
624 in SImode, DImode and TImode */
625 3, /* MMX or SSE register to integer */
626 0, /* size of l1 cache */
627 0, /* size of l2 cache */
628 0, /* size of prefetch block */
629 0, /* number of parallel prefetches */
631 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
632 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
633 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
634 COSTS_N_INSNS (22), /* cost of FABS instruction. */
635 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
636 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
637 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
638 DUMMY_STRINGOP_ALGS
},
639 {{rep_prefix_1_byte
, {{-1, rep_prefix_1_byte
}}},
640 DUMMY_STRINGOP_ALGS
},
641 1, /* scalar_stmt_cost. */
642 1, /* scalar load_cost. */
643 1, /* scalar_store_cost. */
644 1, /* vec_stmt_cost. */
645 1, /* vec_to_scalar_cost. */
646 1, /* scalar_to_vec_cost. */
647 1, /* vec_align_load_cost. */
648 2, /* vec_unalign_load_cost. */
649 1, /* vec_store_cost. */
650 3, /* cond_taken_branch_cost. */
651 1, /* cond_not_taken_branch_cost. */
/* Cost model for the Intel 486.
   NOTE(review): the embedded original numbering has gaps (674, 702,
   724-726), so at least one initializer field after the "large" insn
   entry, one after the prefetch entries, and the closing "};" are not
   visible in this extraction.  Verify against the original source.  */
655 struct processor_costs i486_cost
= { /* 486 specific costs */
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (1), /* cost of a lea instruction */
658 COSTS_N_INSNS (3), /* variable shift costs */
659 COSTS_N_INSNS (2), /* constant shift costs */
660 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (12), /* HI */
662 COSTS_N_INSNS (12), /* SI */
663 COSTS_N_INSNS (12), /* DI */
664 COSTS_N_INSNS (12)}, /* other */
665 1, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (40), /* HI */
668 COSTS_N_INSNS (40), /* SI */
669 COSTS_N_INSNS (40), /* DI */
670 COSTS_N_INSNS (40)}, /* other */
671 COSTS_N_INSNS (3), /* cost of movsx */
672 COSTS_N_INSNS (2), /* cost of movzx */
673 15, /* "large" insn */
675 4, /* cost for loading QImode using movzbl */
676 {2, 4, 2}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {2, 4, 2}, /* cost of storing integer registers */
680 2, /* cost of reg,reg fld/fst */
681 {8, 8, 8}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {8, 8, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {4, 8}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 8}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 8, 16}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 8, 16}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 3, /* MMX or SSE register to integer */
696 4, /* size of l1 cache. 486 has 8kB cache
697 shared for code and data, so 4kB is
698 not really precise. */
699 4, /* size of l2 cache */
700 0, /* size of prefetch block */
701 0, /* number of parallel prefetches */
703 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
704 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
705 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
706 COSTS_N_INSNS (3), /* cost of FABS instruction. */
707 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
708 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
709 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
710 DUMMY_STRINGOP_ALGS
},
711 {{rep_prefix_4_byte
, {{-1, rep_prefix_4_byte
}}},
712 DUMMY_STRINGOP_ALGS
},
713 1, /* scalar_stmt_cost. */
714 1, /* scalar load_cost. */
715 1, /* scalar_store_cost. */
716 1, /* vec_stmt_cost. */
717 1, /* vec_to_scalar_cost. */
718 1, /* scalar_to_vec_cost. */
719 1, /* vec_align_load_cost. */
720 2, /* vec_unalign_load_cost. */
721 1, /* vec_store_cost. */
722 3, /* cond_taken_branch_cost. */
723 1, /* cond_not_taken_branch_cost. */
/* Cost model for the Intel Pentium.
   NOTE(review): the embedded original numbering has gaps (746, 772,
   794-796), so at least one initializer field after the "large" insn
   entry, one after the prefetch entries, and the closing "};" are not
   visible in this extraction.  Verify against the original source.  */
727 struct processor_costs pentium_cost
= {
728 COSTS_N_INSNS (1), /* cost of an add instruction */
729 COSTS_N_INSNS (1), /* cost of a lea instruction */
730 COSTS_N_INSNS (4), /* variable shift costs */
731 COSTS_N_INSNS (1), /* constant shift costs */
732 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
733 COSTS_N_INSNS (11), /* HI */
734 COSTS_N_INSNS (11), /* SI */
735 COSTS_N_INSNS (11), /* DI */
736 COSTS_N_INSNS (11)}, /* other */
737 0, /* cost of multiply per each bit set */
738 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
739 COSTS_N_INSNS (25), /* HI */
740 COSTS_N_INSNS (25), /* SI */
741 COSTS_N_INSNS (25), /* DI */
742 COSTS_N_INSNS (25)}, /* other */
743 COSTS_N_INSNS (3), /* cost of movsx */
744 COSTS_N_INSNS (2), /* cost of movzx */
745 8, /* "large" insn */
747 6, /* cost for loading QImode using movzbl */
748 {2, 4, 2}, /* cost of loading integer registers
749 in QImode, HImode and SImode.
750 Relative to reg-reg move (2). */
751 {2, 4, 2}, /* cost of storing integer registers */
752 2, /* cost of reg,reg fld/fst */
753 {2, 2, 6}, /* cost of loading fp registers
754 in SFmode, DFmode and XFmode */
755 {4, 4, 6}, /* cost of storing fp registers
756 in SFmode, DFmode and XFmode */
757 8, /* cost of moving MMX register */
758 {8, 8}, /* cost of loading MMX registers
759 in SImode and DImode */
760 {8, 8}, /* cost of storing MMX registers
761 in SImode and DImode */
762 2, /* cost of moving SSE register */
763 {4, 8, 16}, /* cost of loading SSE registers
764 in SImode, DImode and TImode */
765 {4, 8, 16}, /* cost of storing SSE registers
766 in SImode, DImode and TImode */
767 3, /* MMX or SSE register to integer */
768 8, /* size of l1 cache. */
769 8, /* size of l2 cache */
770 0, /* size of prefetch block */
771 0, /* number of parallel prefetches */
773 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
774 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
775 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
776 COSTS_N_INSNS (1), /* cost of FABS instruction. */
777 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
778 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
779 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
780 DUMMY_STRINGOP_ALGS
},
781 {{libcall
, {{-1, rep_prefix_4_byte
}}},
782 DUMMY_STRINGOP_ALGS
},
783 1, /* scalar_stmt_cost. */
784 1, /* scalar load_cost. */
785 1, /* scalar_store_cost. */
786 1, /* vec_stmt_cost. */
787 1, /* vec_to_scalar_cost. */
788 1, /* scalar_to_vec_cost. */
789 1, /* vec_align_load_cost. */
790 2, /* vec_unalign_load_cost. */
791 1, /* vec_store_cost. */
792 3, /* cond_taken_branch_cost. */
793 1, /* cond_not_taken_branch_cost. */
/* Cost model for the Intel Pentium Pro family.
   NOTE(review): the embedded original numbering has gaps (816, 842,
   871-873), so at least one initializer field after the "large" insn
   entry, one after the prefetch entries, and the closing "};" are not
   visible in this extraction.  Verify against the original source.  */
797 struct processor_costs pentiumpro_cost
= {
798 COSTS_N_INSNS (1), /* cost of an add instruction */
799 COSTS_N_INSNS (1), /* cost of a lea instruction */
800 COSTS_N_INSNS (1), /* variable shift costs */
801 COSTS_N_INSNS (1), /* constant shift costs */
802 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
803 COSTS_N_INSNS (4), /* HI */
804 COSTS_N_INSNS (4), /* SI */
805 COSTS_N_INSNS (4), /* DI */
806 COSTS_N_INSNS (4)}, /* other */
807 0, /* cost of multiply per each bit set */
808 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
809 COSTS_N_INSNS (17), /* HI */
810 COSTS_N_INSNS (17), /* SI */
811 COSTS_N_INSNS (17), /* DI */
812 COSTS_N_INSNS (17)}, /* other */
813 COSTS_N_INSNS (1), /* cost of movsx */
814 COSTS_N_INSNS (1), /* cost of movzx */
815 8, /* "large" insn */
817 2, /* cost for loading QImode using movzbl */
818 {4, 4, 4}, /* cost of loading integer registers
819 in QImode, HImode and SImode.
820 Relative to reg-reg move (2). */
821 {2, 2, 2}, /* cost of storing integer registers */
822 2, /* cost of reg,reg fld/fst */
823 {2, 2, 6}, /* cost of loading fp registers
824 in SFmode, DFmode and XFmode */
825 {4, 4, 6}, /* cost of storing fp registers
826 in SFmode, DFmode and XFmode */
827 2, /* cost of moving MMX register */
828 {2, 2}, /* cost of loading MMX registers
829 in SImode and DImode */
830 {2, 2}, /* cost of storing MMX registers
831 in SImode and DImode */
832 2, /* cost of moving SSE register */
833 {2, 2, 8}, /* cost of loading SSE registers
834 in SImode, DImode and TImode */
835 {2, 2, 8}, /* cost of storing SSE registers
836 in SImode, DImode and TImode */
837 3, /* MMX or SSE register to integer */
838 8, /* size of l1 cache. */
839 256, /* size of l2 cache */
840 32, /* size of prefetch block */
841 6, /* number of parallel prefetches */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (2), /* cost of FABS instruction. */
847 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
849 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
850 (we ensure the alignment). For small blocks inline loop is still a
851 noticeable win, for bigger blocks either rep movsl or rep movsb is
852 way to go. Rep movsb has apparently more expensive startup time in CPU,
853 but after 4K the difference is down in the noise. */
854 {{rep_prefix_4_byte
, {{128, loop
}, {1024, unrolled_loop
},
855 {8192, rep_prefix_4_byte
}, {-1, rep_prefix_1_byte
}}},
856 DUMMY_STRINGOP_ALGS
},
857 {{rep_prefix_4_byte
, {{1024, unrolled_loop
},
858 {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
859 DUMMY_STRINGOP_ALGS
},
860 1, /* scalar_stmt_cost. */
861 1, /* scalar load_cost. */
862 1, /* scalar_store_cost. */
863 1, /* vec_stmt_cost. */
864 1, /* vec_to_scalar_cost. */
865 1, /* scalar_to_vec_cost. */
866 1, /* vec_align_load_cost. */
867 2, /* vec_unalign_load_cost. */
868 1, /* vec_store_cost. */
869 3, /* cond_taken_branch_cost. */
870 1, /* cond_not_taken_branch_cost. */
/* Cost model for the AMD Geode.
   NOTE(review): the embedded original numbering has gaps (893, 904,
   920, 942-944), so at least one initializer field after the "large"
   insn entry, one near the fp-store entries, one after the prefetch
   entries, and the closing "};" are not visible in this extraction.
   Verify against the original source.  */
874 struct processor_costs geode_cost
= {
875 COSTS_N_INSNS (1), /* cost of an add instruction */
876 COSTS_N_INSNS (1), /* cost of a lea instruction */
877 COSTS_N_INSNS (2), /* variable shift costs */
878 COSTS_N_INSNS (1), /* constant shift costs */
879 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
880 COSTS_N_INSNS (4), /* HI */
881 COSTS_N_INSNS (7), /* SI */
882 COSTS_N_INSNS (7), /* DI */
883 COSTS_N_INSNS (7)}, /* other */
884 0, /* cost of multiply per each bit set */
885 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
886 COSTS_N_INSNS (23), /* HI */
887 COSTS_N_INSNS (39), /* SI */
888 COSTS_N_INSNS (39), /* DI */
889 COSTS_N_INSNS (39)}, /* other */
890 COSTS_N_INSNS (1), /* cost of movsx */
891 COSTS_N_INSNS (1), /* cost of movzx */
892 8, /* "large" insn */
894 1, /* cost for loading QImode using movzbl */
895 {1, 1, 1}, /* cost of loading integer registers
896 in QImode, HImode and SImode.
897 Relative to reg-reg move (2). */
898 {1, 1, 1}, /* cost of storing integer registers */
899 1, /* cost of reg,reg fld/fst */
900 {1, 1, 1}, /* cost of loading fp registers
901 in SFmode, DFmode and XFmode */
902 {4, 6, 6}, /* cost of storing fp registers
903 in SFmode, DFmode and XFmode */
905 1, /* cost of moving MMX register */
906 {1, 1}, /* cost of loading MMX registers
907 in SImode and DImode */
908 {1, 1}, /* cost of storing MMX registers
909 in SImode and DImode */
910 1, /* cost of moving SSE register */
911 {1, 1, 1}, /* cost of loading SSE registers
912 in SImode, DImode and TImode */
913 {1, 1, 1}, /* cost of storing SSE registers
914 in SImode, DImode and TImode */
915 1, /* MMX or SSE register to integer */
916 64, /* size of l1 cache. */
917 128, /* size of l2 cache. */
918 32, /* size of prefetch block */
919 1, /* number of parallel prefetches */
921 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
922 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
923 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
924 COSTS_N_INSNS (1), /* cost of FABS instruction. */
925 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
926 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
927 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
928 DUMMY_STRINGOP_ALGS
},
929 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
930 DUMMY_STRINGOP_ALGS
},
931 1, /* scalar_stmt_cost. */
932 1, /* scalar load_cost. */
933 1, /* scalar_store_cost. */
934 1, /* vec_stmt_cost. */
935 1, /* vec_to_scalar_cost. */
936 1, /* scalar_to_vec_cost. */
937 1, /* vec_align_load_cost. */
938 2, /* vec_unalign_load_cost. */
939 1, /* vec_store_cost. */
940 3, /* cond_taken_branch_cost. */
941 1, /* cond_not_taken_branch_cost. */
/* Cost model for the AMD K6.
   NOTE(review): the embedded original numbering has gaps (964, 993,
   1015-1017), so at least one initializer field after the "large"
   insn entry, one after the prefetch entries, and the closing "};"
   are not visible in this extraction.  Verify against the original.  */
945 struct processor_costs k6_cost
= {
946 COSTS_N_INSNS (1), /* cost of an add instruction */
947 COSTS_N_INSNS (2), /* cost of a lea instruction */
948 COSTS_N_INSNS (1), /* variable shift costs */
949 COSTS_N_INSNS (1), /* constant shift costs */
950 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
951 COSTS_N_INSNS (3), /* HI */
952 COSTS_N_INSNS (3), /* SI */
953 COSTS_N_INSNS (3), /* DI */
954 COSTS_N_INSNS (3)}, /* other */
955 0, /* cost of multiply per each bit set */
956 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
957 COSTS_N_INSNS (18), /* HI */
958 COSTS_N_INSNS (18), /* SI */
959 COSTS_N_INSNS (18), /* DI */
960 COSTS_N_INSNS (18)}, /* other */
961 COSTS_N_INSNS (2), /* cost of movsx */
962 COSTS_N_INSNS (2), /* cost of movzx */
963 8, /* "large" insn */
965 3, /* cost for loading QImode using movzbl */
966 {4, 5, 4}, /* cost of loading integer registers
967 in QImode, HImode and SImode.
968 Relative to reg-reg move (2). */
969 {2, 3, 2}, /* cost of storing integer registers */
970 4, /* cost of reg,reg fld/fst */
971 {6, 6, 6}, /* cost of loading fp registers
972 in SFmode, DFmode and XFmode */
973 {4, 4, 4}, /* cost of storing fp registers
974 in SFmode, DFmode and XFmode */
975 2, /* cost of moving MMX register */
976 {2, 2}, /* cost of loading MMX registers
977 in SImode and DImode */
978 {2, 2}, /* cost of storing MMX registers
979 in SImode and DImode */
980 2, /* cost of moving SSE register */
981 {2, 2, 8}, /* cost of loading SSE registers
982 in SImode, DImode and TImode */
983 {2, 2, 8}, /* cost of storing SSE registers
984 in SImode, DImode and TImode */
985 6, /* MMX or SSE register to integer */
986 32, /* size of l1 cache. */
987 32, /* size of l2 cache. Some models
988 have integrated l2 cache, but
989 optimizing for k6 is not important
990 enough to worry about that. */
991 32, /* size of prefetch block */
992 1, /* number of parallel prefetches */
994 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
995 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
996 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
997 COSTS_N_INSNS (2), /* cost of FABS instruction. */
998 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
999 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
1000 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1001 DUMMY_STRINGOP_ALGS
},
1002 {{libcall
, {{256, rep_prefix_4_byte
}, {-1, libcall
}}},
1003 DUMMY_STRINGOP_ALGS
},
1004 1, /* scalar_stmt_cost. */
1005 1, /* scalar load_cost. */
1006 1, /* scalar_store_cost. */
1007 1, /* vec_stmt_cost. */
1008 1, /* vec_to_scalar_cost. */
1009 1, /* scalar_to_vec_cost. */
1010 1, /* vec_align_load_cost. */
1011 2, /* vec_unalign_load_cost. */
1012 1, /* vec_store_cost. */
1013 3, /* cond_taken_branch_cost. */
1014 1, /* cond_not_taken_branch_cost. */
/* Cost model for the AMD Athlon.
   NOTE(review): the embedded original numbering has gaps (1037,
   1088-1090), so at least one initializer field after the "large"
   insn entry and the closing "};" are not visible in this
   extraction.  Verify against the original source.  */
1018 struct processor_costs athlon_cost
= {
1019 COSTS_N_INSNS (1), /* cost of an add instruction */
1020 COSTS_N_INSNS (2), /* cost of a lea instruction */
1021 COSTS_N_INSNS (1), /* variable shift costs */
1022 COSTS_N_INSNS (1), /* constant shift costs */
1023 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
1024 COSTS_N_INSNS (5), /* HI */
1025 COSTS_N_INSNS (5), /* SI */
1026 COSTS_N_INSNS (5), /* DI */
1027 COSTS_N_INSNS (5)}, /* other */
1028 0, /* cost of multiply per each bit set */
1029 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1030 COSTS_N_INSNS (26), /* HI */
1031 COSTS_N_INSNS (42), /* SI */
1032 COSTS_N_INSNS (74), /* DI */
1033 COSTS_N_INSNS (74)}, /* other */
1034 COSTS_N_INSNS (1), /* cost of movsx */
1035 COSTS_N_INSNS (1), /* cost of movzx */
1036 8, /* "large" insn */
1038 4, /* cost for loading QImode using movzbl */
1039 {3, 4, 3}, /* cost of loading integer registers
1040 in QImode, HImode and SImode.
1041 Relative to reg-reg move (2). */
1042 {3, 4, 3}, /* cost of storing integer registers */
1043 4, /* cost of reg,reg fld/fst */
1044 {4, 4, 12}, /* cost of loading fp registers
1045 in SFmode, DFmode and XFmode */
1046 {6, 6, 8}, /* cost of storing fp registers
1047 in SFmode, DFmode and XFmode */
1048 2, /* cost of moving MMX register */
1049 {4, 4}, /* cost of loading MMX registers
1050 in SImode and DImode */
1051 {4, 4}, /* cost of storing MMX registers
1052 in SImode and DImode */
1053 2, /* cost of moving SSE register */
1054 {4, 4, 6}, /* cost of loading SSE registers
1055 in SImode, DImode and TImode */
1056 {4, 4, 5}, /* cost of storing SSE registers
1057 in SImode, DImode and TImode */
1058 5, /* MMX or SSE register to integer */
1059 64, /* size of l1 cache. */
1060 256, /* size of l2 cache. */
1061 64, /* size of prefetch block */
1062 6, /* number of parallel prefetches */
1063 5, /* Branch cost */
1064 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1065 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1066 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
1067 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1068 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1069 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1070 /* For some reason, Athlon deals better with REP prefix (relative to loops)
1071 compared to K8. Alignment becomes important after 8 bytes for memcpy and
1072 128 bytes for memset. */
1073 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1074 DUMMY_STRINGOP_ALGS
},
1075 {{libcall
, {{2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1076 DUMMY_STRINGOP_ALGS
},
1077 1, /* scalar_stmt_cost. */
1078 1, /* scalar load_cost. */
1079 1, /* scalar_store_cost. */
1080 1, /* vec_stmt_cost. */
1081 1, /* vec_to_scalar_cost. */
1082 1, /* scalar_to_vec_cost. */
1083 1, /* vec_align_load_cost. */
1084 2, /* vec_unalign_load_cost. */
1085 1, /* vec_store_cost. */
1086 3, /* cond_taken_branch_cost. */
1087 1, /* cond_not_taken_branch_cost. */
1091 struct processor_costs k8_cost
= {
1092 COSTS_N_INSNS (1), /* cost of an add instruction */
1093 COSTS_N_INSNS (2), /* cost of a lea instruction */
1094 COSTS_N_INSNS (1), /* variable shift costs */
1095 COSTS_N_INSNS (1), /* constant shift costs */
1096 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1097 COSTS_N_INSNS (4), /* HI */
1098 COSTS_N_INSNS (3), /* SI */
1099 COSTS_N_INSNS (4), /* DI */
1100 COSTS_N_INSNS (5)}, /* other */
1101 0, /* cost of multiply per each bit set */
1102 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1103 COSTS_N_INSNS (26), /* HI */
1104 COSTS_N_INSNS (42), /* SI */
1105 COSTS_N_INSNS (74), /* DI */
1106 COSTS_N_INSNS (74)}, /* other */
1107 COSTS_N_INSNS (1), /* cost of movsx */
1108 COSTS_N_INSNS (1), /* cost of movzx */
1109 8, /* "large" insn */
1111 4, /* cost for loading QImode using movzbl */
1112 {3, 4, 3}, /* cost of loading integer registers
1113 in QImode, HImode and SImode.
1114 Relative to reg-reg move (2). */
1115 {3, 4, 3}, /* cost of storing integer registers */
1116 4, /* cost of reg,reg fld/fst */
1117 {4, 4, 12}, /* cost of loading fp registers
1118 in SFmode, DFmode and XFmode */
1119 {6, 6, 8}, /* cost of storing fp registers
1120 in SFmode, DFmode and XFmode */
1121 2, /* cost of moving MMX register */
1122 {3, 3}, /* cost of loading MMX registers
1123 in SImode and DImode */
1124 {4, 4}, /* cost of storing MMX registers
1125 in SImode and DImode */
1126 2, /* cost of moving SSE register */
1127 {4, 3, 6}, /* cost of loading SSE registers
1128 in SImode, DImode and TImode */
1129 {4, 4, 5}, /* cost of storing SSE registers
1130 in SImode, DImode and TImode */
1131 5, /* MMX or SSE register to integer */
1132 64, /* size of l1 cache. */
1133 512, /* size of l2 cache. */
1134 64, /* size of prefetch block */
1135 /* New AMD processors never drop prefetches; if they cannot be performed
1136 immediately, they are queued. We set number of simultaneous prefetches
1137 to a large constant to reflect this (it probably is not a good idea not
1138 to limit number of prefetches at all, as their execution also takes some
1140 100, /* number of parallel prefetches */
1141 3, /* Branch cost */
1142 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1143 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1144 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1145 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1146 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1147 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1148 /* K8 has optimized REP instruction for medium sized blocks, but for very
1149 small blocks it is better to use loop. For large blocks, libcall can
1150 do nontemporary accesses and beat inline considerably. */
1151 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1152 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1153 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1154 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1155 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1156 4, /* scalar_stmt_cost. */
1157 2, /* scalar load_cost. */
1158 2, /* scalar_store_cost. */
1159 5, /* vec_stmt_cost. */
1160 0, /* vec_to_scalar_cost. */
1161 2, /* scalar_to_vec_cost. */
1162 2, /* vec_align_load_cost. */
1163 3, /* vec_unalign_load_cost. */
1164 3, /* vec_store_cost. */
1165 3, /* cond_taken_branch_cost. */
1166 2, /* cond_not_taken_branch_cost. */
1169 struct processor_costs amdfam10_cost
= {
1170 COSTS_N_INSNS (1), /* cost of an add instruction */
1171 COSTS_N_INSNS (2), /* cost of a lea instruction */
1172 COSTS_N_INSNS (1), /* variable shift costs */
1173 COSTS_N_INSNS (1), /* constant shift costs */
1174 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1175 COSTS_N_INSNS (4), /* HI */
1176 COSTS_N_INSNS (3), /* SI */
1177 COSTS_N_INSNS (4), /* DI */
1178 COSTS_N_INSNS (5)}, /* other */
1179 0, /* cost of multiply per each bit set */
1180 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1181 COSTS_N_INSNS (35), /* HI */
1182 COSTS_N_INSNS (51), /* SI */
1183 COSTS_N_INSNS (83), /* DI */
1184 COSTS_N_INSNS (83)}, /* other */
1185 COSTS_N_INSNS (1), /* cost of movsx */
1186 COSTS_N_INSNS (1), /* cost of movzx */
1187 8, /* "large" insn */
1189 4, /* cost for loading QImode using movzbl */
1190 {3, 4, 3}, /* cost of loading integer registers
1191 in QImode, HImode and SImode.
1192 Relative to reg-reg move (2). */
1193 {3, 4, 3}, /* cost of storing integer registers */
1194 4, /* cost of reg,reg fld/fst */
1195 {4, 4, 12}, /* cost of loading fp registers
1196 in SFmode, DFmode and XFmode */
1197 {6, 6, 8}, /* cost of storing fp registers
1198 in SFmode, DFmode and XFmode */
1199 2, /* cost of moving MMX register */
1200 {3, 3}, /* cost of loading MMX registers
1201 in SImode and DImode */
1202 {4, 4}, /* cost of storing MMX registers
1203 in SImode and DImode */
1204 2, /* cost of moving SSE register */
1205 {4, 4, 3}, /* cost of loading SSE registers
1206 in SImode, DImode and TImode */
1207 {4, 4, 5}, /* cost of storing SSE registers
1208 in SImode, DImode and TImode */
1209 3, /* MMX or SSE register to integer */
1211 MOVD reg64, xmmreg Double FSTORE 4
1212 MOVD reg32, xmmreg Double FSTORE 4
1214 MOVD reg64, xmmreg Double FADD 3
1216 MOVD reg32, xmmreg Double FADD 3
1218 64, /* size of l1 cache. */
1219 512, /* size of l2 cache. */
1220 64, /* size of prefetch block */
1221 /* New AMD processors never drop prefetches; if they cannot be performed
1222 immediately, they are queued. We set number of simultaneous prefetches
1223 to a large constant to reflect this (it probably is not a good idea not
1224 to limit number of prefetches at all, as their execution also takes some
1226 100, /* number of parallel prefetches */
1227 2, /* Branch cost */
1228 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1229 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1230 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1231 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1232 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1233 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1235 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
1236 very small blocks it is better to use loop. For large blocks, libcall can
1237 do nontemporary accesses and beat inline considerably. */
1238 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1239 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1240 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1241 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1242 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1243 4, /* scalar_stmt_cost. */
1244 2, /* scalar load_cost. */
1245 2, /* scalar_store_cost. */
1246 6, /* vec_stmt_cost. */
1247 0, /* vec_to_scalar_cost. */
1248 2, /* scalar_to_vec_cost. */
1249 2, /* vec_align_load_cost. */
1250 2, /* vec_unalign_load_cost. */
1251 2, /* vec_store_cost. */
1252 2, /* cond_taken_branch_cost. */
1253 1, /* cond_not_taken_branch_cost. */
1256 struct processor_costs bdver1_cost
= {
1257 COSTS_N_INSNS (1), /* cost of an add instruction */
1258 COSTS_N_INSNS (1), /* cost of a lea instruction */
1259 COSTS_N_INSNS (1), /* variable shift costs */
1260 COSTS_N_INSNS (1), /* constant shift costs */
1261 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1262 COSTS_N_INSNS (4), /* HI */
1263 COSTS_N_INSNS (4), /* SI */
1264 COSTS_N_INSNS (6), /* DI */
1265 COSTS_N_INSNS (6)}, /* other */
1266 0, /* cost of multiply per each bit set */
1267 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1268 COSTS_N_INSNS (35), /* HI */
1269 COSTS_N_INSNS (51), /* SI */
1270 COSTS_N_INSNS (83), /* DI */
1271 COSTS_N_INSNS (83)}, /* other */
1272 COSTS_N_INSNS (1), /* cost of movsx */
1273 COSTS_N_INSNS (1), /* cost of movzx */
1274 8, /* "large" insn */
1276 4, /* cost for loading QImode using movzbl */
1277 {5, 5, 4}, /* cost of loading integer registers
1278 in QImode, HImode and SImode.
1279 Relative to reg-reg move (2). */
1280 {4, 4, 4}, /* cost of storing integer registers */
1281 2, /* cost of reg,reg fld/fst */
1282 {5, 5, 12}, /* cost of loading fp registers
1283 in SFmode, DFmode and XFmode */
1284 {4, 4, 8}, /* cost of storing fp registers
1285 in SFmode, DFmode and XFmode */
1286 2, /* cost of moving MMX register */
1287 {4, 4}, /* cost of loading MMX registers
1288 in SImode and DImode */
1289 {4, 4}, /* cost of storing MMX registers
1290 in SImode and DImode */
1291 2, /* cost of moving SSE register */
1292 {4, 4, 4}, /* cost of loading SSE registers
1293 in SImode, DImode and TImode */
1294 {4, 4, 4}, /* cost of storing SSE registers
1295 in SImode, DImode and TImode */
1296 2, /* MMX or SSE register to integer */
1298 MOVD reg64, xmmreg Double FSTORE 4
1299 MOVD reg32, xmmreg Double FSTORE 4
1301 MOVD reg64, xmmreg Double FADD 3
1303 MOVD reg32, xmmreg Double FADD 3
1305 16, /* size of l1 cache. */
1306 2048, /* size of l2 cache. */
1307 64, /* size of prefetch block */
1308 /* New AMD processors never drop prefetches; if they cannot be performed
1309 immediately, they are queued. We set number of simultaneous prefetches
1310 to a large constant to reflect this (it probably is not a good idea not
1311 to limit number of prefetches at all, as their execution also takes some
1313 100, /* number of parallel prefetches */
1314 2, /* Branch cost */
1315 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1316 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1317 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1318 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1319 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1320 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1322 /* BDVER1 has optimized REP instruction for medium sized blocks, but for
1323 very small blocks it is better to use loop. For large blocks, libcall
1324 can do nontemporary accesses and beat inline considerably. */
1325 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1326 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1327 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1328 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1329 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1330 6, /* scalar_stmt_cost. */
1331 4, /* scalar load_cost. */
1332 4, /* scalar_store_cost. */
1333 6, /* vec_stmt_cost. */
1334 0, /* vec_to_scalar_cost. */
1335 2, /* scalar_to_vec_cost. */
1336 4, /* vec_align_load_cost. */
1337 4, /* vec_unalign_load_cost. */
1338 4, /* vec_store_cost. */
1339 2, /* cond_taken_branch_cost. */
1340 1, /* cond_not_taken_branch_cost. */
1343 struct processor_costs bdver2_cost
= {
1344 COSTS_N_INSNS (1), /* cost of an add instruction */
1345 COSTS_N_INSNS (1), /* cost of a lea instruction */
1346 COSTS_N_INSNS (1), /* variable shift costs */
1347 COSTS_N_INSNS (1), /* constant shift costs */
1348 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1349 COSTS_N_INSNS (4), /* HI */
1350 COSTS_N_INSNS (4), /* SI */
1351 COSTS_N_INSNS (6), /* DI */
1352 COSTS_N_INSNS (6)}, /* other */
1353 0, /* cost of multiply per each bit set */
1354 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1355 COSTS_N_INSNS (35), /* HI */
1356 COSTS_N_INSNS (51), /* SI */
1357 COSTS_N_INSNS (83), /* DI */
1358 COSTS_N_INSNS (83)}, /* other */
1359 COSTS_N_INSNS (1), /* cost of movsx */
1360 COSTS_N_INSNS (1), /* cost of movzx */
1361 8, /* "large" insn */
1363 4, /* cost for loading QImode using movzbl */
1364 {5, 5, 4}, /* cost of loading integer registers
1365 in QImode, HImode and SImode.
1366 Relative to reg-reg move (2). */
1367 {4, 4, 4}, /* cost of storing integer registers */
1368 2, /* cost of reg,reg fld/fst */
1369 {5, 5, 12}, /* cost of loading fp registers
1370 in SFmode, DFmode and XFmode */
1371 {4, 4, 8}, /* cost of storing fp registers
1372 in SFmode, DFmode and XFmode */
1373 2, /* cost of moving MMX register */
1374 {4, 4}, /* cost of loading MMX registers
1375 in SImode and DImode */
1376 {4, 4}, /* cost of storing MMX registers
1377 in SImode and DImode */
1378 2, /* cost of moving SSE register */
1379 {4, 4, 4}, /* cost of loading SSE registers
1380 in SImode, DImode and TImode */
1381 {4, 4, 4}, /* cost of storing SSE registers
1382 in SImode, DImode and TImode */
1383 2, /* MMX or SSE register to integer */
1385 MOVD reg64, xmmreg Double FSTORE 4
1386 MOVD reg32, xmmreg Double FSTORE 4
1388 MOVD reg64, xmmreg Double FADD 3
1390 MOVD reg32, xmmreg Double FADD 3
1392 16, /* size of l1 cache. */
1393 2048, /* size of l2 cache. */
1394 64, /* size of prefetch block */
1395 /* New AMD processors never drop prefetches; if they cannot be performed
1396 immediately, they are queued. We set number of simultaneous prefetches
1397 to a large constant to reflect this (it probably is not a good idea not
1398 to limit number of prefetches at all, as their execution also takes some
1400 100, /* number of parallel prefetches */
1401 2, /* Branch cost */
1402 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1403 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1404 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1405 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1406 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1407 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1409 /* BDVER2 has optimized REP instruction for medium sized blocks, but for
1410 very small blocks it is better to use loop. For large blocks, libcall
1411 can do nontemporary accesses and beat inline considerably. */
1412 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1413 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1414 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1415 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1416 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1417 6, /* scalar_stmt_cost. */
1418 4, /* scalar load_cost. */
1419 4, /* scalar_store_cost. */
1420 6, /* vec_stmt_cost. */
1421 0, /* vec_to_scalar_cost. */
1422 2, /* scalar_to_vec_cost. */
1423 4, /* vec_align_load_cost. */
1424 4, /* vec_unalign_load_cost. */
1425 4, /* vec_store_cost. */
1426 2, /* cond_taken_branch_cost. */
1427 1, /* cond_not_taken_branch_cost. */
1430 struct processor_costs btver1_cost
= {
1431 COSTS_N_INSNS (1), /* cost of an add instruction */
1432 COSTS_N_INSNS (2), /* cost of a lea instruction */
1433 COSTS_N_INSNS (1), /* variable shift costs */
1434 COSTS_N_INSNS (1), /* constant shift costs */
1435 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1436 COSTS_N_INSNS (4), /* HI */
1437 COSTS_N_INSNS (3), /* SI */
1438 COSTS_N_INSNS (4), /* DI */
1439 COSTS_N_INSNS (5)}, /* other */
1440 0, /* cost of multiply per each bit set */
1441 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1442 COSTS_N_INSNS (35), /* HI */
1443 COSTS_N_INSNS (51), /* SI */
1444 COSTS_N_INSNS (83), /* DI */
1445 COSTS_N_INSNS (83)}, /* other */
1446 COSTS_N_INSNS (1), /* cost of movsx */
1447 COSTS_N_INSNS (1), /* cost of movzx */
1448 8, /* "large" insn */
1450 4, /* cost for loading QImode using movzbl */
1451 {3, 4, 3}, /* cost of loading integer registers
1452 in QImode, HImode and SImode.
1453 Relative to reg-reg move (2). */
1454 {3, 4, 3}, /* cost of storing integer registers */
1455 4, /* cost of reg,reg fld/fst */
1456 {4, 4, 12}, /* cost of loading fp registers
1457 in SFmode, DFmode and XFmode */
1458 {6, 6, 8}, /* cost of storing fp registers
1459 in SFmode, DFmode and XFmode */
1460 2, /* cost of moving MMX register */
1461 {3, 3}, /* cost of loading MMX registers
1462 in SImode and DImode */
1463 {4, 4}, /* cost of storing MMX registers
1464 in SImode and DImode */
1465 2, /* cost of moving SSE register */
1466 {4, 4, 3}, /* cost of loading SSE registers
1467 in SImode, DImode and TImode */
1468 {4, 4, 5}, /* cost of storing SSE registers
1469 in SImode, DImode and TImode */
1470 3, /* MMX or SSE register to integer */
1472 MOVD reg64, xmmreg Double FSTORE 4
1473 MOVD reg32, xmmreg Double FSTORE 4
1475 MOVD reg64, xmmreg Double FADD 3
1477 MOVD reg32, xmmreg Double FADD 3
1479 32, /* size of l1 cache. */
1480 512, /* size of l2 cache. */
1481 64, /* size of prefetch block */
1482 100, /* number of parallel prefetches */
1483 2, /* Branch cost */
1484 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1485 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1486 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1487 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1488 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1489 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1491 /* BTVER1 has optimized REP instruction for medium sized blocks, but for
1492 very small blocks it is better to use loop. For large blocks, libcall can
1493 do nontemporary accesses and beat inline considerably. */
1494 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1495 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1496 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1497 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1498 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1499 4, /* scalar_stmt_cost. */
1500 2, /* scalar load_cost. */
1501 2, /* scalar_store_cost. */
1502 6, /* vec_stmt_cost. */
1503 0, /* vec_to_scalar_cost. */
1504 2, /* scalar_to_vec_cost. */
1505 2, /* vec_align_load_cost. */
1506 2, /* vec_unalign_load_cost. */
1507 2, /* vec_store_cost. */
1508 2, /* cond_taken_branch_cost. */
1509 1, /* cond_not_taken_branch_cost. */
1512 struct processor_costs btver2_cost
= {
1513 COSTS_N_INSNS (1), /* cost of an add instruction */
1514 COSTS_N_INSNS (2), /* cost of a lea instruction */
1515 COSTS_N_INSNS (1), /* variable shift costs */
1516 COSTS_N_INSNS (1), /* constant shift costs */
1517 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1518 COSTS_N_INSNS (4), /* HI */
1519 COSTS_N_INSNS (3), /* SI */
1520 COSTS_N_INSNS (4), /* DI */
1521 COSTS_N_INSNS (5)}, /* other */
1522 0, /* cost of multiply per each bit set */
1523 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1524 COSTS_N_INSNS (35), /* HI */
1525 COSTS_N_INSNS (51), /* SI */
1526 COSTS_N_INSNS (83), /* DI */
1527 COSTS_N_INSNS (83)}, /* other */
1528 COSTS_N_INSNS (1), /* cost of movsx */
1529 COSTS_N_INSNS (1), /* cost of movzx */
1530 8, /* "large" insn */
1532 4, /* cost for loading QImode using movzbl */
1533 {3, 4, 3}, /* cost of loading integer registers
1534 in QImode, HImode and SImode.
1535 Relative to reg-reg move (2). */
1536 {3, 4, 3}, /* cost of storing integer registers */
1537 4, /* cost of reg,reg fld/fst */
1538 {4, 4, 12}, /* cost of loading fp registers
1539 in SFmode, DFmode and XFmode */
1540 {6, 6, 8}, /* cost of storing fp registers
1541 in SFmode, DFmode and XFmode */
1542 2, /* cost of moving MMX register */
1543 {3, 3}, /* cost of loading MMX registers
1544 in SImode and DImode */
1545 {4, 4}, /* cost of storing MMX registers
1546 in SImode and DImode */
1547 2, /* cost of moving SSE register */
1548 {4, 4, 3}, /* cost of loading SSE registers
1549 in SImode, DImode and TImode */
1550 {4, 4, 5}, /* cost of storing SSE registers
1551 in SImode, DImode and TImode */
1552 3, /* MMX or SSE register to integer */
1554 MOVD reg64, xmmreg Double FSTORE 4
1555 MOVD reg32, xmmreg Double FSTORE 4
1557 MOVD reg64, xmmreg Double FADD 3
1559 MOVD reg32, xmmreg Double FADD 3
1561 32, /* size of l1 cache. */
1562 2048, /* size of l2 cache. */
1563 64, /* size of prefetch block */
1564 100, /* number of parallel prefetches */
1565 2, /* Branch cost */
1566 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1567 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1568 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1569 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1570 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1571 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1573 {{libcall
, {{6, loop
}, {14, unrolled_loop
}, {-1, rep_prefix_4_byte
}}},
1574 {libcall
, {{16, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1575 {{libcall
, {{8, loop
}, {24, unrolled_loop
},
1576 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1577 {libcall
, {{48, unrolled_loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1578 4, /* scalar_stmt_cost. */
1579 2, /* scalar load_cost. */
1580 2, /* scalar_store_cost. */
1581 6, /* vec_stmt_cost. */
1582 0, /* vec_to_scalar_cost. */
1583 2, /* scalar_to_vec_cost. */
1584 2, /* vec_align_load_cost. */
1585 2, /* vec_unalign_load_cost. */
1586 2, /* vec_store_cost. */
1587 2, /* cond_taken_branch_cost. */
1588 1, /* cond_not_taken_branch_cost. */
1592 struct processor_costs pentium4_cost
= {
1593 COSTS_N_INSNS (1), /* cost of an add instruction */
1594 COSTS_N_INSNS (3), /* cost of a lea instruction */
1595 COSTS_N_INSNS (4), /* variable shift costs */
1596 COSTS_N_INSNS (4), /* constant shift costs */
1597 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1598 COSTS_N_INSNS (15), /* HI */
1599 COSTS_N_INSNS (15), /* SI */
1600 COSTS_N_INSNS (15), /* DI */
1601 COSTS_N_INSNS (15)}, /* other */
1602 0, /* cost of multiply per each bit set */
1603 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1604 COSTS_N_INSNS (56), /* HI */
1605 COSTS_N_INSNS (56), /* SI */
1606 COSTS_N_INSNS (56), /* DI */
1607 COSTS_N_INSNS (56)}, /* other */
1608 COSTS_N_INSNS (1), /* cost of movsx */
1609 COSTS_N_INSNS (1), /* cost of movzx */
1610 16, /* "large" insn */
1612 2, /* cost for loading QImode using movzbl */
1613 {4, 5, 4}, /* cost of loading integer registers
1614 in QImode, HImode and SImode.
1615 Relative to reg-reg move (2). */
1616 {2, 3, 2}, /* cost of storing integer registers */
1617 2, /* cost of reg,reg fld/fst */
1618 {2, 2, 6}, /* cost of loading fp registers
1619 in SFmode, DFmode and XFmode */
1620 {4, 4, 6}, /* cost of storing fp registers
1621 in SFmode, DFmode and XFmode */
1622 2, /* cost of moving MMX register */
1623 {2, 2}, /* cost of loading MMX registers
1624 in SImode and DImode */
1625 {2, 2}, /* cost of storing MMX registers
1626 in SImode and DImode */
1627 12, /* cost of moving SSE register */
1628 {12, 12, 12}, /* cost of loading SSE registers
1629 in SImode, DImode and TImode */
1630 {2, 2, 8}, /* cost of storing SSE registers
1631 in SImode, DImode and TImode */
1632 10, /* MMX or SSE register to integer */
1633 8, /* size of l1 cache. */
1634 256, /* size of l2 cache. */
1635 64, /* size of prefetch block */
1636 6, /* number of parallel prefetches */
1637 2, /* Branch cost */
1638 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1639 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1640 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1641 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1642 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1643 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1644 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1645 DUMMY_STRINGOP_ALGS
},
1646 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1648 DUMMY_STRINGOP_ALGS
},
1649 1, /* scalar_stmt_cost. */
1650 1, /* scalar load_cost. */
1651 1, /* scalar_store_cost. */
1652 1, /* vec_stmt_cost. */
1653 1, /* vec_to_scalar_cost. */
1654 1, /* scalar_to_vec_cost. */
1655 1, /* vec_align_load_cost. */
1656 2, /* vec_unalign_load_cost. */
1657 1, /* vec_store_cost. */
1658 3, /* cond_taken_branch_cost. */
1659 1, /* cond_not_taken_branch_cost. */
1663 struct processor_costs nocona_cost
= {
1664 COSTS_N_INSNS (1), /* cost of an add instruction */
1665 COSTS_N_INSNS (1), /* cost of a lea instruction */
1666 COSTS_N_INSNS (1), /* variable shift costs */
1667 COSTS_N_INSNS (1), /* constant shift costs */
1668 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1669 COSTS_N_INSNS (10), /* HI */
1670 COSTS_N_INSNS (10), /* SI */
1671 COSTS_N_INSNS (10), /* DI */
1672 COSTS_N_INSNS (10)}, /* other */
1673 0, /* cost of multiply per each bit set */
1674 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1675 COSTS_N_INSNS (66), /* HI */
1676 COSTS_N_INSNS (66), /* SI */
1677 COSTS_N_INSNS (66), /* DI */
1678 COSTS_N_INSNS (66)}, /* other */
1679 COSTS_N_INSNS (1), /* cost of movsx */
1680 COSTS_N_INSNS (1), /* cost of movzx */
1681 16, /* "large" insn */
1682 17, /* MOVE_RATIO */
1683 4, /* cost for loading QImode using movzbl */
1684 {4, 4, 4}, /* cost of loading integer registers
1685 in QImode, HImode and SImode.
1686 Relative to reg-reg move (2). */
1687 {4, 4, 4}, /* cost of storing integer registers */
1688 3, /* cost of reg,reg fld/fst */
1689 {12, 12, 12}, /* cost of loading fp registers
1690 in SFmode, DFmode and XFmode */
1691 {4, 4, 4}, /* cost of storing fp registers
1692 in SFmode, DFmode and XFmode */
1693 6, /* cost of moving MMX register */
1694 {12, 12}, /* cost of loading MMX registers
1695 in SImode and DImode */
1696 {12, 12}, /* cost of storing MMX registers
1697 in SImode and DImode */
1698 6, /* cost of moving SSE register */
1699 {12, 12, 12}, /* cost of loading SSE registers
1700 in SImode, DImode and TImode */
1701 {12, 12, 12}, /* cost of storing SSE registers
1702 in SImode, DImode and TImode */
1703 8, /* MMX or SSE register to integer */
1704 8, /* size of l1 cache. */
1705 1024, /* size of l2 cache. */
1706 128, /* size of prefetch block */
1707 8, /* number of parallel prefetches */
1708 1, /* Branch cost */
1709 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1710 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1711 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1712 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1713 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1714 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1715 {{libcall
, {{12, loop_1_byte
}, {-1, rep_prefix_4_byte
}}},
1716 {libcall
, {{32, loop
}, {20000, rep_prefix_8_byte
},
1717 {100000, unrolled_loop
}, {-1, libcall
}}}},
1718 {{libcall
, {{6, loop_1_byte
}, {48, loop
}, {20480, rep_prefix_4_byte
},
1720 {libcall
, {{24, loop
}, {64, unrolled_loop
},
1721 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1722 1, /* scalar_stmt_cost. */
1723 1, /* scalar load_cost. */
1724 1, /* scalar_store_cost. */
1725 1, /* vec_stmt_cost. */
1726 1, /* vec_to_scalar_cost. */
1727 1, /* scalar_to_vec_cost. */
1728 1, /* vec_align_load_cost. */
1729 2, /* vec_unalign_load_cost. */
1730 1, /* vec_store_cost. */
1731 3, /* cond_taken_branch_cost. */
1732 1, /* cond_not_taken_branch_cost. */
1736 struct processor_costs atom_cost
= {
1737 COSTS_N_INSNS (1), /* cost of an add instruction */
1738 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1739 COSTS_N_INSNS (1), /* variable shift costs */
1740 COSTS_N_INSNS (1), /* constant shift costs */
1741 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1742 COSTS_N_INSNS (4), /* HI */
1743 COSTS_N_INSNS (3), /* SI */
1744 COSTS_N_INSNS (4), /* DI */
1745 COSTS_N_INSNS (2)}, /* other */
1746 0, /* cost of multiply per each bit set */
1747 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1748 COSTS_N_INSNS (26), /* HI */
1749 COSTS_N_INSNS (42), /* SI */
1750 COSTS_N_INSNS (74), /* DI */
1751 COSTS_N_INSNS (74)}, /* other */
1752 COSTS_N_INSNS (1), /* cost of movsx */
1753 COSTS_N_INSNS (1), /* cost of movzx */
1754 8, /* "large" insn */
1755 17, /* MOVE_RATIO */
1756 4, /* cost for loading QImode using movzbl */
1757 {4, 4, 4}, /* cost of loading integer registers
1758 in QImode, HImode and SImode.
1759 Relative to reg-reg move (2). */
1760 {4, 4, 4}, /* cost of storing integer registers */
1761 4, /* cost of reg,reg fld/fst */
1762 {12, 12, 12}, /* cost of loading fp registers
1763 in SFmode, DFmode and XFmode */
1764 {6, 6, 8}, /* cost of storing fp registers
1765 in SFmode, DFmode and XFmode */
1766 2, /* cost of moving MMX register */
1767 {8, 8}, /* cost of loading MMX registers
1768 in SImode and DImode */
1769 {8, 8}, /* cost of storing MMX registers
1770 in SImode and DImode */
1771 2, /* cost of moving SSE register */
1772 {8, 8, 8}, /* cost of loading SSE registers
1773 in SImode, DImode and TImode */
1774 {8, 8, 8}, /* cost of storing SSE registers
1775 in SImode, DImode and TImode */
1776 5, /* MMX or SSE register to integer */
1777 32, /* size of l1 cache. */
1778 256, /* size of l2 cache. */
1779 64, /* size of prefetch block */
1780 6, /* number of parallel prefetches */
1781 3, /* Branch cost */
1782 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1783 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1784 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1785 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1786 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1787 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1788 {{libcall
, {{11, loop
}, {-1, rep_prefix_4_byte
}}},
1789 {libcall
, {{32, loop
}, {64, rep_prefix_4_byte
},
1790 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1791 {{libcall
, {{8, loop
}, {15, unrolled_loop
},
1792 {2048, rep_prefix_4_byte
}, {-1, libcall
}}},
1793 {libcall
, {{24, loop
}, {32, unrolled_loop
},
1794 {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1795 1, /* scalar_stmt_cost. */
1796 1, /* scalar load_cost. */
1797 1, /* scalar_store_cost. */
1798 1, /* vec_stmt_cost. */
1799 1, /* vec_to_scalar_cost. */
1800 1, /* scalar_to_vec_cost. */
1801 1, /* vec_align_load_cost. */
1802 2, /* vec_unalign_load_cost. */
1803 1, /* vec_store_cost. */
1804 3, /* cond_taken_branch_cost. */
1805 1, /* cond_not_taken_branch_cost. */
1808 /* Generic64 should produce code tuned for Nocona and K8. */
1810 struct processor_costs generic64_cost
= {
1811 COSTS_N_INSNS (1), /* cost of an add instruction */
1812 /* On all chips taken into consideration lea is 2 cycles and more. With
1813 this cost however our current implementation of synth_mult results in
1814 use of unnecessary temporary registers causing regression on several
1815 SPECfp benchmarks. */
1816 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1817 COSTS_N_INSNS (1), /* variable shift costs */
1818 COSTS_N_INSNS (1), /* constant shift costs */
1819 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1820 COSTS_N_INSNS (4), /* HI */
1821 COSTS_N_INSNS (3), /* SI */
1822 COSTS_N_INSNS (4), /* DI */
1823 COSTS_N_INSNS (2)}, /* other */
1824 0, /* cost of multiply per each bit set */
1825 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1826 COSTS_N_INSNS (26), /* HI */
1827 COSTS_N_INSNS (42), /* SI */
1828 COSTS_N_INSNS (74), /* DI */
1829 COSTS_N_INSNS (74)}, /* other */
1830 COSTS_N_INSNS (1), /* cost of movsx */
1831 COSTS_N_INSNS (1), /* cost of movzx */
1832 8, /* "large" insn */
1833 17, /* MOVE_RATIO */
1834 4, /* cost for loading QImode using movzbl */
1835 {4, 4, 4}, /* cost of loading integer registers
1836 in QImode, HImode and SImode.
1837 Relative to reg-reg move (2). */
1838 {4, 4, 4}, /* cost of storing integer registers */
1839 4, /* cost of reg,reg fld/fst */
1840 {12, 12, 12}, /* cost of loading fp registers
1841 in SFmode, DFmode and XFmode */
1842 {6, 6, 8}, /* cost of storing fp registers
1843 in SFmode, DFmode and XFmode */
1844 2, /* cost of moving MMX register */
1845 {8, 8}, /* cost of loading MMX registers
1846 in SImode and DImode */
1847 {8, 8}, /* cost of storing MMX registers
1848 in SImode and DImode */
1849 2, /* cost of moving SSE register */
1850 {8, 8, 8}, /* cost of loading SSE registers
1851 in SImode, DImode and TImode */
1852 {8, 8, 8}, /* cost of storing SSE registers
1853 in SImode, DImode and TImode */
1854 5, /* MMX or SSE register to integer */
1855 32, /* size of l1 cache. */
1856 512, /* size of l2 cache. */
1857 64, /* size of prefetch block */
1858 6, /* number of parallel prefetches */
1859 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
1860 value is increased to perhaps more appropriate value of 5. */
1861 3, /* Branch cost */
1862 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1863 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1864 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1865 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1866 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1867 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1868 {DUMMY_STRINGOP_ALGS
,
1869 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1870 {DUMMY_STRINGOP_ALGS
,
1871 {libcall
, {{32, loop
}, {8192, rep_prefix_8_byte
}, {-1, libcall
}}}},
1872 1, /* scalar_stmt_cost. */
1873 1, /* scalar load_cost. */
1874 1, /* scalar_store_cost. */
1875 1, /* vec_stmt_cost. */
1876 1, /* vec_to_scalar_cost. */
1877 1, /* scalar_to_vec_cost. */
1878 1, /* vec_align_load_cost. */
1879 2, /* vec_unalign_load_cost. */
1880 1, /* vec_store_cost. */
1881 3, /* cond_taken_branch_cost. */
1882 1, /* cond_not_taken_branch_cost. */
1885 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
1888 struct processor_costs generic32_cost
= {
1889 COSTS_N_INSNS (1), /* cost of an add instruction */
1890 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1891 COSTS_N_INSNS (1), /* variable shift costs */
1892 COSTS_N_INSNS (1), /* constant shift costs */
1893 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1894 COSTS_N_INSNS (4), /* HI */
1895 COSTS_N_INSNS (3), /* SI */
1896 COSTS_N_INSNS (4), /* DI */
1897 COSTS_N_INSNS (2)}, /* other */
1898 0, /* cost of multiply per each bit set */
1899 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1900 COSTS_N_INSNS (26), /* HI */
1901 COSTS_N_INSNS (42), /* SI */
1902 COSTS_N_INSNS (74), /* DI */
1903 COSTS_N_INSNS (74)}, /* other */
1904 COSTS_N_INSNS (1), /* cost of movsx */
1905 COSTS_N_INSNS (1), /* cost of movzx */
1906 8, /* "large" insn */
1907 17, /* MOVE_RATIO */
1908 4, /* cost for loading QImode using movzbl */
1909 {4, 4, 4}, /* cost of loading integer registers
1910 in QImode, HImode and SImode.
1911 Relative to reg-reg move (2). */
1912 {4, 4, 4}, /* cost of storing integer registers */
1913 4, /* cost of reg,reg fld/fst */
1914 {12, 12, 12}, /* cost of loading fp registers
1915 in SFmode, DFmode and XFmode */
1916 {6, 6, 8}, /* cost of storing fp registers
1917 in SFmode, DFmode and XFmode */
1918 2, /* cost of moving MMX register */
1919 {8, 8}, /* cost of loading MMX registers
1920 in SImode and DImode */
1921 {8, 8}, /* cost of storing MMX registers
1922 in SImode and DImode */
1923 2, /* cost of moving SSE register */
1924 {8, 8, 8}, /* cost of loading SSE registers
1925 in SImode, DImode and TImode */
1926 {8, 8, 8}, /* cost of storing SSE registers
1927 in SImode, DImode and TImode */
1928 5, /* MMX or SSE register to integer */
1929 32, /* size of l1 cache. */
1930 256, /* size of l2 cache. */
1931 64, /* size of prefetch block */
1932 6, /* number of parallel prefetches */
1933 3, /* Branch cost */
1934 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1935 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1936 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1937 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1938 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1939 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1940 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1941 DUMMY_STRINGOP_ALGS
},
1942 {{libcall
, {{32, loop
}, {8192, rep_prefix_4_byte
}, {-1, libcall
}}},
1943 DUMMY_STRINGOP_ALGS
},
1944 1, /* scalar_stmt_cost. */
1945 1, /* scalar load_cost. */
1946 1, /* scalar_store_cost. */
1947 1, /* vec_stmt_cost. */
1948 1, /* vec_to_scalar_cost. */
1949 1, /* scalar_to_vec_cost. */
1950 1, /* vec_align_load_cost. */
1951 2, /* vec_unalign_load_cost. */
1952 1, /* vec_store_cost. */
1953 3, /* cond_taken_branch_cost. */
1954 1, /* cond_not_taken_branch_cost. */
1957 /* Set by -mtune. */
1958 const struct processor_costs
*ix86_tune_cost
= &pentium_cost
;
1960 /* Set by -mtune or -Os. */
1961 const struct processor_costs
*ix86_cost
= &pentium_cost
;
/* Processor feature/optimization bitmasks, one bit per PROCESSOR_* value.
   Simple masks first, combined masks defined after all of their
   components (the original source defined m_BTVER before m_BTVER1 and
   m_BTVER2 -- legal for object-like macros since expansion is lazy, but
   a confusing apparent forward reference; fixed here).  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
2003 /* Feature tests against the various tunings. */
2004 unsigned char ix86_tune_features
[X86_TUNE_LAST
];
2006 /* Feature tests against the various tunings used to create ix86_tune_features
2007 based on the processor mask. */
2008 static unsigned int initial_ix86_tune_features
[X86_TUNE_LAST
] = {
2009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
2010 negatively, so enabling for Generic64 seems like good code size
2011 tradeoff. We can't enable it for 32bit generic because it does not
2012 work well with PPro base chips. */
2013 m_386
| m_CORE2I7_64
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC64
,
2015 /* X86_TUNE_PUSH_MEMORY */
2016 m_386
| m_P4_NOCONA
| m_CORE2I7
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2018 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
2021 /* X86_TUNE_UNROLL_STRLEN */
2022 m_486
| m_PENT
| m_PPRO
| m_ATOM
| m_CORE2I7
| m_K6
| m_AMD_MULTIPLE
| m_GENERIC
,
2024 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
2025 on simulation result. But after P4 was made, no performance benefit
2026 was observed with branch hints. It also increases the code size.
2027 As a result, icc never generates branch hints. */
2030 /* X86_TUNE_DOUBLE_WITH_ADD */
2033 /* X86_TUNE_USE_SAHF */
2034 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC
,
2036 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
2037 partial dependencies. */
2038 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2040 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
2041 register stalls on Generic32 compilation setting as well. However
2042 in current implementation the partial register stalls are not eliminated
2043 very well - they can be introduced via subregs synthesized by combine
2044 and can happen in caller/callee saving sequences. Because this option
2045 pays back little on PPro based chips and is in conflict with partial reg
2046 dependencies used by Athlon/P4 based chips, it is better to leave it off
2047 for generic32 for now. */
2050 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
2051 m_CORE2I7
| m_GENERIC
,
2053 /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
2054 * on 16-bit immediate moves into memory on Core2 and Corei7. */
2055 m_CORE2I7
| m_GENERIC
,
2057 /* X86_TUNE_USE_HIMODE_FIOP */
2058 m_386
| m_486
| m_K6_GEODE
,
2060 /* X86_TUNE_USE_SIMODE_FIOP */
2061 ~(m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
),
2063 /* X86_TUNE_USE_MOV0 */
2066 /* X86_TUNE_USE_CLTD */
2067 ~(m_PENT
| m_CORE2I7
| m_ATOM
| m_K6
| m_GENERIC
),
2069 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
2072 /* X86_TUNE_SPLIT_LONG_MOVES */
2075 /* X86_TUNE_READ_MODIFY_WRITE */
2078 /* X86_TUNE_READ_MODIFY */
2081 /* X86_TUNE_PROMOTE_QIMODE */
2082 m_386
| m_486
| m_PENT
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2084 /* X86_TUNE_FAST_PREFIX */
2085 ~(m_386
| m_486
| m_PENT
),
2087 /* X86_TUNE_SINGLE_STRINGOP */
2088 m_386
| m_P4_NOCONA
,
2090 /* X86_TUNE_QIMODE_MATH */
2093 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
2094 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
2095 might be considered for Generic32 if our scheme for avoiding partial
2096 stalls was more effective. */
2099 /* X86_TUNE_PROMOTE_QI_REGS */
2102 /* X86_TUNE_PROMOTE_HI_REGS */
2105 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
2106 over esp addition. */
2107 m_386
| m_486
| m_PENT
| m_PPRO
,
2109 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
2110 over esp addition. */
2113 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
2114 over esp subtraction. */
2115 m_386
| m_486
| m_PENT
| m_K6_GEODE
,
2117 /* X86_TUNE_DOUBLE_PUSH. Enable if double push insn is preferred
2118 over esp subtraction. */
2119 m_PENT
| m_K6_GEODE
,
2121 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
2122 for DFmode copies */
2123 ~(m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GEODE
| m_AMD_MULTIPLE
| m_ATOM
| m_GENERIC
),
2125 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
2126 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2128 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
2129 conflict here in between PPro/Pentium4 based chips that thread 128bit
2130 SSE registers as single units versus K8 based chips that divide SSE
2131 registers to two 64bit halves. This knob promotes all store destinations
2132 to be 128bit to allow register renaming on 128bit SSE units, but usually
2133 results in one extra microop on 64bit SSE units. Experimental results
2134 shows that disabling this option on P4 brings over 20% SPECfp regression,
2135 while enabling it on K8 brings roughly 2.4% regression that can be partly
2136 masked by careful scheduling of moves. */
2137 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMDFAM10
| m_BDVER
| m_GENERIC
,
2139 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
2140 m_COREI7
| m_AMDFAM10
| m_BDVER
| m_BTVER
,
2142 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
2145 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
2148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
2149 are resolved on SSE register parts instead of whole registers, so we may
2150 maintain just lower part of scalar values in proper format leaving the
2151 upper part undefined. */
2154 /* X86_TUNE_SSE_TYPELESS_STORES */
2157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
2158 m_PPRO
| m_P4_NOCONA
,
2160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
2161 m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2163 /* X86_TUNE_PROLOGUE_USING_MOVE */
2164 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2166 /* X86_TUNE_EPILOGUE_USING_MOVE */
2167 m_PPRO
| m_CORE2I7
| m_ATOM
| m_ATHLON_K8
| m_GENERIC
,
2169 /* X86_TUNE_SHIFT1 */
2172 /* X86_TUNE_USE_FFREEP */
2175 /* X86_TUNE_INTER_UNIT_MOVES */
2176 ~(m_AMD_MULTIPLE
| m_GENERIC
),
2178 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
2179 ~(m_AMDFAM10
| m_BDVER
),
2181 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
2182 than 4 branch instructions in the 16 byte window. */
2183 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2185 /* X86_TUNE_SCHEDULE */
2186 m_PENT
| m_PPRO
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_AMD_MULTIPLE
| m_GENERIC
,
2188 /* X86_TUNE_USE_BT */
2189 m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
,
2191 /* X86_TUNE_USE_INCDEC */
2192 ~(m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_GENERIC
),
2194 /* X86_TUNE_PAD_RETURNS */
2195 m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
,
2197 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short funtion. */
2200 /* X86_TUNE_EXT_80387_CONSTANTS */
2201 m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_K6_GEODE
| m_ATHLON_K8
| m_GENERIC
,
2203 /* X86_TUNE_SHORTEN_X87_SSE */
2206 /* X86_TUNE_AVOID_VECTOR_DECODE */
2207 m_CORE2I7_64
| m_K8
| m_GENERIC64
,
2209 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
2210 and SImode multiply, but 386 and 486 do HImode multiply faster. */
2213 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
2214 vector path on AMD machines. */
2215 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
2217 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
2219 m_CORE2I7_64
| m_K8
| m_AMDFAM10
| m_BDVER
| m_BTVER
| m_GENERIC64
,
2221 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
2225 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
2226 but one byte longer. */
2229 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
2230 operand that cannot be represented using a modRM byte. The XOR
2231 replacement is long decoded, so this split helps here as well. */
2234 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
2236 m_CORE2I7
| m_AMDFAM10
| m_GENERIC
,
2238 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
2239 from integer to FP. */
2242 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
2243 with a subsequent conditional jump instruction into a single
2244 compare-and-branch uop. */
2247 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
2248 will impact LEA instruction selection. */
2251 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
2255 /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
2256 at -O3. For the moment, the prefetching seems badly tuned for Intel
2258 m_K6_GEODE
| m_AMD_MULTIPLE
,
2260 /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
2261 the auto-vectorizer. */
2264 /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
2265 during reassociation of integer computation. */
2268 /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
2269 during reassociation of fp computation. */
2272 /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
2273 regs instead of memory. */
2274 m_COREI7
| m_CORE2I7
2277 /* Feature tests against the various architecture variations. */
2278 unsigned char ix86_arch_features
[X86_ARCH_LAST
];
2280 /* Feature tests against the various architecture variations, used to create
2281 ix86_arch_features based on the processor mask. */
2282 static unsigned int initial_ix86_arch_features
[X86_ARCH_LAST
] = {
2283 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2284 ~(m_386
| m_486
| m_PENT
| m_K6
),
2286 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2289 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2292 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2295 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2299 static const unsigned int x86_accumulate_outgoing_args
2300 = m_PPRO
| m_P4_NOCONA
| m_ATOM
| m_CORE2I7
| m_AMD_MULTIPLE
| m_GENERIC
;
2302 static const unsigned int x86_arch_always_fancy_math_387
2303 = m_PENT
| m_PPRO
| m_P4_NOCONA
| m_CORE2I7
| m_ATOM
| m_AMD_MULTIPLE
| m_GENERIC
;
2305 static const unsigned int x86_avx256_split_unaligned_load
2306 = m_COREI7
| m_GENERIC
;
2308 static const unsigned int x86_avx256_split_unaligned_store
2309 = m_COREI7
| m_BDVER
| m_GENERIC
;
2311 /* In case the average insn count for single function invocation is
2312 lower than this constant, emit fast (but longer) prologue and
2314 #define FAST_PROLOGUE_INSN_COUNT 20
2316 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2317 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
2318 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
2319 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
2321 /* Array of the smallest class containing reg number REGNO, indexed by
2322 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2324 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
2326 /* ax, dx, cx, bx */
2327 AREG
, DREG
, CREG
, BREG
,
2328 /* si, di, bp, sp */
2329 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
2331 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
2332 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
2335 /* flags, fpsr, fpcr, frame */
2336 NO_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
2338 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2341 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
2344 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2345 NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
, NON_Q_REGS
,
2346 /* SSE REX registers */
2347 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
2351 /* The "default" register map used in 32bit mode. */
2353 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2355 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2356 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2357 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2358 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2359 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2360 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2361 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2364 /* The "default" register map used in 64bit mode. */
2366 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
2368 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2369 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2370 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2371 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2372 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2373 8,9,10,11,12,13,14,15, /* extended integer registers */
2374 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2377 /* Define the register numbers to be used in Dwarf debugging information.
2378 The SVR4 reference port C compiler uses the following register numbers
2379 in its Dwarf output code:
2380 0 for %eax (gcc regno = 0)
2381 1 for %ecx (gcc regno = 2)
2382 2 for %edx (gcc regno = 1)
2383 3 for %ebx (gcc regno = 3)
2384 4 for %esp (gcc regno = 7)
2385 5 for %ebp (gcc regno = 6)
2386 6 for %esi (gcc regno = 4)
2387 7 for %edi (gcc regno = 5)
2388 The following three DWARF register numbers are never generated by
2389 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2390 believes these numbers have these meanings.
2391 8 for %eip (no gcc equivalent)
2392 9 for %eflags (gcc regno = 17)
2393 10 for %trapno (no gcc equivalent)
2394 It is not at all clear how we should number the FP stack registers
2395 for the x86 architecture. If the version of SDB on x86/svr4 were
2396 a bit less brain dead with respect to floating-point then we would
2397 have a precedent to follow with respect to DWARF register numbers
2398 for x86 FP registers, but the SDB on x86/svr4 is so completely
2399 broken with respect to FP registers that it is hardly worth thinking
2400 of it as something to strive for compatibility with.
2401 The version of x86/svr4 SDB I have at the moment does (partially)
2402 seem to believe that DWARF register number 11 is associated with
2403 the x86 register %st(0), but that's about all. Higher DWARF
2404 register numbers don't seem to be associated with anything in
2405 particular, and even for DWARF regno 11, SDB only seems to under-
2406 stand that it should say that a variable lives in %st(0) (when
2407 asked via an `=' command) if we said it was in DWARF regno 11,
2408 but SDB still prints garbage when asked for the value of the
2409 variable in question (via a `/' command).
2410 (Also note that the labels SDB prints for various FP stack regs
2411 when doing an `x' command are all wrong.)
2412 Note that these problems generally don't affect the native SVR4
2413 C compiler because it doesn't allow the use of -O with -g and
2414 because when it is *not* optimizing, it allocates a memory
2415 location for each floating-point variable, and the memory
2416 location is what gets described in the DWARF AT_location
2417 attribute for the variable in question.
2418 Regardless of the severe mental illness of the x86/svr4 SDB, we
2419 do something sensible here and we use the following DWARF
2420 register numbers. Note that these are all stack-top-relative
2422 11 for %st(0) (gcc regno = 8)
2423 12 for %st(1) (gcc regno = 9)
2424 13 for %st(2) (gcc regno = 10)
2425 14 for %st(3) (gcc regno = 11)
2426 15 for %st(4) (gcc regno = 12)
2427 16 for %st(5) (gcc regno = 13)
2428 17 for %st(6) (gcc regno = 14)
2429 18 for %st(7) (gcc regno = 15)
2431 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
2433 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2434 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2435 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2436 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2437 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2438 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2439 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2442 /* Define parameter passing and return registers. */
2444 static int const x86_64_int_parameter_registers
[6] =
2446 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
2449 static int const x86_64_ms_abi_int_parameter_registers
[4] =
2451 CX_REG
, DX_REG
, R8_REG
, R9_REG
2454 static int const x86_64_int_return_registers
[4] =
2456 AX_REG
, DX_REG
, DI_REG
, SI_REG
2459 /* Define the structure for the machine field in struct function. */
2461 struct GTY(()) stack_local_entry
{
2462 unsigned short mode
;
2465 struct stack_local_entry
*next
;
2468 /* Structure describing stack frame layout.
2469 Stack grows downward:
2475 saved static chain if ix86_static_chain_on_stack
2477 saved frame pointer if frame_pointer_needed
2478 <- HARD_FRAME_POINTER
2484 <- sse_regs_save_offset
2487 [va_arg registers] |
2491 [padding2] | = to_allocate
2500 int outgoing_arguments_size
;
2502 /* The offsets relative to ARG_POINTER. */
2503 HOST_WIDE_INT frame_pointer_offset
;
2504 HOST_WIDE_INT hard_frame_pointer_offset
;
2505 HOST_WIDE_INT stack_pointer_offset
;
2506 HOST_WIDE_INT hfp_save_offset
;
2507 HOST_WIDE_INT reg_save_offset
;
2508 HOST_WIDE_INT sse_reg_save_offset
;
2510 /* When save_regs_using_mov is set, emit prologue using
2511 move instead of push instructions. */
2512 bool save_regs_using_mov
;
2515 /* Which cpu are we scheduling for. */
2516 enum attr_cpu ix86_schedule
;
2518 /* Which cpu are we optimizing for. */
2519 enum processor_type ix86_tune
;
2521 /* Which instruction set architecture to use. */
2522 enum processor_type ix86_arch
;
2524 /* True if processor has SSE prefetch instruction. */
2525 unsigned char x86_prefetch_sse
;
2527 /* -mstackrealign option */
2528 static const char ix86_force_align_arg_pointer_string
[]
2529 = "force_align_arg_pointer";
2531 static rtx (*ix86_gen_leave
) (void);
2532 static rtx (*ix86_gen_add3
) (rtx
, rtx
, rtx
);
2533 static rtx (*ix86_gen_sub3
) (rtx
, rtx
, rtx
);
2534 static rtx (*ix86_gen_sub3_carry
) (rtx
, rtx
, rtx
, rtx
, rtx
);
2535 static rtx (*ix86_gen_one_cmpl2
) (rtx
, rtx
);
2536 static rtx (*ix86_gen_monitor
) (rtx
, rtx
, rtx
);
2537 static rtx (*ix86_gen_andsp
) (rtx
, rtx
, rtx
);
2538 static rtx (*ix86_gen_allocate_stack_worker
) (rtx
, rtx
);
2539 static rtx (*ix86_gen_adjust_stack_and_probe
) (rtx
, rtx
, rtx
);
2540 static rtx (*ix86_gen_probe_stack_range
) (rtx
, rtx
, rtx
);
2541 static rtx (*ix86_gen_tls_global_dynamic_64
) (rtx
, rtx
, rtx
);
2542 static rtx (*ix86_gen_tls_local_dynamic_base_64
) (rtx
, rtx
);
2544 /* Preferred alignment for stack boundary in bits. */
2545 unsigned int ix86_preferred_stack_boundary
;
2547 /* Alignment for incoming stack boundary in bits specified at
2549 static unsigned int ix86_user_incoming_stack_boundary
;
2551 /* Default alignment for incoming stack boundary in bits. */
2552 static unsigned int ix86_default_incoming_stack_boundary
;
2554 /* Alignment for incoming stack boundary in bits. */
2555 unsigned int ix86_incoming_stack_boundary
;
2557 /* Calling abi specific va_list type nodes. */
2558 static GTY(()) tree sysv_va_list_type_node
;
2559 static GTY(()) tree ms_va_list_type_node
;
2561 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2562 char internal_label_prefix
[16];
2563 int internal_label_prefix_len
;
2565 /* Fence to use after loop using movnt. */
2568 /* Register class used for passing given 64bit part of the argument.
2569 These represent classes as documented by the PS ABI, with the exception
2570 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
2571 use SF or DFmode move instead of DImode to avoid reformatting penalties.
2573 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2574 whenever possible (upper half does contain padding). */
2575 enum x86_64_reg_class
2578 X86_64_INTEGER_CLASS
,
2579 X86_64_INTEGERSI_CLASS
,
2586 X86_64_COMPLEX_X87_CLASS
,
2590 #define MAX_CLASSES 4
2592 /* Table of constants used by fldpi, fldln2, etc.... */
2593 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
2594 static bool ext_80387_constants_init
= 0;
2597 static struct machine_function
* ix86_init_machine_status (void);
2598 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
2599 static bool ix86_function_value_regno_p (const unsigned int);
2600 static unsigned int ix86_function_arg_boundary (enum machine_mode
,
2602 static rtx
ix86_static_chain (const_tree
, bool);
2603 static int ix86_function_regparm (const_tree
, const_tree
);
2604 static void ix86_compute_frame_layout (struct ix86_frame
*);
2605 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode
,
2607 static void ix86_add_new_builtins (HOST_WIDE_INT
);
2608 static tree
ix86_canonical_va_list_type (tree
);
2609 static void predict_jump (int);
2610 static unsigned int split_stack_prologue_scratch_regno (void);
2611 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
/* Indices of the saved per-function target strings (arch and tune).  */
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};
2620 static char *ix86_target_string (HOST_WIDE_INT
, int, const char *,
2621 const char *, enum fpmath_unit
, bool);
2622 static void ix86_debug_options (void) ATTRIBUTE_UNUSED
;
2623 static void ix86_function_specific_save (struct cl_target_option
*);
2624 static void ix86_function_specific_restore (struct cl_target_option
*);
2625 static void ix86_function_specific_print (FILE *, int,
2626 struct cl_target_option
*);
2627 static bool ix86_valid_target_attribute_p (tree
, tree
, tree
, int);
2628 static bool ix86_valid_target_attribute_inner_p (tree
, char *[],
2629 struct gcc_options
*);
2630 static bool ix86_can_inline_p (tree
, tree
);
2631 static void ix86_set_current_function (tree
);
2632 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2634 static enum calling_abi
ix86_function_abi (const_tree
);
2637 #ifndef SUBTARGET32_DEFAULT_CPU
2638 #define SUBTARGET32_DEFAULT_CPU "i386"
2641 /* The svr4 ABI for the i386 says that records and unions are returned
2643 #ifndef DEFAULT_PCC_STRUCT_RETURN
2644 #define DEFAULT_PCC_STRUCT_RETURN 1
/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;
2651 /* Vectorization library interface and handlers. */
2652 static tree (*ix86_veclib_handler
) (enum built_in_function
, tree
, tree
);
2654 static tree
ix86_veclibabi_svml (enum built_in_function
, tree
, tree
);
2655 static tree
ix86_veclibabi_acml (enum built_in_function
, tree
, tree
);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};
2668 static const struct ptt processor_target_table
[PROCESSOR_max
] =
2670 {&i386_cost
, 4, 3, 4, 3, 4},
2671 {&i486_cost
, 16, 15, 16, 15, 16},
2672 {&pentium_cost
, 16, 7, 16, 7, 16},
2673 {&pentiumpro_cost
, 16, 15, 16, 10, 16},
2674 {&geode_cost
, 0, 0, 0, 0, 0},
2675 {&k6_cost
, 32, 7, 32, 7, 32},
2676 {&athlon_cost
, 16, 7, 16, 7, 16},
2677 {&pentium4_cost
, 0, 0, 0, 0, 0},
2678 {&k8_cost
, 16, 7, 16, 7, 16},
2679 {&nocona_cost
, 0, 0, 0, 0, 0},
2680 /* Core 2 32-bit. */
2681 {&generic32_cost
, 16, 10, 16, 10, 16},
2682 /* Core 2 64-bit. */
2683 {&generic64_cost
, 16, 10, 16, 10, 16},
2684 /* Core i7 32-bit. */
2685 {&generic32_cost
, 16, 10, 16, 10, 16},
2686 /* Core i7 64-bit. */
2687 {&generic64_cost
, 16, 10, 16, 10, 16},
2688 {&generic32_cost
, 16, 7, 16, 7, 16},
2689 {&generic64_cost
, 16, 10, 16, 10, 16},
2690 {&amdfam10_cost
, 32, 24, 32, 7, 32},
2691 {&bdver1_cost
, 32, 24, 32, 7, 32},
2692 {&bdver2_cost
, 32, 24, 32, 7, 32},
2693 {&btver1_cost
, 32, 24, 32, 7, 32},
2694 {&btver2_cost
, 32, 24, 32, 7, 32},
2695 {&atom_cost
, 16, 15, 16, 7, 16}
2698 static const char *const cpu_names
[TARGET_CPU_DEFAULT_max
] =
2729 /* Return true if a red-zone is in use. */
2732 ix86_using_red_zone (void)
2734 return TARGET_RED_ZONE
&& !TARGET_64BIT_MS_ABI
;
2737 /* Return a string that documents the current -m options. The caller is
2738 responsible for freeing the string. */
2741 ix86_target_string (HOST_WIDE_INT isa
, int flags
, const char *arch
,
2742 const char *tune
, enum fpmath_unit fpmath
,
2745 struct ix86_target_opts
2747 const char *option
; /* option string */
2748 HOST_WIDE_INT mask
; /* isa mask options */
2751 /* This table is ordered so that options like -msse4.2 that imply
2752 preceding options while match those first. */
2753 static struct ix86_target_opts isa_opts
[] =
2755 { "-mfma4", OPTION_MASK_ISA_FMA4
},
2756 { "-mfma", OPTION_MASK_ISA_FMA
},
2757 { "-mxop", OPTION_MASK_ISA_XOP
},
2758 { "-mlwp", OPTION_MASK_ISA_LWP
},
2759 { "-msse4a", OPTION_MASK_ISA_SSE4A
},
2760 { "-msse4.2", OPTION_MASK_ISA_SSE4_2
},
2761 { "-msse4.1", OPTION_MASK_ISA_SSE4_1
},
2762 { "-mssse3", OPTION_MASK_ISA_SSSE3
},
2763 { "-msse3", OPTION_MASK_ISA_SSE3
},
2764 { "-msse2", OPTION_MASK_ISA_SSE2
},
2765 { "-msse", OPTION_MASK_ISA_SSE
},
2766 { "-m3dnow", OPTION_MASK_ISA_3DNOW
},
2767 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A
},
2768 { "-mmmx", OPTION_MASK_ISA_MMX
},
2769 { "-mabm", OPTION_MASK_ISA_ABM
},
2770 { "-mbmi", OPTION_MASK_ISA_BMI
},
2771 { "-mbmi2", OPTION_MASK_ISA_BMI2
},
2772 { "-mlzcnt", OPTION_MASK_ISA_LZCNT
},
2773 { "-mhle", OPTION_MASK_ISA_HLE
},
2774 { "-mfxsr", OPTION_MASK_ISA_FXSR
},
2775 { "-mrdseed", OPTION_MASK_ISA_RDSEED
},
2776 { "-mprfchw", OPTION_MASK_ISA_PRFCHW
},
2777 { "-madx", OPTION_MASK_ISA_ADX
},
2778 { "-mtbm", OPTION_MASK_ISA_TBM
},
2779 { "-mpopcnt", OPTION_MASK_ISA_POPCNT
},
2780 { "-mmovbe", OPTION_MASK_ISA_MOVBE
},
2781 { "-mcrc32", OPTION_MASK_ISA_CRC32
},
2782 { "-maes", OPTION_MASK_ISA_AES
},
2783 { "-mpclmul", OPTION_MASK_ISA_PCLMUL
},
2784 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE
},
2785 { "-mrdrnd", OPTION_MASK_ISA_RDRND
},
2786 { "-mf16c", OPTION_MASK_ISA_F16C
},
2787 { "-mrtm", OPTION_MASK_ISA_RTM
},
2788 { "-mxsave", OPTION_MASK_ISA_XSAVE
},
2789 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT
},
2793 static struct ix86_target_opts flag_opts
[] =
2795 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE
},
2796 { "-mlong-double-64", MASK_LONG_DOUBLE_64
},
2797 { "-m80387", MASK_80387
},
2798 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS
},
2799 { "-malign-double", MASK_ALIGN_DOUBLE
},
2800 { "-mcld", MASK_CLD
},
2801 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS
},
2802 { "-mieee-fp", MASK_IEEE_FP
},
2803 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS
},
2804 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY
},
2805 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT
},
2806 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS
},
2807 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387
},
2808 { "-mno-push-args", MASK_NO_PUSH_ARGS
},
2809 { "-mno-red-zone", MASK_NO_RED_ZONE
},
2810 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER
},
2811 { "-mrecip", MASK_RECIP
},
2812 { "-mrtd", MASK_RTD
},
2813 { "-msseregparm", MASK_SSEREGPARM
},
2814 { "-mstack-arg-probe", MASK_STACK_PROBE
},
2815 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS
},
2816 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS
},
2817 { "-m8bit-idiv", MASK_USE_8BIT_IDIV
},
2818 { "-mvzeroupper", MASK_VZEROUPPER
},
2819 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD
},
2820 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE
},
2821 { "-mprefer-avx128", MASK_PREFER_AVX128
},
2824 const char *opts
[ARRAY_SIZE (isa_opts
) + ARRAY_SIZE (flag_opts
) + 6][2];
2827 char target_other
[40];
2837 memset (opts
, '\0', sizeof (opts
));
2839 /* Add -march= option. */
2842 opts
[num
][0] = "-march=";
2843 opts
[num
++][1] = arch
;
2846 /* Add -mtune= option. */
2849 opts
[num
][0] = "-mtune=";
2850 opts
[num
++][1] = tune
;
2853 /* Add -m32/-m64/-mx32. */
2854 if ((isa
& OPTION_MASK_ISA_64BIT
) != 0)
2856 if ((isa
& OPTION_MASK_ABI_64
) != 0)
2860 isa
&= ~ (OPTION_MASK_ISA_64BIT
2861 | OPTION_MASK_ABI_64
2862 | OPTION_MASK_ABI_X32
);
2866 opts
[num
++][0] = abi
;
2868 /* Pick out the options in isa options. */
2869 for (i
= 0; i
< ARRAY_SIZE (isa_opts
); i
++)
2871 if ((isa
& isa_opts
[i
].mask
) != 0)
2873 opts
[num
++][0] = isa_opts
[i
].option
;
2874 isa
&= ~ isa_opts
[i
].mask
;
2878 if (isa
&& add_nl_p
)
2880 opts
[num
++][0] = isa_other
;
2881 sprintf (isa_other
, "(other isa: %#" HOST_WIDE_INT_PRINT
"x)",
2885 /* Add flag options. */
2886 for (i
= 0; i
< ARRAY_SIZE (flag_opts
); i
++)
2888 if ((flags
& flag_opts
[i
].mask
) != 0)
2890 opts
[num
++][0] = flag_opts
[i
].option
;
2891 flags
&= ~ flag_opts
[i
].mask
;
2895 if (flags
&& add_nl_p
)
2897 opts
[num
++][0] = target_other
;
2898 sprintf (target_other
, "(other flags: %#x)", flags
);
2901 /* Add -fpmath= option. */
2904 opts
[num
][0] = "-mfpmath=";
2905 switch ((int) fpmath
)
2908 opts
[num
++][1] = "387";
2912 opts
[num
++][1] = "sse";
2915 case FPMATH_387
| FPMATH_SSE
:
2916 opts
[num
++][1] = "sse+387";
2928 gcc_assert (num
< ARRAY_SIZE (opts
));
2930 /* Size the string. */
2932 sep_len
= (add_nl_p
) ? 3 : 1;
2933 for (i
= 0; i
< num
; i
++)
2936 for (j
= 0; j
< 2; j
++)
2938 len
+= strlen (opts
[i
][j
]);
2941 /* Build the string. */
2942 ret
= ptr
= (char *) xmalloc (len
);
2945 for (i
= 0; i
< num
; i
++)
2949 for (j
= 0; j
< 2; j
++)
2950 len2
[j
] = (opts
[i
][j
]) ? strlen (opts
[i
][j
]) : 0;
2957 if (add_nl_p
&& line_len
+ len2
[0] + len2
[1] > 70)
2965 for (j
= 0; j
< 2; j
++)
2968 memcpy (ptr
, opts
[i
][j
], len2
[j
]);
2970 line_len
+= len2
[j
];
2975 gcc_assert (ret
+ len
>= ptr
);
2980 /* Return true, if profiling code should be emitted before
2981 prologue. Otherwise it returns false.
2982 Note: For x86 with "hotfix" it is sorried. */
2984 ix86_profile_before_prologue (void)
2986 return flag_fentry
!= 0;
2989 /* Function that is callable from the debugger to print the current
2992 ix86_debug_options (void)
2994 char *opts
= ix86_target_string (ix86_isa_flags
, target_flags
,
2995 ix86_arch_string
, ix86_tune_string
,
3000 fprintf (stderr
, "%s\n\n", opts
);
3004 fputs ("<no options>\n\n", stderr
);
3009 /* Override various settings based on options. If MAIN_ARGS_P, the
3010 options are from the command line, otherwise they are from
3014 ix86_option_override_internal (bool main_args_p
)
3017 unsigned int ix86_arch_mask
, ix86_tune_mask
;
3018 const bool ix86_tune_specified
= (ix86_tune_string
!= NULL
);
3023 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3024 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3025 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3026 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3027 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3028 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3029 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3030 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3031 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3032 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3033 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3034 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3035 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3036 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3037 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3038 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3039 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3040 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3041 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3042 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3043 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3044 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3045 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3046 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3047 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3048 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3049 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3050 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3051 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3052 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3053 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3054 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3055 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3056 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3057 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3058 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3059 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3060 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3061 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3062 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3064 /* if this reaches 64, need to widen struct pta flags below */
3068 const char *const name
; /* processor name or nickname. */
3069 const enum processor_type processor
;
3070 const enum attr_cpu schedule
;
3071 const unsigned HOST_WIDE_INT flags
;
3073 const processor_alias_table
[] =
3075 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
3076 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
3077 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3078 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
3079 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
3080 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
3081 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3082 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
3083 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
3084 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3085 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
3086 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_FXSR
},
3087 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3088 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3089 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3090 PTA_MMX
| PTA_SSE
| PTA_FXSR
},
3091 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
3092 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3093 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
3094 PTA_MMX
|PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3095 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
3096 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_FXSR
},
3097 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
3098 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
| PTA_FXSR
},
3099 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
3100 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3101 | PTA_CX16
| PTA_NO_SAHF
| PTA_FXSR
},
3102 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
3103 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3104 | PTA_SSSE3
| PTA_CX16
| PTA_FXSR
},
3105 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
3106 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3107 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
| PTA_FXSR
},
3108 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
3109 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3110 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3111 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
3112 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3113 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
3114 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3115 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
3116 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3117 | PTA_RDRND
| PTA_F16C
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3118 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
3119 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3120 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
3121 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
3122 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
3123 | PTA_FMA
| PTA_MOVBE
| PTA_RTM
| PTA_HLE
| PTA_FXSR
| PTA_XSAVE
3125 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
3126 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3127 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
| PTA_FXSR
},
3128 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
3129 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3130 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
3131 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3132 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3133 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3134 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3135 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3136 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3137 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3138 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3139 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3140 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3141 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3142 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3143 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3144 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3145 {"k8", PROCESSOR_K8
, CPU_K8
,
3146 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3147 | PTA_SSE2
| PTA_NO_SAHF
},
3148 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3149 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3150 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3151 {"opteron", PROCESSOR_K8
, CPU_K8
,
3152 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3153 | PTA_SSE2
| PTA_NO_SAHF
},
3154 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3155 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3156 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3157 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3158 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3159 | PTA_SSE2
| PTA_NO_SAHF
},
3160 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3161 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3162 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3163 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3164 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3165 | PTA_SSE2
| PTA_NO_SAHF
},
3166 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3167 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3168 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3169 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3170 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3171 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3172 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3173 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3174 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3175 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3176 | PTA_XOP
| PTA_LWP
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
3178 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3179 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3180 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3181 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3182 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3183 | PTA_FMA
| PTA_PRFCHW
| PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3184 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3185 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3186 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_PRFCHW
3187 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3188 {"btver2", PROCESSOR_BTVER2
, CPU_GENERIC64
,
3189 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3190 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
| PTA_SSE4_1
3191 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3192 | PTA_BMI
| PTA_F16C
| PTA_MOVBE
| PTA_PRFCHW
3193 | PTA_FXSR
| PTA_XSAVE
| PTA_XSAVEOPT
},
3195 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3196 PTA_HLE
/* flags are only used for -march switch. */ },
3197 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3199 | PTA_HLE
/* flags are only used for -march switch. */ },
3202 /* -mrecip options. */
3205 const char *string
; /* option name */
3206 unsigned int mask
; /* mask bits to set */
3208 const recip_options
[] =
3210 { "all", RECIP_MASK_ALL
},
3211 { "none", RECIP_MASK_NONE
},
3212 { "div", RECIP_MASK_DIV
},
3213 { "sqrt", RECIP_MASK_SQRT
},
3214 { "vec-div", RECIP_MASK_VEC_DIV
},
3215 { "vec-sqrt", RECIP_MASK_VEC_SQRT
},
3218 int const pta_size
= ARRAY_SIZE (processor_alias_table
);
3220 /* Set up prefix/suffix so the error messages refer to either the command
3221 line argument, or the attribute(target). */
3230 prefix
= "option(\"";
3235 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3236 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3237 if (TARGET_64BIT_DEFAULT
&& !TARGET_64BIT
)
3238 ix86_isa_flags
&= ~(OPTION_MASK_ABI_64
| OPTION_MASK_ABI_X32
);
3239 #ifdef TARGET_BI_ARCH
3242 #if TARGET_BI_ARCH == 1
3243 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3244 is on and OPTION_MASK_ABI_X32 is off. We turn off
3245 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3248 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3250 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3251 on and OPTION_MASK_ABI_64 is off. We turn off
3252 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3255 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3262 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3263 OPTION_MASK_ABI_64 for TARGET_X32. */
3264 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3265 ix86_isa_flags
&= ~OPTION_MASK_ABI_64
;
3267 else if (TARGET_LP64
)
3269 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3270 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3271 ix86_isa_flags
|= OPTION_MASK_ISA_64BIT
;
3272 ix86_isa_flags
&= ~OPTION_MASK_ABI_X32
;
3275 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3276 SUBTARGET_OVERRIDE_OPTIONS
;
3279 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3280 SUBSUBTARGET_OVERRIDE_OPTIONS
;
3283 /* -fPIC is the default for x86_64. */
3284 if (TARGET_MACHO
&& TARGET_64BIT
)
3287 /* Need to check -mtune=generic first. */
3288 if (ix86_tune_string
)
3290 if (!strcmp (ix86_tune_string
, "generic")
3291 || !strcmp (ix86_tune_string
, "i686")
3292 /* As special support for cross compilers we read -mtune=native
3293 as -mtune=generic. With native compilers we won't see the
3294 -mtune=native, as it was changed by the driver. */
3295 || !strcmp (ix86_tune_string
, "native"))
3298 ix86_tune_string
= "generic64";
3300 ix86_tune_string
= "generic32";
3302 /* If this call is for setting the option attribute, allow the
3303 generic32/generic64 that was previously set. */
3304 else if (!main_args_p
3305 && (!strcmp (ix86_tune_string
, "generic32")
3306 || !strcmp (ix86_tune_string
, "generic64")))
3308 else if (!strncmp (ix86_tune_string
, "generic", 7))
3309 error ("bad value (%s) for %stune=%s %s",
3310 ix86_tune_string
, prefix
, suffix
, sw
);
3311 else if (!strcmp (ix86_tune_string
, "x86-64"))
3312 warning (OPT_Wdeprecated
, "%stune=x86-64%s is deprecated; use "
3313 "%stune=k8%s or %stune=generic%s instead as appropriate",
3314 prefix
, suffix
, prefix
, suffix
, prefix
, suffix
);
3318 if (ix86_arch_string
)
3319 ix86_tune_string
= ix86_arch_string
;
3320 if (!ix86_tune_string
)
3322 ix86_tune_string
= cpu_names
[TARGET_CPU_DEFAULT
];
3323 ix86_tune_defaulted
= 1;
3326 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3327 need to use a sensible tune option. */
3328 if (!strcmp (ix86_tune_string
, "generic")
3329 || !strcmp (ix86_tune_string
, "x86-64")
3330 || !strcmp (ix86_tune_string
, "i686"))
3333 ix86_tune_string
= "generic64";
3335 ix86_tune_string
= "generic32";
3339 if (ix86_stringop_alg
== rep_prefix_8_byte
&& !TARGET_64BIT
)
3341 /* rep; movq isn't available in 32-bit code. */
3342 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3343 ix86_stringop_alg
= no_stringop
;
3346 if (!ix86_arch_string
)
3347 ix86_arch_string
= TARGET_64BIT
? "x86-64" : SUBTARGET32_DEFAULT_CPU
;
3349 ix86_arch_specified
= 1;
3351 if (global_options_set
.x_ix86_pmode
)
3353 if ((TARGET_LP64
&& ix86_pmode
== PMODE_SI
)
3354 || (!TARGET_64BIT
&& ix86_pmode
== PMODE_DI
))
3355 error ("address mode %qs not supported in the %s bit mode",
3356 TARGET_64BIT
? "short" : "long",
3357 TARGET_64BIT
? "64" : "32");
3360 ix86_pmode
= TARGET_LP64
? PMODE_DI
: PMODE_SI
;
3362 if (!global_options_set
.x_ix86_abi
)
3363 ix86_abi
= DEFAULT_ABI
;
3365 if (global_options_set
.x_ix86_cmodel
)
3367 switch (ix86_cmodel
)
3372 ix86_cmodel
= CM_SMALL_PIC
;
3374 error ("code model %qs not supported in the %s bit mode",
3381 ix86_cmodel
= CM_MEDIUM_PIC
;
3383 error ("code model %qs not supported in the %s bit mode",
3385 else if (TARGET_X32
)
3386 error ("code model %qs not supported in x32 mode",
3393 ix86_cmodel
= CM_LARGE_PIC
;
3395 error ("code model %qs not supported in the %s bit mode",
3397 else if (TARGET_X32
)
3398 error ("code model %qs not supported in x32 mode",
3404 error ("code model %s does not support PIC mode", "32");
3406 error ("code model %qs not supported in the %s bit mode",
3413 error ("code model %s does not support PIC mode", "kernel");
3414 ix86_cmodel
= CM_32
;
3417 error ("code model %qs not supported in the %s bit mode",
3427 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3428 use of rip-relative addressing. This eliminates fixups that
3429 would otherwise be needed if this object is to be placed in a
3430 DLL, and is essentially just as efficient as direct addressing. */
3431 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
3432 ix86_cmodel
= CM_SMALL_PIC
, flag_pic
= 1;
3433 else if (TARGET_64BIT
)
3434 ix86_cmodel
= flag_pic
? CM_SMALL_PIC
: CM_SMALL
;
3436 ix86_cmodel
= CM_32
;
3438 if (TARGET_MACHO
&& ix86_asm_dialect
== ASM_INTEL
)
3440 error ("-masm=intel not supported in this configuration");
3441 ix86_asm_dialect
= ASM_ATT
;
3443 if ((TARGET_64BIT
!= 0) != ((ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) != 0))
3444 sorry ("%i-bit mode not compiled in",
3445 (ix86_isa_flags
& OPTION_MASK_ISA_64BIT
) ? 64 : 32);
3447 for (i
= 0; i
< pta_size
; i
++)
3448 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3450 ix86_schedule
= processor_alias_table
[i
].schedule
;
3451 ix86_arch
= processor_alias_table
[i
].processor
;
3452 /* Default cpu tuning to the architecture. */
3453 ix86_tune
= ix86_arch
;
3455 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3456 error ("CPU you selected does not support x86-64 "
3459 if (processor_alias_table
[i
].flags
& PTA_MMX
3460 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3461 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3462 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3463 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3464 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3465 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3466 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3467 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3468 if (processor_alias_table
[i
].flags
& PTA_SSE
3469 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE
))
3470 ix86_isa_flags
|= OPTION_MASK_ISA_SSE
;
3471 if (processor_alias_table
[i
].flags
& PTA_SSE2
3472 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE2
))
3473 ix86_isa_flags
|= OPTION_MASK_ISA_SSE2
;
3474 if (processor_alias_table
[i
].flags
& PTA_SSE3
3475 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE3
))
3476 ix86_isa_flags
|= OPTION_MASK_ISA_SSE3
;
3477 if (processor_alias_table
[i
].flags
& PTA_SSSE3
3478 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSSE3
))
3479 ix86_isa_flags
|= OPTION_MASK_ISA_SSSE3
;
3480 if (processor_alias_table
[i
].flags
& PTA_SSE4_1
3481 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_1
))
3482 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_1
;
3483 if (processor_alias_table
[i
].flags
& PTA_SSE4_2
3484 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4_2
))
3485 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4_2
;
3486 if (processor_alias_table
[i
].flags
& PTA_AVX
3487 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX
))
3488 ix86_isa_flags
|= OPTION_MASK_ISA_AVX
;
3489 if (processor_alias_table
[i
].flags
& PTA_AVX2
3490 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX2
))
3491 ix86_isa_flags
|= OPTION_MASK_ISA_AVX2
;
3492 if (processor_alias_table
[i
].flags
& PTA_FMA
3493 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA
))
3494 ix86_isa_flags
|= OPTION_MASK_ISA_FMA
;
3495 if (processor_alias_table
[i
].flags
& PTA_SSE4A
3496 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SSE4A
))
3497 ix86_isa_flags
|= OPTION_MASK_ISA_SSE4A
;
3498 if (processor_alias_table
[i
].flags
& PTA_FMA4
3499 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FMA4
))
3500 ix86_isa_flags
|= OPTION_MASK_ISA_FMA4
;
3501 if (processor_alias_table
[i
].flags
& PTA_XOP
3502 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XOP
))
3503 ix86_isa_flags
|= OPTION_MASK_ISA_XOP
;
3504 if (processor_alias_table
[i
].flags
& PTA_LWP
3505 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LWP
))
3506 ix86_isa_flags
|= OPTION_MASK_ISA_LWP
;
3507 if (processor_alias_table
[i
].flags
& PTA_ABM
3508 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ABM
))
3509 ix86_isa_flags
|= OPTION_MASK_ISA_ABM
;
3510 if (processor_alias_table
[i
].flags
& PTA_BMI
3511 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI
))
3512 ix86_isa_flags
|= OPTION_MASK_ISA_BMI
;
3513 if (processor_alias_table
[i
].flags
& (PTA_LZCNT
| PTA_ABM
)
3514 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_LZCNT
))
3515 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
;
3516 if (processor_alias_table
[i
].flags
& PTA_TBM
3517 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_TBM
))
3518 ix86_isa_flags
|= OPTION_MASK_ISA_TBM
;
3519 if (processor_alias_table
[i
].flags
& PTA_BMI2
3520 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_BMI2
))
3521 ix86_isa_flags
|= OPTION_MASK_ISA_BMI2
;
3522 if (processor_alias_table
[i
].flags
& PTA_CX16
3523 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_CX16
))
3524 ix86_isa_flags
|= OPTION_MASK_ISA_CX16
;
3525 if (processor_alias_table
[i
].flags
& (PTA_POPCNT
| PTA_ABM
)
3526 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_POPCNT
))
3527 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
;
3528 if (!(TARGET_64BIT
&& (processor_alias_table
[i
].flags
& PTA_NO_SAHF
))
3529 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_SAHF
))
3530 ix86_isa_flags
|= OPTION_MASK_ISA_SAHF
;
3531 if (processor_alias_table
[i
].flags
& PTA_MOVBE
3532 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MOVBE
))
3533 ix86_isa_flags
|= OPTION_MASK_ISA_MOVBE
;
3534 if (processor_alias_table
[i
].flags
& PTA_AES
3535 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_AES
))
3536 ix86_isa_flags
|= OPTION_MASK_ISA_AES
;
3537 if (processor_alias_table
[i
].flags
& PTA_PCLMUL
3538 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PCLMUL
))
3539 ix86_isa_flags
|= OPTION_MASK_ISA_PCLMUL
;
3540 if (processor_alias_table
[i
].flags
& PTA_FSGSBASE
3541 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FSGSBASE
))
3542 ix86_isa_flags
|= OPTION_MASK_ISA_FSGSBASE
;
3543 if (processor_alias_table
[i
].flags
& PTA_RDRND
3544 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDRND
))
3545 ix86_isa_flags
|= OPTION_MASK_ISA_RDRND
;
3546 if (processor_alias_table
[i
].flags
& PTA_F16C
3547 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_F16C
))
3548 ix86_isa_flags
|= OPTION_MASK_ISA_F16C
;
3549 if (processor_alias_table
[i
].flags
& PTA_RTM
3550 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RTM
))
3551 ix86_isa_flags
|= OPTION_MASK_ISA_RTM
;
3552 if (processor_alias_table
[i
].flags
& PTA_HLE
3553 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_HLE
))
3554 ix86_isa_flags
|= OPTION_MASK_ISA_HLE
;
3555 if (processor_alias_table
[i
].flags
& PTA_PRFCHW
3556 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_PRFCHW
))
3557 ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW
;
3558 if (processor_alias_table
[i
].flags
& PTA_RDSEED
3559 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_RDSEED
))
3560 ix86_isa_flags
|= OPTION_MASK_ISA_RDSEED
;
3561 if (processor_alias_table
[i
].flags
& PTA_ADX
3562 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_ADX
))
3563 ix86_isa_flags
|= OPTION_MASK_ISA_ADX
;
3564 if (processor_alias_table
[i
].flags
& PTA_FXSR
3565 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_FXSR
))
3566 ix86_isa_flags
|= OPTION_MASK_ISA_FXSR
;
3567 if (processor_alias_table
[i
].flags
& PTA_XSAVE
3568 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVE
))
3569 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVE
;
3570 if (processor_alias_table
[i
].flags
& PTA_XSAVEOPT
3571 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_XSAVEOPT
))
3572 ix86_isa_flags
|= OPTION_MASK_ISA_XSAVEOPT
;
3573 if (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
))
3574 x86_prefetch_sse
= true;
3579 if (!strcmp (ix86_arch_string
, "generic"))
3580 error ("generic CPU can be used only for %stune=%s %s",
3581 prefix
, suffix
, sw
);
3582 else if (!strncmp (ix86_arch_string
, "generic", 7) || i
== pta_size
)
3583 error ("bad value (%s) for %sarch=%s %s",
3584 ix86_arch_string
, prefix
, suffix
, sw
);
3586 ix86_arch_mask
= 1u << ix86_arch
;
3587 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
3588 ix86_arch_features
[i
] = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
3590 for (i
= 0; i
< pta_size
; i
++)
3591 if (! strcmp (ix86_tune_string
, processor_alias_table
[i
].name
))
3593 ix86_schedule
= processor_alias_table
[i
].schedule
;
3594 ix86_tune
= processor_alias_table
[i
].processor
;
3597 if (!(processor_alias_table
[i
].flags
& PTA_64BIT
))
3599 if (ix86_tune_defaulted
)
3601 ix86_tune_string
= "x86-64";
3602 for (i
= 0; i
< pta_size
; i
++)
3603 if (! strcmp (ix86_tune_string
,
3604 processor_alias_table
[i
].name
))
3606 ix86_schedule
= processor_alias_table
[i
].schedule
;
3607 ix86_tune
= processor_alias_table
[i
].processor
;
3610 error ("CPU you selected does not support x86-64 "
3616 /* Adjust tuning when compiling for 32-bit ABI. */
3619 case PROCESSOR_GENERIC64
:
3620 ix86_tune
= PROCESSOR_GENERIC32
;
3621 ix86_schedule
= CPU_PENTIUMPRO
;
3624 case PROCESSOR_CORE2_64
:
3625 ix86_tune
= PROCESSOR_CORE2_32
;
3628 case PROCESSOR_COREI7_64
:
3629 ix86_tune
= PROCESSOR_COREI7_32
;
3636 /* Intel CPUs have always interpreted SSE prefetch instructions as
3637 NOPs; so, we can enable SSE prefetch instructions even when
3638 -mtune (rather than -march) points us to a processor that has them.
3639 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3640 higher processors. */
3642 && (processor_alias_table
[i
].flags
& (PTA_PREFETCH_SSE
| PTA_SSE
)))
3643 x86_prefetch_sse
= true;
3647 if (ix86_tune_specified
&& i
== pta_size
)
3648 error ("bad value (%s) for %stune=%s %s",
3649 ix86_tune_string
, prefix
, suffix
, sw
);
3651 ix86_tune_mask
= 1u << ix86_tune
;
3652 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
3653 ix86_tune_features
[i
] = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
3655 #ifndef USE_IX86_FRAME_POINTER
3656 #define USE_IX86_FRAME_POINTER 0
3659 #ifndef USE_X86_64_FRAME_POINTER
3660 #define USE_X86_64_FRAME_POINTER 0
3663 /* Set the default values for switches whose default depends on TARGET_64BIT
3664 in case they weren't overwritten by command line options. */
3667 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3668 flag_omit_frame_pointer
= !USE_X86_64_FRAME_POINTER
;
3669 if (flag_asynchronous_unwind_tables
== 2)
3670 flag_unwind_tables
= flag_asynchronous_unwind_tables
= 1;
3671 if (flag_pcc_struct_return
== 2)
3672 flag_pcc_struct_return
= 0;
3676 if (optimize
>= 1 && !global_options_set
.x_flag_omit_frame_pointer
)
3677 flag_omit_frame_pointer
= !(USE_IX86_FRAME_POINTER
|| optimize_size
);
3678 if (flag_asynchronous_unwind_tables
== 2)
3679 flag_asynchronous_unwind_tables
= !USE_IX86_FRAME_POINTER
;
3680 if (flag_pcc_struct_return
== 2)
3681 flag_pcc_struct_return
= DEFAULT_PCC_STRUCT_RETURN
;
3684 ix86_tune_cost
= processor_target_table
[ix86_tune
].cost
;
3686 ix86_cost
= &ix86_size_cost
;
3688 ix86_cost
= ix86_tune_cost
;
3690 /* Arrange to set up i386_stack_locals for all functions. */
3691 init_machine_status
= ix86_init_machine_status
;
3693 /* Validate -mregparm= value. */
3694 if (global_options_set
.x_ix86_regparm
)
3697 warning (0, "-mregparm is ignored in 64-bit mode");
3698 if (ix86_regparm
> REGPARM_MAX
)
3700 error ("-mregparm=%d is not between 0 and %d",
3701 ix86_regparm
, REGPARM_MAX
);
3706 ix86_regparm
= REGPARM_MAX
;
3708 /* Default align_* from the processor table. */
3709 if (align_loops
== 0)
3711 align_loops
= processor_target_table
[ix86_tune
].align_loop
;
3712 align_loops_max_skip
= processor_target_table
[ix86_tune
].align_loop_max_skip
;
3714 if (align_jumps
== 0)
3716 align_jumps
= processor_target_table
[ix86_tune
].align_jump
;
3717 align_jumps_max_skip
= processor_target_table
[ix86_tune
].align_jump_max_skip
;
3719 if (align_functions
== 0)
3721 align_functions
= processor_target_table
[ix86_tune
].align_func
;
3724 /* Provide default for -mbranch-cost= value. */
3725 if (!global_options_set
.x_ix86_branch_cost
)
3726 ix86_branch_cost
= ix86_cost
->branch_cost
;
3730 target_flags
|= TARGET_SUBTARGET64_DEFAULT
& ~target_flags_explicit
;
3732 /* Enable by default the SSE and MMX builtins. Do allow the user to
3733 explicitly disable any of these. In particular, disabling SSE and
3734 MMX for kernel code is extremely useful. */
3735 if (!ix86_arch_specified
)
3737 |= ((OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_MMX
3738 | TARGET_SUBTARGET64_ISA_DEFAULT
) & ~ix86_isa_flags_explicit
);
3741 warning (0, "%srtd%s is ignored in 64bit mode", prefix
, suffix
);
3745 target_flags
|= TARGET_SUBTARGET32_DEFAULT
& ~target_flags_explicit
;
3747 if (!ix86_arch_specified
)
3749 |= TARGET_SUBTARGET32_ISA_DEFAULT
& ~ix86_isa_flags_explicit
;
3751 /* i386 ABI does not specify red zone. It still makes sense to use it
3752 when programmer takes care to stack from being destroyed. */
3753 if (!(target_flags_explicit
& MASK_NO_RED_ZONE
))
3754 target_flags
|= MASK_NO_RED_ZONE
;
3757 /* Keep nonleaf frame pointers. */
3758 if (flag_omit_frame_pointer
)
3759 target_flags
&= ~MASK_OMIT_LEAF_FRAME_POINTER
;
3760 else if (TARGET_OMIT_LEAF_FRAME_POINTER
)
3761 flag_omit_frame_pointer
= 1;
3763 /* If we're doing fast math, we don't care about comparison order
3764 wrt NaNs. This lets us use a shorter comparison sequence. */
3765 if (flag_finite_math_only
)
3766 target_flags
&= ~MASK_IEEE_FP
;
3768 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3769 since the insns won't need emulation. */
3770 if (x86_arch_always_fancy_math_387
& ix86_arch_mask
)
3771 target_flags
&= ~MASK_NO_FANCY_MATH_387
;
3773 /* Likewise, if the target doesn't have a 387, or we've specified
3774 software floating point, don't use 387 inline intrinsics. */
3776 target_flags
|= MASK_NO_FANCY_MATH_387
;
3778 /* Turn on MMX builtins for -msse. */
3780 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
& ~ix86_isa_flags_explicit
;
3782 /* Enable SSE prefetch. */
3783 if (TARGET_SSE
|| TARGET_PRFCHW
)
3784 x86_prefetch_sse
= true;
3786 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3787 if (TARGET_SSE4_2
|| TARGET_ABM
)
3788 ix86_isa_flags
|= OPTION_MASK_ISA_POPCNT
& ~ix86_isa_flags_explicit
;
3790 /* Turn on lzcnt instruction for -mabm. */
3792 ix86_isa_flags
|= OPTION_MASK_ISA_LZCNT
& ~ix86_isa_flags_explicit
;
3794 /* Validate -mpreferred-stack-boundary= value or default it to
3795 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3796 ix86_preferred_stack_boundary
= PREFERRED_STACK_BOUNDARY_DEFAULT
;
3797 if (global_options_set
.x_ix86_preferred_stack_boundary_arg
)
3799 int min
= (TARGET_64BIT
? (TARGET_SSE
? 4 : 3) : 2);
3800 int max
= (TARGET_SEH
? 4 : 12);
3802 if (ix86_preferred_stack_boundary_arg
< min
3803 || ix86_preferred_stack_boundary_arg
> max
)
3806 error ("-mpreferred-stack-boundary is not supported "
3809 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3810 ix86_preferred_stack_boundary_arg
, min
, max
);
3813 ix86_preferred_stack_boundary
3814 = (1 << ix86_preferred_stack_boundary_arg
) * BITS_PER_UNIT
;
3817 /* Set the default value for -mstackrealign. */
3818 if (ix86_force_align_arg_pointer
== -1)
3819 ix86_force_align_arg_pointer
= STACK_REALIGN_DEFAULT
;
3821 ix86_default_incoming_stack_boundary
= PREFERRED_STACK_BOUNDARY
;
3823 /* Validate -mincoming-stack-boundary= value or default it to
3824 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3825 ix86_incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
3826 if (global_options_set
.x_ix86_incoming_stack_boundary_arg
)
3828 if (ix86_incoming_stack_boundary_arg
< (TARGET_64BIT
? 4 : 2)
3829 || ix86_incoming_stack_boundary_arg
> 12)
3830 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3831 ix86_incoming_stack_boundary_arg
, TARGET_64BIT
? 4 : 2);
3834 ix86_user_incoming_stack_boundary
3835 = (1 << ix86_incoming_stack_boundary_arg
) * BITS_PER_UNIT
;
3836 ix86_incoming_stack_boundary
3837 = ix86_user_incoming_stack_boundary
;
3841 /* Accept -msseregparm only if at least SSE support is enabled. */
3842 if (TARGET_SSEREGPARM
3844 error ("%ssseregparm%s used without SSE enabled", prefix
, suffix
);
3846 if (global_options_set
.x_ix86_fpmath
)
3848 if (ix86_fpmath
& FPMATH_SSE
)
3852 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3853 ix86_fpmath
= FPMATH_387
;
3855 else if ((ix86_fpmath
& FPMATH_387
) && !TARGET_80387
)
3857 warning (0, "387 instruction set disabled, using SSE arithmetics");
3858 ix86_fpmath
= FPMATH_SSE
;
3863 ix86_fpmath
= TARGET_FPMATH_DEFAULT
;
3865 /* If the i387 is disabled, then do not return values in it. */
3867 target_flags
&= ~MASK_FLOAT_RETURNS
;
3869 /* Use external vectorized library in vectorizing intrinsics. */
3870 if (global_options_set
.x_ix86_veclibabi_type
)
3871 switch (ix86_veclibabi_type
)
3873 case ix86_veclibabi_type_svml
:
3874 ix86_veclib_handler
= ix86_veclibabi_svml
;
3877 case ix86_veclibabi_type_acml
:
3878 ix86_veclib_handler
= ix86_veclibabi_acml
;
3885 if ((!USE_IX86_FRAME_POINTER
3886 || (x86_accumulate_outgoing_args
& ix86_tune_mask
))
3887 && !(target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3889 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3891 /* ??? Unwind info is not correct around the CFG unless either a frame
3892 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3893 unwind info generation to be aware of the CFG and propagating states
3895 if ((flag_unwind_tables
|| flag_asynchronous_unwind_tables
3896 || flag_exceptions
|| flag_non_call_exceptions
)
3897 && flag_omit_frame_pointer
3898 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3900 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3901 warning (0, "unwind tables currently require either a frame pointer "
3902 "or %saccumulate-outgoing-args%s for correctness",
3904 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3907 /* If stack probes are required, the space used for large function
3908 arguments on the stack must also be probed, so enable
3909 -maccumulate-outgoing-args so this happens in the prologue. */
3910 if (TARGET_STACK_PROBE
3911 && !(target_flags
& MASK_ACCUMULATE_OUTGOING_ARGS
))
3913 if (target_flags_explicit
& MASK_ACCUMULATE_OUTGOING_ARGS
)
3914 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3915 "for correctness", prefix
, suffix
);
3916 target_flags
|= MASK_ACCUMULATE_OUTGOING_ARGS
;
3919 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3922 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix
, "LX", 0);
3923 p
= strchr (internal_label_prefix
, 'X');
3924 internal_label_prefix_len
= p
- internal_label_prefix
;
3928 /* When scheduling description is not available, disable scheduler pass
3929 so it won't slow down the compilation and make x87 code slower. */
3930 if (!TARGET_SCHEDULE
)
3931 flag_schedule_insns_after_reload
= flag_schedule_insns
= 0;
3933 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
3934 ix86_tune_cost
->simultaneous_prefetches
,
3935 global_options
.x_param_values
,
3936 global_options_set
.x_param_values
);
3937 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
3938 ix86_tune_cost
->prefetch_block
,
3939 global_options
.x_param_values
,
3940 global_options_set
.x_param_values
);
3941 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
3942 ix86_tune_cost
->l1_cache_size
,
3943 global_options
.x_param_values
,
3944 global_options_set
.x_param_values
);
3945 maybe_set_param_value (PARAM_L2_CACHE_SIZE
,
3946 ix86_tune_cost
->l2_cache_size
,
3947 global_options
.x_param_values
,
3948 global_options_set
.x_param_values
);
3950 /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */
3951 if (flag_prefetch_loop_arrays
< 0
3954 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL
)
3955 flag_prefetch_loop_arrays
= 1;
3957 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3958 can be optimized to ap = __builtin_next_arg (0). */
3959 if (!TARGET_64BIT
&& !flag_split_stack
)
3960 targetm
.expand_builtin_va_start
= NULL
;
3964 ix86_gen_leave
= gen_leave_rex64
;
3965 if (Pmode
== DImode
)
3967 ix86_gen_monitor
= gen_sse3_monitor64_di
;
3968 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_di
;
3969 ix86_gen_tls_local_dynamic_base_64
3970 = gen_tls_local_dynamic_base_64_di
;
3974 ix86_gen_monitor
= gen_sse3_monitor64_si
;
3975 ix86_gen_tls_global_dynamic_64
= gen_tls_global_dynamic_64_si
;
3976 ix86_gen_tls_local_dynamic_base_64
3977 = gen_tls_local_dynamic_base_64_si
;
3982 ix86_gen_leave
= gen_leave
;
3983 ix86_gen_monitor
= gen_sse3_monitor
;
3986 if (Pmode
== DImode
)
3988 ix86_gen_add3
= gen_adddi3
;
3989 ix86_gen_sub3
= gen_subdi3
;
3990 ix86_gen_sub3_carry
= gen_subdi3_carry
;
3991 ix86_gen_one_cmpl2
= gen_one_cmpldi2
;
3992 ix86_gen_andsp
= gen_anddi3
;
3993 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_di
;
3994 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probedi
;
3995 ix86_gen_probe_stack_range
= gen_probe_stack_rangedi
;
3999 ix86_gen_add3
= gen_addsi3
;
4000 ix86_gen_sub3
= gen_subsi3
;
4001 ix86_gen_sub3_carry
= gen_subsi3_carry
;
4002 ix86_gen_one_cmpl2
= gen_one_cmplsi2
;
4003 ix86_gen_andsp
= gen_andsi3
;
4004 ix86_gen_allocate_stack_worker
= gen_allocate_stack_worker_probe_si
;
4005 ix86_gen_adjust_stack_and_probe
= gen_adjust_stack_and_probesi
;
4006 ix86_gen_probe_stack_range
= gen_probe_stack_rangesi
;
4010 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4012 target_flags
|= MASK_CLD
& ~target_flags_explicit
;
4015 if (!TARGET_64BIT
&& flag_pic
)
4017 if (flag_fentry
> 0)
4018 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4022 else if (TARGET_SEH
)
4024 if (flag_fentry
== 0)
4025 sorry ("-mno-fentry isn%'t compatible with SEH");
4028 else if (flag_fentry
< 0)
4030 #if defined(PROFILE_BEFORE_PROLOGUE)
4039 /* When not optimize for size, enable vzeroupper optimization for
4040 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4041 AVX unaligned load/store. */
4044 if (flag_expensive_optimizations
4045 && !(target_flags_explicit
& MASK_VZEROUPPER
))
4046 target_flags
|= MASK_VZEROUPPER
;
4047 if ((x86_avx256_split_unaligned_load
& ix86_tune_mask
)
4048 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_LOAD
))
4049 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_LOAD
;
4050 if ((x86_avx256_split_unaligned_store
& ix86_tune_mask
)
4051 && !(target_flags_explicit
& MASK_AVX256_SPLIT_UNALIGNED_STORE
))
4052 target_flags
|= MASK_AVX256_SPLIT_UNALIGNED_STORE
;
4053 /* Enable 128-bit AVX instruction generation for the auto-vectorizer. */
4054 if (TARGET_AVX128_OPTIMAL
&& !(target_flags_explicit
& MASK_PREFER_AVX128
))
4055 target_flags
|= MASK_PREFER_AVX128
;
4060 /* Disable vzeroupper pass if TARGET_AVX is disabled. */
4061 target_flags
&= ~MASK_VZEROUPPER
;
4064 if (ix86_recip_name
)
4066 char *p
= ASTRDUP (ix86_recip_name
);
4068 unsigned int mask
, i
;
4071 while ((q
= strtok (p
, ",")) != NULL
)
4082 if (!strcmp (q
, "default"))
4083 mask
= RECIP_MASK_ALL
;
4086 for (i
= 0; i
< ARRAY_SIZE (recip_options
); i
++)
4087 if (!strcmp (q
, recip_options
[i
].string
))
4089 mask
= recip_options
[i
].mask
;
4093 if (i
== ARRAY_SIZE (recip_options
))
4095 error ("unknown option for -mrecip=%s", q
);
4097 mask
= RECIP_MASK_NONE
;
4101 recip_mask_explicit
|= mask
;
4103 recip_mask
&= ~mask
;
4110 recip_mask
|= RECIP_MASK_ALL
& ~recip_mask_explicit
;
4111 else if (target_flags_explicit
& MASK_RECIP
)
4112 recip_mask
&= ~(RECIP_MASK_ALL
& ~recip_mask_explicit
);
4114 /* Default long double to 64-bit for Bionic. */
4115 if (TARGET_HAS_BIONIC
4116 && !(target_flags_explicit
& MASK_LONG_DOUBLE_64
))
4117 target_flags
|= MASK_LONG_DOUBLE_64
;
4119 /* Save the initial options in case the user does function specific
4122 target_option_default_node
= target_option_current_node
4123 = build_target_option_node ();
4126 /* Return TRUE if VAL is passed in register with 256bit AVX modes. */
4129 function_pass_avx256_p (const_rtx val
)
4134 if (REG_P (val
) && VALID_AVX256_REG_MODE (GET_MODE (val
)))
4137 if (GET_CODE (val
) == PARALLEL
)
4142 for (i
= XVECLEN (val
, 0) - 1; i
>= 0; i
--)
4144 r
= XVECEXP (val
, 0, i
);
4145 if (GET_CODE (r
) == EXPR_LIST
4147 && REG_P (XEXP (r
, 0))
4148 && (GET_MODE (XEXP (r
, 0)) == OImode
4149 || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r
, 0)))))
4157 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4160 ix86_option_override (void)
4162 ix86_option_override_internal (true);
4165 /* Update register usage after having seen the compiler flags. */
4168 ix86_conditional_register_usage (void)
4173 /* The PIC register, if it exists, is fixed. */
4174 j
= PIC_OFFSET_TABLE_REGNUM
;
4175 if (j
!= INVALID_REGNUM
)
4176 fixed_regs
[j
] = call_used_regs
[j
] = 1;
4178 /* For 32-bit targets, squash the REX registers. */
4181 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
4182 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4183 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
4184 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4187 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4188 c_mask
= (TARGET_64BIT_MS_ABI
? (1 << 3)
4189 : TARGET_64BIT
? (1 << 2)
4192 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
4194 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4196 /* Set/reset conditionally defined registers from
4197 CALL_USED_REGISTERS initializer. */
4198 if (call_used_regs
[i
] > 1)
4199 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
4201 /* Calculate registers of CLOBBERED_REGS register set
4202 as call used registers from GENERAL_REGS register set. */
4203 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
4204 && call_used_regs
[i
])
4205 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
4208 /* If MMX is disabled, squash the registers. */
4210 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4211 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)MMX_REGS
], i
))
4212 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4214 /* If SSE is disabled, squash the registers. */
4216 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4217 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)SSE_REGS
], i
))
4218 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4220 /* If the FPU is disabled, squash the registers. */
4221 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
4222 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
4223 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)FLOAT_REGS
], i
))
4224 fixed_regs
[i
] = call_used_regs
[i
] = 1, reg_names
[i
] = "";
4228 /* Save the current options */
4231 ix86_function_specific_save (struct cl_target_option
*ptr
)
4233 ptr
->arch
= ix86_arch
;
4234 ptr
->schedule
= ix86_schedule
;
4235 ptr
->tune
= ix86_tune
;
4236 ptr
->branch_cost
= ix86_branch_cost
;
4237 ptr
->tune_defaulted
= ix86_tune_defaulted
;
4238 ptr
->arch_specified
= ix86_arch_specified
;
4239 ptr
->x_ix86_isa_flags_explicit
= ix86_isa_flags_explicit
;
4240 ptr
->ix86_target_flags_explicit
= target_flags_explicit
;
4241 ptr
->x_recip_mask_explicit
= recip_mask_explicit
;
4243 /* The fields are char but the variables are not; make sure the
4244 values fit in the fields. */
4245 gcc_assert (ptr
->arch
== ix86_arch
);
4246 gcc_assert (ptr
->schedule
== ix86_schedule
);
4247 gcc_assert (ptr
->tune
== ix86_tune
);
4248 gcc_assert (ptr
->branch_cost
== ix86_branch_cost
);
4251 /* Restore the current options */
4254 ix86_function_specific_restore (struct cl_target_option
*ptr
)
4256 enum processor_type old_tune
= ix86_tune
;
4257 enum processor_type old_arch
= ix86_arch
;
4258 unsigned int ix86_arch_mask
, ix86_tune_mask
;
4261 ix86_arch
= (enum processor_type
) ptr
->arch
;
4262 ix86_schedule
= (enum attr_cpu
) ptr
->schedule
;
4263 ix86_tune
= (enum processor_type
) ptr
->tune
;
4264 ix86_branch_cost
= ptr
->branch_cost
;
4265 ix86_tune_defaulted
= ptr
->tune_defaulted
;
4266 ix86_arch_specified
= ptr
->arch_specified
;
4267 ix86_isa_flags_explicit
= ptr
->x_ix86_isa_flags_explicit
;
4268 target_flags_explicit
= ptr
->ix86_target_flags_explicit
;
4269 recip_mask_explicit
= ptr
->x_recip_mask_explicit
;
4271 /* Recreate the arch feature tests if the arch changed */
4272 if (old_arch
!= ix86_arch
)
4274 ix86_arch_mask
= 1u << ix86_arch
;
4275 for (i
= 0; i
< X86_ARCH_LAST
; ++i
)
4276 ix86_arch_features
[i
]
4277 = !!(initial_ix86_arch_features
[i
] & ix86_arch_mask
);
4280 /* Recreate the tune optimization tests */
4281 if (old_tune
!= ix86_tune
)
4283 ix86_tune_mask
= 1u << ix86_tune
;
4284 for (i
= 0; i
< X86_TUNE_LAST
; ++i
)
4285 ix86_tune_features
[i
]
4286 = !!(initial_ix86_tune_features
[i
] & ix86_tune_mask
);
4290 /* Print the current options */
4293 ix86_function_specific_print (FILE *file
, int indent
,
4294 struct cl_target_option
*ptr
)
4297 = ix86_target_string (ptr
->x_ix86_isa_flags
, ptr
->x_target_flags
,
4298 NULL
, NULL
, ptr
->x_ix86_fpmath
, false);
4300 fprintf (file
, "%*sarch = %d (%s)\n",
4303 ((ptr
->arch
< TARGET_CPU_DEFAULT_max
)
4304 ? cpu_names
[ptr
->arch
]
4307 fprintf (file
, "%*stune = %d (%s)\n",
4310 ((ptr
->tune
< TARGET_CPU_DEFAULT_max
)
4311 ? cpu_names
[ptr
->tune
]
4314 fprintf (file
, "%*sbranch_cost = %d\n", indent
, "", ptr
->branch_cost
);
4318 fprintf (file
, "%*s%s\n", indent
, "", target_string
);
4319 free (target_string
);
4324 /* Inner function to process the attribute((target(...))), take an argument and
4325 set the current options from the argument. If we have a list, recursively go
4329 ix86_valid_target_attribute_inner_p (tree args
, char *p_strings
[],
4330 struct gcc_options
*enum_opts_set
)
4335 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4336 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4337 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4338 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4339 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4355 enum ix86_opt_type type
;
4360 IX86_ATTR_ISA ("3dnow", OPT_m3dnow
),
4361 IX86_ATTR_ISA ("abm", OPT_mabm
),
4362 IX86_ATTR_ISA ("bmi", OPT_mbmi
),
4363 IX86_ATTR_ISA ("bmi2", OPT_mbmi2
),
4364 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt
),
4365 IX86_ATTR_ISA ("tbm", OPT_mtbm
),
4366 IX86_ATTR_ISA ("aes", OPT_maes
),
4367 IX86_ATTR_ISA ("avx", OPT_mavx
),
4368 IX86_ATTR_ISA ("avx2", OPT_mavx2
),
4369 IX86_ATTR_ISA ("mmx", OPT_mmmx
),
4370 IX86_ATTR_ISA ("pclmul", OPT_mpclmul
),
4371 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt
),
4372 IX86_ATTR_ISA ("sse", OPT_msse
),
4373 IX86_ATTR_ISA ("sse2", OPT_msse2
),
4374 IX86_ATTR_ISA ("sse3", OPT_msse3
),
4375 IX86_ATTR_ISA ("sse4", OPT_msse4
),
4376 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1
),
4377 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2
),
4378 IX86_ATTR_ISA ("sse4a", OPT_msse4a
),
4379 IX86_ATTR_ISA ("ssse3", OPT_mssse3
),
4380 IX86_ATTR_ISA ("fma4", OPT_mfma4
),
4381 IX86_ATTR_ISA ("fma", OPT_mfma
),
4382 IX86_ATTR_ISA ("xop", OPT_mxop
),
4383 IX86_ATTR_ISA ("lwp", OPT_mlwp
),
4384 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase
),
4385 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd
),
4386 IX86_ATTR_ISA ("f16c", OPT_mf16c
),
4387 IX86_ATTR_ISA ("rtm", OPT_mrtm
),
4388 IX86_ATTR_ISA ("hle", OPT_mhle
),
4389 IX86_ATTR_ISA ("prfchw", OPT_mprfchw
),
4390 IX86_ATTR_ISA ("rdseed", OPT_mrdseed
),
4391 IX86_ATTR_ISA ("adx", OPT_madx
),
4392 IX86_ATTR_ISA ("fxsr", OPT_mfxsr
),
4393 IX86_ATTR_ISA ("xsave", OPT_mxsave
),
4394 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt
),
4397 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_
),
4399 /* string options */
4400 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH
),
4401 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE
),
4404 IX86_ATTR_YES ("cld",
4408 IX86_ATTR_NO ("fancy-math-387",
4409 OPT_mfancy_math_387
,
4410 MASK_NO_FANCY_MATH_387
),
4412 IX86_ATTR_YES ("ieee-fp",
4416 IX86_ATTR_YES ("inline-all-stringops",
4417 OPT_minline_all_stringops
,
4418 MASK_INLINE_ALL_STRINGOPS
),
4420 IX86_ATTR_YES ("inline-stringops-dynamically",
4421 OPT_minline_stringops_dynamically
,
4422 MASK_INLINE_STRINGOPS_DYNAMICALLY
),
4424 IX86_ATTR_NO ("align-stringops",
4425 OPT_mno_align_stringops
,
4426 MASK_NO_ALIGN_STRINGOPS
),
4428 IX86_ATTR_YES ("recip",
4434 /* If this is a list, recurse to get the options. */
4435 if (TREE_CODE (args
) == TREE_LIST
)
4439 for (; args
; args
= TREE_CHAIN (args
))
4440 if (TREE_VALUE (args
)
4441 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args
),
4442 p_strings
, enum_opts_set
))
4448 else if (TREE_CODE (args
) != STRING_CST
)
4451 /* Handle multiple arguments separated by commas. */
4452 next_optstr
= ASTRDUP (TREE_STRING_POINTER (args
));
4454 while (next_optstr
&& *next_optstr
!= '\0')
4456 char *p
= next_optstr
;
4458 char *comma
= strchr (next_optstr
, ',');
4459 const char *opt_string
;
4460 size_t len
, opt_len
;
4465 enum ix86_opt_type type
= ix86_opt_unknown
;
4471 len
= comma
- next_optstr
;
4472 next_optstr
= comma
+ 1;
4480 /* Recognize no-xxx. */
4481 if (len
> 3 && p
[0] == 'n' && p
[1] == 'o' && p
[2] == '-')
4490 /* Find the option. */
4493 for (i
= 0; i
< ARRAY_SIZE (attrs
); i
++)
4495 type
= attrs
[i
].type
;
4496 opt_len
= attrs
[i
].len
;
4497 if (ch
== attrs
[i
].string
[0]
4498 && ((type
!= ix86_opt_str
&& type
!= ix86_opt_enum
)
4501 && memcmp (p
, attrs
[i
].string
, opt_len
) == 0)
4504 mask
= attrs
[i
].mask
;
4505 opt_string
= attrs
[i
].string
;
4510 /* Process the option. */
4513 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4517 else if (type
== ix86_opt_isa
)
4519 struct cl_decoded_option decoded
;
4521 generate_option (opt
, NULL
, opt_set_p
, CL_TARGET
, &decoded
);
4522 ix86_handle_option (&global_options
, &global_options_set
,
4523 &decoded
, input_location
);
4526 else if (type
== ix86_opt_yes
|| type
== ix86_opt_no
)
4528 if (type
== ix86_opt_no
)
4529 opt_set_p
= !opt_set_p
;
4532 target_flags
|= mask
;
4534 target_flags
&= ~mask
;
4537 else if (type
== ix86_opt_str
)
4541 error ("option(\"%s\") was already specified", opt_string
);
4545 p_strings
[opt
] = xstrdup (p
+ opt_len
);
4548 else if (type
== ix86_opt_enum
)
4553 arg_ok
= opt_enum_arg_to_value (opt
, p
+ opt_len
, &value
, CL_TARGET
);
4555 set_option (&global_options
, enum_opts_set
, opt
, value
,
4556 p
+ opt_len
, DK_UNSPECIFIED
, input_location
,
4560 error ("attribute(target(\"%s\")) is unknown", orig_p
);
4572 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4575 ix86_valid_target_attribute_tree (tree args
)
4577 const char *orig_arch_string
= ix86_arch_string
;
4578 const char *orig_tune_string
= ix86_tune_string
;
4579 enum fpmath_unit orig_fpmath_set
= global_options_set
.x_ix86_fpmath
;
4580 int orig_tune_defaulted
= ix86_tune_defaulted
;
4581 int orig_arch_specified
= ix86_arch_specified
;
4582 char *option_strings
[IX86_FUNCTION_SPECIFIC_MAX
] = { NULL
, NULL
};
4585 struct cl_target_option
*def
4586 = TREE_TARGET_OPTION (target_option_default_node
);
4587 struct gcc_options enum_opts_set
;
4589 memset (&enum_opts_set
, 0, sizeof (enum_opts_set
));
4591 /* Process each of the options on the chain. */
4592 if (! ix86_valid_target_attribute_inner_p (args
, option_strings
,
4596 /* If the changed options are different from the default, rerun
4597 ix86_option_override_internal, and then save the options away.
4598 The string options are are attribute options, and will be undone
4599 when we copy the save structure. */
4600 if (ix86_isa_flags
!= def
->x_ix86_isa_flags
4601 || target_flags
!= def
->x_target_flags
4602 || option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
]
4603 || option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
]
4604 || enum_opts_set
.x_ix86_fpmath
)
4606 /* If we are using the default tune= or arch=, undo the string assigned,
4607 and use the default. */
4608 if (option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
])
4609 ix86_arch_string
= option_strings
[IX86_FUNCTION_SPECIFIC_ARCH
];
4610 else if (!orig_arch_specified
)
4611 ix86_arch_string
= NULL
;
4613 if (option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
])
4614 ix86_tune_string
= option_strings
[IX86_FUNCTION_SPECIFIC_TUNE
];
4615 else if (orig_tune_defaulted
)
4616 ix86_tune_string
= NULL
;
4618 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4619 if (enum_opts_set
.x_ix86_fpmath
)
4620 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4621 else if (!TARGET_64BIT
&& TARGET_SSE
)
4623 ix86_fpmath
= (enum fpmath_unit
) (FPMATH_SSE
| FPMATH_387
);
4624 global_options_set
.x_ix86_fpmath
= (enum fpmath_unit
) 1;
4627 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4628 ix86_option_override_internal (false);
4630 /* Add any builtin functions with the new isa if any. */
4631 ix86_add_new_builtins (ix86_isa_flags
);
4633 /* Save the current options unless we are validating options for
4635 t
= build_target_option_node ();
4637 ix86_arch_string
= orig_arch_string
;
4638 ix86_tune_string
= orig_tune_string
;
4639 global_options_set
.x_ix86_fpmath
= orig_fpmath_set
;
4641 /* Free up memory allocated to hold the strings */
4642 for (i
= 0; i
< IX86_FUNCTION_SPECIFIC_MAX
; i
++)
4643 free (option_strings
[i
]);
4649 /* Hook to validate attribute((target("string"))). */
4652 ix86_valid_target_attribute_p (tree fndecl
,
4653 tree
ARG_UNUSED (name
),
4655 int ARG_UNUSED (flags
))
4657 struct cl_target_option cur_target
;
4659 tree old_optimize
= build_optimization_node ();
4660 tree new_target
, new_optimize
;
4661 tree func_optimize
= DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
);
4663 /* If the function changed the optimization levels as well as setting target
4664 options, start with the optimizations specified. */
4665 if (func_optimize
&& func_optimize
!= old_optimize
)
4666 cl_optimization_restore (&global_options
,
4667 TREE_OPTIMIZATION (func_optimize
));
4669 /* The target attributes may also change some optimization flags, so update
4670 the optimization options if necessary. */
4671 cl_target_option_save (&cur_target
, &global_options
);
4672 new_target
= ix86_valid_target_attribute_tree (args
);
4673 new_optimize
= build_optimization_node ();
4680 DECL_FUNCTION_SPECIFIC_TARGET (fndecl
) = new_target
;
4682 if (old_optimize
!= new_optimize
)
4683 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl
) = new_optimize
;
4686 cl_target_option_restore (&global_options
, &cur_target
);
4688 if (old_optimize
!= new_optimize
)
4689 cl_optimization_restore (&global_options
,
4690 TREE_OPTIMIZATION (old_optimize
));
4696 /* Hook to determine if one function can safely inline another. */
4699 ix86_can_inline_p (tree caller
, tree callee
)
4702 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
4703 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
4705 /* If callee has no option attributes, then it is ok to inline. */
4709 /* If caller has no option attributes, but callee does then it is not ok to
4711 else if (!caller_tree
)
4716 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
4717 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
4719 /* Callee's isa options should a subset of the caller's, i.e. a SSE4 function
4720 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4722 if ((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
4723 != callee_opts
->x_ix86_isa_flags
)
4726 /* See if we have the same non-isa options. */
4727 else if (caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
4730 /* See if arch, tune, etc. are the same. */
4731 else if (caller_opts
->arch
!= callee_opts
->arch
)
4734 else if (caller_opts
->tune
!= callee_opts
->tune
)
4737 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
)
4740 else if (caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
4751 /* Remember the last target of ix86_set_current_function. */
4752 static GTY(()) tree ix86_previous_fndecl
;
4754 /* Establish appropriate back-end context for processing the function
4755 FNDECL. The argument might be NULL to indicate processing at top
4756 level, outside of any function scope. */
4758 ix86_set_current_function (tree fndecl
)
4760 /* Only change the context if the function changes. This hook is called
4761 several times in the course of compiling a function, and we don't want to
4762 slow things down too much or call target_reinit when it isn't safe. */
4763 if (fndecl
&& fndecl
!= ix86_previous_fndecl
)
4765 tree old_tree
= (ix86_previous_fndecl
4766 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl
)
4769 tree new_tree
= (fndecl
4770 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl
)
4773 ix86_previous_fndecl
= fndecl
;
4774 if (old_tree
== new_tree
)
4779 cl_target_option_restore (&global_options
,
4780 TREE_TARGET_OPTION (new_tree
));
4786 struct cl_target_option
*def
4787 = TREE_TARGET_OPTION (target_option_current_node
);
4789 cl_target_option_restore (&global_options
, def
);
4796 /* Return true if this goes in large data/bss. */
4799 ix86_in_large_data_p (tree exp
)
4801 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
4804 /* Functions are never large data. */
4805 if (TREE_CODE (exp
) == FUNCTION_DECL
)
4808 if (TREE_CODE (exp
) == VAR_DECL
&& DECL_SECTION_NAME (exp
))
4810 const char *section
= TREE_STRING_POINTER (DECL_SECTION_NAME (exp
));
4811 if (strcmp (section
, ".ldata") == 0
4812 || strcmp (section
, ".lbss") == 0)
4818 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
4820 /* If this is an incomplete type with size 0, then we can't put it
4821 in data because it might be too big when completed. */
4822 if (!size
|| size
> ix86_section_threshold
)
4829 /* Switch to the appropriate section for output of DECL.
4830 DECL is either a `VAR_DECL' node or a constant of some sort.
4831 RELOC indicates whether forming the initial value of DECL requires
4832 link-time relocations. */
4834 static section
* x86_64_elf_select_section (tree
, int, unsigned HOST_WIDE_INT
)
4838 x86_64_elf_select_section (tree decl
, int reloc
,
4839 unsigned HOST_WIDE_INT align
)
4841 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4842 && ix86_in_large_data_p (decl
))
4844 const char *sname
= NULL
;
4845 unsigned int flags
= SECTION_WRITE
;
4846 switch (categorize_decl_for_section (decl
, reloc
))
4851 case SECCAT_DATA_REL
:
4852 sname
= ".ldata.rel";
4854 case SECCAT_DATA_REL_LOCAL
:
4855 sname
= ".ldata.rel.local";
4857 case SECCAT_DATA_REL_RO
:
4858 sname
= ".ldata.rel.ro";
4860 case SECCAT_DATA_REL_RO_LOCAL
:
4861 sname
= ".ldata.rel.ro.local";
4865 flags
|= SECTION_BSS
;
4868 case SECCAT_RODATA_MERGE_STR
:
4869 case SECCAT_RODATA_MERGE_STR_INIT
:
4870 case SECCAT_RODATA_MERGE_CONST
:
4874 case SECCAT_SRODATA
:
4881 /* We don't split these for medium model. Place them into
4882 default sections and hope for best. */
4887 /* We might get called with string constants, but get_named_section
4888 doesn't like them as they are not DECLs. Also, we need to set
4889 flags in that case. */
4891 return get_section (sname
, flags
, NULL
);
4892 return get_named_section (decl
, sname
, reloc
);
4895 return default_elf_select_section (decl
, reloc
, align
);
/* NOTE(review): this excerpt is line-mangled (logical lines split, upstream
   line numbers fused into the text) and several statements -- braces, case
   labels (e.g. the SECCAT_BSS case), break statements and the gcc_unreachable
   default -- are missing from view.  Text kept verbatim; diff against the
   upstream i386.c before editing.  */
4898 /* Build up a unique section name, expressed as a
4899 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4900 RELOC indicates whether the initial value of EXP requires
4901 link-time relocations. */
4903 static void ATTRIBUTE_UNUSED
4904 x86_64_elf_unique_section (tree decl
, int reloc
)
/* Large data objects under the medium code models get an "l"-prefixed
   (large) section name; everything else falls through to the generic
   default_unique_section at the bottom.  */
4906 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4907 && ix86_in_large_data_p (decl
))
4909 const char *prefix
= NULL
;
4910 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4911 bool one_only
= DECL_ONE_ONLY (decl
) && !HAVE_COMDAT_GROUP
;
4913 switch (categorize_decl_for_section (decl
, reloc
))
4916 case SECCAT_DATA_REL
:
4917 case SECCAT_DATA_REL_LOCAL
:
4918 case SECCAT_DATA_REL_RO
:
4919 case SECCAT_DATA_REL_RO_LOCAL
:
4920 prefix
= one_only
? ".ld" : ".ldata";
/* NOTE(review): the BSS case label preceding this assignment is not
   visible in this excerpt.  */
4923 prefix
= one_only
? ".lb" : ".lbss";
4926 case SECCAT_RODATA_MERGE_STR
:
4927 case SECCAT_RODATA_MERGE_STR_INIT
:
4928 case SECCAT_RODATA_MERGE_CONST
:
4929 prefix
= one_only
? ".lr" : ".lrodata";
4931 case SECCAT_SRODATA
:
4938 /* We don't split these for medium model. Place them into
4939 default sections and hope for best. */
4944 const char *name
, *linkonce
;
4947 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
4948 name
= targetm
.strip_name_encoding (name
);
4950 /* If we're using one_only, then there needs to be a .gnu.linkonce
4951 prefix to the section name. */
4952 linkonce
= one_only
? ".gnu.linkonce" : "";
/* ACONCAT builds the final "<linkonce><prefix>.<name>" on the obstack.  */
4954 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
4956 DECL_SECTION_NAME (decl
) = build_string (strlen (string
), string
);
/* Non-large-data path: use the generic ELF unique-section logic.  */
4960 default_unique_section (decl
, reloc
);
4963 #ifdef COMMON_ASM_OP
4964 /* This says how to output assembler code to declare an
4965 uninitialized external linkage data object.
4967 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): excerpt is line-mangled; the return-type line, the ALIGN
   parameter declaration and the `else` between the two fputs calls are not
   visible here.  Emits ".largecomm" for medium-model objects larger than
   -mlarge-data-threshold, otherwise the plain COMMON_ASM_OP, then
   "name,size,align" with align converted from bits to bytes.  */
4970 x86_elf_aligned_common (FILE *file
,
4971 const char *name
, unsigned HOST_WIDE_INT size
,
4974 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4975 && size
> (unsigned int)ix86_section_threshold
)
4976 fputs (".largecomm\t", file
);
4978 fputs (COMMON_ASM_OP
, file
);
4979 assemble_name (file
, name
);
4980 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
4981 size
, align
/ BITS_PER_UNIT
);
4985 /* Utility function for targets to use in implementing
4986 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): mangled excerpt; return type, ALIGN parameter and the
   `else` joining the two switch_to_section calls are missing from view.
   Large medium-model objects go to .lbss, others to the normal bss
   section; then alignment, label (or object name) and a skip of SIZE
   bytes (minimum 1) are emitted.  */
4989 x86_output_aligned_bss (FILE *file
, tree decl ATTRIBUTE_UNUSED
,
4990 const char *name
, unsigned HOST_WIDE_INT size
,
4993 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
4994 && size
> (unsigned int)ix86_section_threshold
)
4995 switch_to_section (get_named_section (decl
, ".lbss", 0));
4997 switch_to_section (bss_section
);
4998 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
4999 #ifdef ASM_DECLARE_OBJECT_NAME
5000 last_assemble_variable_decl
= decl
;
5001 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
5003 /* Standard thing is just output label for the object. */
5004 ASM_OUTPUT_LABEL (file
, name
);
5005 #endif /* ASM_DECLARE_OBJECT_NAME */
/* ASM_OUTPUT_SKIP with 0 would be malformed on some assemblers, hence
   the `size ? size : 1`.  */
5006 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
5009 /* Decide whether we must probe the stack before any space allocation
5010 on this target. It's essentially TARGET_STACK_PROBE except when
5011 -fstack-check causes the stack to be already probed differently. */
/* NOTE(review): the body of the `if` (an early `return false;`) is not
   visible in this mangled excerpt.  */
5014 ix86_target_stack_probe (void)
5016 /* Do not probe the stack twice if static stack checking is enabled. */
5017 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
5020 return TARGET_STACK_PROBE
;
5023 /* Decide whether we can make a sibling call to a function. DECL is the
5024 declaration of the function being targeted by the call and EXP is the
5025 CALL_EXPR representing the call. */
/* NOTE(review): mangled excerpt -- the return type, braces, several `return
   false;` statements and parts of the PIC condition are missing from view.
   The visible logic rejects sibcalls for: non-local calls under PIC, callers
   needing extra stack alignment, mismatched return-value registers, needed
   vzeroupper after 256-bit AVX returns, MS->SYSV ABI transitions, and
   indirect/regparm(3) combinations.  Text kept verbatim.  */
5028 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
5030 tree type
, decl_or_type
;
5033 /* If we are generating position-independent code, we cannot sibcall
5034 optimize any indirect call, or a direct call to a global function,
5035 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5039 && (!decl
|| !targetm
.binds_local_p (decl
)))
5042 /* If we need to align the outgoing stack, then sibcalling would
5043 unalign the stack, which may break the called function. */
5044 if (ix86_minimum_incoming_stack_boundary (true)
5045 < PREFERRED_STACK_BOUNDARY
)
5050 decl_or_type
= decl
;
5051 type
= TREE_TYPE (decl
);
5055 /* We're looking at the CALL_EXPR, we need the type of the function. */
5056 type
= CALL_EXPR_FN (exp
); /* pointer expression */
5057 type
= TREE_TYPE (type
); /* pointer type */
5058 type
= TREE_TYPE (type
); /* function type */
5059 decl_or_type
= type
;
5062 /* Check that the return value locations are the same. Like
5063 if we are returning floats on the 80387 register stack, we cannot
5064 make a sibcall from a function that doesn't return a float to a
5065 function that does or, conversely, from a function that does return
5066 a float to a function that doesn't; the necessary stack adjustment
5067 would not be executed. This is also the place we notice
5068 differences in the return value ABI. Note that it is ok for one
5069 of the functions to have void return type as long as the return
5070 value of the other is passed in a register. */
5071 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
5072 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5074 if (STACK_REG_P (a
) || STACK_REG_P (b
))
5076 if (!rtx_equal_p (a
, b
))
5079 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5081 /* Disable sibcall if we need to generate vzeroupper after
5083 if (TARGET_VZEROUPPER
5084 && cfun
->machine
->callee_return_avx256_p
5085 && !cfun
->machine
->caller_return_avx256_p
)
5088 else if (!rtx_equal_p (a
, b
))
5093 /* The SYSV ABI has more call-clobbered registers;
5094 disallow sibcalls from MS to SYSV. */
5095 if (cfun
->machine
->call_abi
== MS_ABI
5096 && ix86_function_type_abi (type
) == SYSV_ABI
)
5101 /* If this call is indirect, we'll need to be able to use a
5102 call-clobbered register for the address of the target function.
5103 Make sure that all such registers are not used for passing
5104 parameters. Note that DLLIMPORT functions are indirect. */
5106 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
)))
5108 if (ix86_function_regparm (type
, NULL
) >= 3)
5110 /* ??? Need to count the actual number of registers to be used,
5111 not the possible number of registers. Fix later. */
5117 /* Otherwise okay. That also includes certain types of indirect calls. */
5121 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5122 and "sseregparm" calling convention attributes;
5123 arguments as in struct attribute_spec.handler. */
/* NOTE(review): mangled excerpt -- the return type (tree), the `args` and
   `no_add_attrs` parameters, braces, `return NULL_TREE;` statements and the
   64-bit early-out are not visible.  Each branch below rejects attribute
   combinations that conflict (e.g. fastcall vs. regparm).  The diagnostic
   strings contain upstream typos ("regparam", "none class-method") -- they
   are runtime strings and are deliberately left untouched.  */
5126 ix86_handle_cconv_attribute (tree
*node
, tree name
,
5128 int flags ATTRIBUTE_UNUSED
,
5131 if (TREE_CODE (*node
) != FUNCTION_TYPE
5132 && TREE_CODE (*node
) != METHOD_TYPE
5133 && TREE_CODE (*node
) != FIELD_DECL
5134 && TREE_CODE (*node
) != TYPE_DECL
)
5136 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
5138 *no_add_attrs
= true;
5142 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5143 if (is_attribute_p ("regparm", name
))
5147 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5149 error ("fastcall and regparm attributes are not compatible");
5152 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5154 error ("regparam and thiscall attributes are not compatible");
/* Validate the regparm argument: must be an INTEGER_CST in [0, REGPARM_MAX].  */
5157 cst
= TREE_VALUE (args
);
5158 if (TREE_CODE (cst
) != INTEGER_CST
)
5160 warning (OPT_Wattributes
,
5161 "%qE attribute requires an integer constant argument",
5163 *no_add_attrs
= true;
5165 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
5167 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
5169 *no_add_attrs
= true;
5177 /* Do not warn when emulating the MS ABI. */
5178 if ((TREE_CODE (*node
) != FUNCTION_TYPE
5179 && TREE_CODE (*node
) != METHOD_TYPE
)
5180 || ix86_function_type_abi (*node
) != MS_ABI
)
5181 warning (OPT_Wattributes
, "%qE attribute ignored",
5183 *no_add_attrs
= true;
5187 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5188 if (is_attribute_p ("fastcall", name
))
5190 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5192 error ("fastcall and cdecl attributes are not compatible");
5194 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5196 error ("fastcall and stdcall attributes are not compatible");
5198 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
5200 error ("fastcall and regparm attributes are not compatible");
5202 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5204 error ("fastcall and thiscall attributes are not compatible");
5208 /* Can combine stdcall with fastcall (redundant), regparm and
5210 else if (is_attribute_p ("stdcall", name
))
5212 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5214 error ("stdcall and cdecl attributes are not compatible");
5216 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5218 error ("stdcall and fastcall attributes are not compatible");
5220 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5222 error ("stdcall and thiscall attributes are not compatible");
5226 /* Can combine cdecl with regparm and sseregparm. */
5227 else if (is_attribute_p ("cdecl", name
))
5229 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5231 error ("stdcall and cdecl attributes are not compatible");
5233 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5235 error ("fastcall and cdecl attributes are not compatible");
5237 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5239 error ("cdecl and thiscall attributes are not compatible");
5242 else if (is_attribute_p ("thiscall", name
))
5244 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5245 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5247 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5249 error ("stdcall and thiscall attributes are not compatible");
5251 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5253 error ("fastcall and thiscall attributes are not compatible");
5255 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5257 error ("cdecl and thiscall attributes are not compatible");
5261 /* Can combine sseregparm with all attributes. */
5266 /* The transactional memory builtins are implicitly regparm or fastcall
5267 depending on the ABI. Override the generic do-nothing attribute that
5268 these builtins were declared with, and replace it with one of the two
5269 attributes that we expect elsewhere. */
/* NOTE(review): mangled excerpt; the return type (tree), the no_add_attrs
   parameter, braces and `return NULL_TREE;` are not visible.  On what looks
   like 32-bit Windows (CHECK_STACK_LIMIT > 0) the builtin is re-declared
   fastcall, otherwise regparm(2), via decl_attributes.  */
5272 ix86_handle_tm_regparm_attribute (tree
*node
, tree name ATTRIBUTE_UNUSED
,
5273 tree args ATTRIBUTE_UNUSED
,
5274 int flags ATTRIBUTE_UNUSED
,
5279 /* In no case do we want to add the placeholder attribute. */
5280 *no_add_attrs
= true;
5282 /* The 64-bit ABI is unchanged for transactional memory. */
5286 /* ??? Is there a better way to validate 32-bit windows? We have
5287 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5288 if (CHECK_STACK_LIMIT
> 0)
5289 alt
= tree_cons (get_identifier ("fastcall"), NULL
, NULL
);
5292 alt
= tree_cons (NULL
, build_int_cst (NULL
, 2), NULL
);
5293 alt
= tree_cons (get_identifier ("regparm"), alt
, NULL
);
5295 decl_attributes (node
, alt
, flags
);
5300 /* This function determines from TYPE the calling-convention. */
/* NOTE(review): mangled excerpt; the return type (unsigned int), the `attrs`
   and `is_stdarg` declarations, braces, the TARGET_64BIT early return and the
   `return ret;` after the IX86_BASE_CALLCVT check are not visible.  Returns a
   bitmask of IX86_CALLCVT_* flags derived from the type's attributes, with
   TARGET_RTD making stdcall the default for non-stdarg functions.  */
5303 ix86_get_callcvt (const_tree type
)
5305 unsigned int ret
= 0;
5310 return IX86_CALLCVT_CDECL
;
5312 attrs
= TYPE_ATTRIBUTES (type
);
5313 if (attrs
!= NULL_TREE
)
5315 if (lookup_attribute ("cdecl", attrs
))
5316 ret
|= IX86_CALLCVT_CDECL
;
5317 else if (lookup_attribute ("stdcall", attrs
))
5318 ret
|= IX86_CALLCVT_STDCALL
;
5319 else if (lookup_attribute ("fastcall", attrs
))
5320 ret
|= IX86_CALLCVT_FASTCALL
;
5321 else if (lookup_attribute ("thiscall", attrs
))
5322 ret
|= IX86_CALLCVT_THISCALL
;
5324 /* Regparam isn't allowed for thiscall and fastcall. */
5325 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
5327 if (lookup_attribute ("regparm", attrs
))
5328 ret
|= IX86_CALLCVT_REGPARM
;
5329 if (lookup_attribute ("sseregparm", attrs
))
5330 ret
|= IX86_CALLCVT_SSEREGPARM
;
5333 if (IX86_BASE_CALLCVT(ret
) != 0)
5337 is_stdarg
= stdarg_p (type
);
5338 if (TARGET_RTD
&& !is_stdarg
)
5339 return IX86_CALLCVT_STDCALL
| ret
;
/* NOTE(review): the first operand of this || chain (likely a `ret != 0 ||
   is_stdarg` style condition) is missing from view.  */
5343 || TREE_CODE (type
) != METHOD_TYPE
5344 || ix86_function_type_abi (type
) != MS_ABI
)
5345 return IX86_CALLCVT_CDECL
| ret
;
5347 return IX86_CALLCVT_THISCALL
;
5350 /* Return 0 if the attributes for two types are incompatible, 1 if they
5351 are compatible, and 2 if they are nearly compatible (which causes a
5352 warning to be generated). */
/* NOTE(review): mangled excerpt; return type (int), the ccvt1 != ccvt2
   comparison and the final `return 1;` are not visible here.  */
5355 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
5357 unsigned int ccvt1
, ccvt2
;
5359 if (TREE_CODE (type1
) != FUNCTION_TYPE
5360 && TREE_CODE (type1
) != METHOD_TYPE
)
5363 ccvt1
= ix86_get_callcvt (type1
);
5364 ccvt2
= ix86_get_callcvt (type2
);
5367 if (ix86_function_regparm (type1
, NULL
)
5368 != ix86_function_regparm (type2
, NULL
))
5374 /* Return the regparm value for a function with the indicated TYPE and DECL.
5375 DECL may be NULL when calling function indirectly
5376 or considering a libcall. */
/* NOTE(review): mangled excerpt; return type (int), local declarations,
   braces, the TARGET_64BIT guard, the fastcall/thiscall fixed returns (2/1),
   loop bodies (`break;`, `globals++;`) and the final `return regparm;` are
   not visible.  The local-function path raises regparm for i?86 local calls
   when cgraph says the signature can change, limited by fixed registers,
   static chain use, and -fsplit-stack.  */
5379 ix86_function_regparm (const_tree type
, const_tree decl
)
5386 return (ix86_function_type_abi (type
) == SYSV_ABI
5387 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
5388 ccvt
= ix86_get_callcvt (type
);
5389 regparm
= ix86_regparm
;
5391 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
5393 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
5396 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
5400 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5402 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5405 /* Use register calling convention for local functions when possible. */
5407 && TREE_CODE (decl
) == FUNCTION_DECL
5409 && !(profile_flag
&& !flag_fentry
))
5411 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5412 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE (decl
));
5413 if (i
&& i
->local
&& i
->can_change_signature
)
5415 int local_regparm
, globals
= 0, regno
;
5417 /* Make sure no regparm register is taken by a
5418 fixed register variable. */
5419 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
; local_regparm
++)
5420 if (fixed_regs
[local_regparm
])
5423 /* We don't want to use regparm(3) for nested functions as
5424 these use a static chain pointer in the third argument. */
5425 if (local_regparm
== 3 && DECL_STATIC_CHAIN (decl
))
5428 /* In 32-bit mode save a register for the split stack. */
5429 if (!TARGET_64BIT
&& local_regparm
== 3 && flag_split_stack
)
5432 /* Each fixed register usage increases register pressure,
5433 so less registers should be used for argument passing.
5434 This functionality can be overriden by an explicit
5436 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
5437 if (fixed_regs
[regno
])
5441 = globals
< local_regparm
? local_regparm
- globals
: 0;
5443 if (local_regparm
> regparm
)
5444 regparm
= local_regparm
;
5451 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5452 DFmode (2) arguments in SSE registers for a function with the
5453 indicated TYPE and DECL. DECL may be NULL when calling function
5454 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): mangled excerpt; the return type (static int), the WARN
   guard around the two error() calls, several returns (2, 0) and braces are
   not visible.  */
5457 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
5459 gcc_assert (!TARGET_64BIT
);
5461 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5462 by the sseregparm attribute. */
5463 if (TARGET_SSEREGPARM
5464 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
5471 error ("calling %qD with attribute sseregparm without "
5472 "SSE/SSE2 enabled", decl
);
5474 error ("calling %qT with attribute sseregparm without "
5475 "SSE/SSE2 enabled", type
);
5483 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5484 (and DFmode for SSE2) arguments in SSE registers. */
5485 if (decl
&& TARGET_SSE_MATH
&& optimize
5486 && !(profile_flag
&& !flag_fentry
))
5488 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5489 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
5490 if (i
&& i
->local
&& i
->can_change_signature
)
5491 return TARGET_SSE2
? 2 : 1;
5497 /* Return true if EAX is live at the start of the function. Used by
5498 ix86_expand_prologue to determine if we need special help before
5499 calling allocate_stack_worker. */
/* NOTE(review): mangled excerpt; the `static bool` header and braces are not
   visible.  Register 0 here is EAX.  */
5502 ix86_eax_live_at_start_p (void)
5504 /* Cheat. Don't bother working forward from ix86_function_regparm
5505 to the function type to whether an actual argument is located in
5506 eax. Instead just look at cfg info, which is still close enough
5507 to correct at this point. This gives false positives for broken
5508 functions that might use uninitialized data that happens to be
5509 allocated in eax, but who cares? */
5510 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR
), 0);
/* NOTE(review): mangled excerpt; the return type (static bool), the
   TARGET_64BIT guard, the `attr` declaration, braces and intermediate
   returns are not visible.  Decides whether the hidden aggregate-return
   pointer is left on the stack by the callee: the
   callee_pop_aggregate_return attribute (arg 0/1) overrides, 32-bit MS ABI
   defaults to keeping it, otherwise the target macro decides.  */
5514 ix86_keep_aggregate_return_pointer (tree fntype
)
5520 attr
= lookup_attribute ("callee_pop_aggregate_return",
5521 TYPE_ATTRIBUTES (fntype
));
5523 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
5525 /* For 32-bit MS-ABI the default is to keep aggregate
5527 if (ix86_function_type_abi (fntype
) == MS_ABI
)
5530 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
5533 /* Value is the number of bytes of arguments automatically
5534 popped when returning from a subroutine call.
5535 FUNDECL is the declaration node of the function (as a tree),
5536 FUNTYPE is the data type of the function (as a tree),
5537 or for a library call it is an identifier node for the subroutine name.
5538 SIZE is the number of bytes of arguments passed on the stack.
5540 On the 80386, the RTD insn may be used to pop them if the number
5541 of args is fixed, but if the number is variable then the caller
5542 must pop them all. RTD can't be used for library calls now
5543 because the library is compiled with the Unix compiler.
5544 Use of RTD is a selectable option, since it is incompatible with
5545 standard Unix calling sequences. If the option is not selected,
5546 the caller must always pop the args.
5548 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): mangled excerpt; the return type (static int), braces,
   `return size;` for the stdcall branch and the final `return 0;` are not
   visible.  */
5551 ix86_return_pops_args (tree fundecl
, tree funtype
, int size
)
5555 /* None of the 64-bit ABIs pop arguments. */
5559 ccvt
= ix86_get_callcvt (funtype
);
5561 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
5562 | IX86_CALLCVT_THISCALL
)) != 0
5563 && ! stdarg_p (funtype
))
5566 /* Lose any fake structure return argument if it is passed on the stack. */
5567 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
5568 && !ix86_keep_aggregate_return_pointer (funtype
))
5570 int nregs
= ix86_function_regparm (funtype
, fundecl
);
5572 return GET_MODE_SIZE (Pmode
);
5578 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
/* NOTE(review): mangled excerpt; the return type (static bool), braces,
   local declarations (i, j, offset, win), the constrain break/return logic
   and the final `return true;` are not visible.  The assignment inside the
   first `if` condition (INSN_CODE (insn) = recog (...)) caches the recog
   result on the insn -- presumably intentional GCC idiom, not a typo for
   `==`; confirm against upstream before "fixing".  */
5581 ix86_legitimate_combined_insn (rtx insn
)
5583 /* Check operand constraints in case hard registers were propagated
5584 into insn pattern. This check prevents combine pass from
5585 generating insn patterns with invalid hard register operands.
5586 These invalid insns can eventually confuse reload to error out
5587 with a spill failure. See also PRs 46829 and 46843. */
5588 if ((INSN_CODE (insn
) = recog (PATTERN (insn
), insn
, 0)) >= 0)
5592 extract_insn (insn
);
5593 preprocess_constraints ();
5595 for (i
= 0; i
< recog_data
.n_operands
; i
++)
5597 rtx op
= recog_data
.operand
[i
];
5598 enum machine_mode mode
= GET_MODE (op
);
5599 struct operand_alternative
*op_alt
;
5604 /* A unary operator may be accepted by the predicate, but it
5605 is irrelevant for matching constraints. */
5609 if (GET_CODE (op
) == SUBREG
)
5611 if (REG_P (SUBREG_REG (op
))
5612 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
5613 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
5614 GET_MODE (SUBREG_REG (op
)),
5617 op
= SUBREG_REG (op
);
/* Only hard-register operands need constraint re-checking.  */
5620 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
5623 op_alt
= recog_op_alt
[i
];
5625 /* Operand has no constraints, anything is OK. */
5626 win
= !recog_data
.n_alternatives
;
5628 for (j
= 0; j
< recog_data
.n_alternatives
; j
++)
5630 if (op_alt
[j
].anything_ok
5631 || (op_alt
[j
].matches
!= -1
5633 (recog_data
.operand
[i
],
5634 recog_data
.operand
[op_alt
[j
].matches
]))
5635 || reg_fits_class_p (op
, op_alt
[j
].cl
, offset
, mode
))
5650 /* Argument support functions. */
5652 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): mangled excerpt; the return type (static bool), braces,
   the TARGET_64BIT/TARGET_MACHO branch structure and several `return`
   keywords are not visible.  The function distinguishes the 32-bit cases
   (regparm ints, MMX, SSE), Darwin, and the 64-bit SYSV/MS integer
   parameter register tables.  */
5654 ix86_function_arg_regno_p (int regno
)
5657 const int *parm_regs
;
5662 return (regno
< REGPARM_MAX
5663 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
5665 return (regno
< REGPARM_MAX
5666 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
5667 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
5668 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
5669 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
5674 if (SSE_REGNO_P (regno
) && TARGET_SSE
)
5679 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
5680 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
5684 /* TODO: The function should depend on current function ABI but
5685 builtins.c would need updating then. Therefore we use the
5688 /* RAX is used as hidden argument to va_arg functions. */
5689 if (ix86_abi
== SYSV_ABI
&& regno
== AX_REG
)
5692 if (ix86_abi
== MS_ABI
)
5693 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
5695 parm_regs
= x86_64_int_parameter_registers
;
5696 for (i
= 0; i
< (ix86_abi
== MS_ABI
5697 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
5698 if (regno
== parm_regs
[i
])
5703 /* Return if we do not know how to pass TYPE solely in registers. */
/* NOTE(review): mangled excerpt; the `static bool` header, braces and the
   `return true;` under the first `if` are not visible.  */
5706 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5708 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5711 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5712 The layout_type routine is crafty and tries to trick us into passing
5713 currently unsupported vector types on the stack by using TImode. */
5714 return (!TARGET_64BIT
&& mode
== TImode
5715 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
5718 /* It returns the size, in bytes, of the area reserved for arguments passed
5719 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): mangled excerpt; the return type (static int), the `else if`
   introducing the type-based branch, braces and the return values (64 for MS
   ABI shadow space, 0 otherwise) are partially missing from view.  */
5722 ix86_reg_parm_stack_space (const_tree fndecl
)
5724 enum calling_abi call_abi
= SYSV_ABI
;
5725 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
5726 call_abi
= ix86_function_abi (fndecl
);
5728 call_abi
= ix86_function_type_abi (fndecl
);
5729 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
5734 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* NOTE(review): mangled excerpt; the return type (enum calling_abi), the
   branch bodies flipping `abi`, the `return abi;` and the fallback
   `return ix86_abi;` are not visible.  Attribute ms_abi/sysv_abi on the
   type overrides the command-line default.  */
5737 ix86_function_type_abi (const_tree fntype
)
5739 if (fntype
!= NULL_TREE
&& TYPE_ATTRIBUTES (fntype
) != NULL_TREE
)
5741 enum calling_abi abi
= ix86_abi
;
5742 if (abi
== SYSV_ABI
)
5744 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
5747 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
/* NOTE(review): mangled excerpt; the `static bool` header, braces,
   `return true;` and the final `return false;` are not visible.  Checks
   for the ms_hook_prologue attribute and rejects it on nested functions
   (the hot-patch prologue cannot coexist with a static chain).  */
5755 ix86_function_ms_hook_prologue (const_tree fn
)
5757 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
5759 if (decl_function_context (fn
) != NULL_TREE
)
5760 error_at (DECL_SOURCE_LOCATION (fn
),
5761 "ms_hook_prologue is not compatible with nested function");
/* NOTE(review): mangled excerpt; a null-check of FNDECL returning ix86_abi
   (upstream has one before this line) is not visible here.  Returns the
   calling ABI for a specific function declaration.  */
5768 static enum calling_abi
5769 ix86_function_abi (const_tree fndecl
)
5773 return ix86_function_type_abi (TREE_TYPE (fndecl
));
5776 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* NOTE(review): mangled excerpt; the return type (enum calling_abi) and a
   probable null-cfun guard are not visible.  */
5779 ix86_cfun_abi (void)
5783 return cfun
->machine
->call_abi
;
5786 /* Write the extra assembler code needed to declare a function properly. */
/* NOTE(review): mangled excerpt; the return type (void), the `tree decl`
   parameter, braces and the `if (is_ms_hook)` guards around both the filler
   emission and the magic-byte markers are not visible.  For ms_hook_prologue
   functions this emits 16/32 bytes of 0xCC padding before the label and a
   fixed hot-patchable instruction sequence after it.  */
5789 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
5792 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
5796 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
5797 unsigned int filler_cc
= 0xcccccccc;
5799 for (i
= 0; i
< filler_count
; i
+= 4)
5800 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
5803 #ifdef SUBTARGET_ASM_UNWIND_INIT
5804 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
5807 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
5809 /* Output magic byte marker, if hot-patch attribute is set. */
5814 /* leaq [%rsp + 0], %rsp */
5815 asm_fprintf (asm_out_file
, ASM_BYTE
5816 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5820 /* movl.s %edi, %edi
5822 movl.s %esp, %ebp */
5823 asm_fprintf (asm_out_file
, ASM_BYTE
5824 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5830 extern void init_regs (void);
5832 /* Implementation of call abi switching target hook. Specific to FNDECL
5833 the specific call register sets are set. See also
5834 ix86_conditional_register_usage for more details. */
/* NOTE(review): mangled excerpt; the return type (void), braces and the
   `else` between the two assignments are not visible.  Records the effective
   calling ABI for the function being compiled.  */
5836 ix86_call_abi_override (const_tree fndecl
)
5838 if (fndecl
== NULL_TREE
)
5839 cfun
->machine
->call_abi
= ix86_abi
;
5841 cfun
->machine
->call_abi
= ix86_function_type_abi (TREE_TYPE (fndecl
));
5844 /* 64-bit MS and SYSV ABI have different set of call used registers. Avoid
5845 expensive re-initialization of init_regs each time we switch function context
5846 since this is needed only during RTL expansion. */
/* NOTE(review): mangled excerpt; the return type (void), the surrounding
   `if (TARGET_64BIT && ...)` condition head and the `reinit_regs ()` call
   under it are not visible.  The `==` here is a comparison inside that
   condition (checking whether SI's call-used status already matches the MS
   ABI), not a typoed assignment -- confirm against upstream.  */
5848 ix86_maybe_switch_abi (void)
5851 call_used_regs
[SI_REG
] == (cfun
->machine
->call_abi
== MS_ABI
))
5855 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5856 for a call to a function whose data type is FNTYPE.
5857 For a library call, FNTYPE is 0. */
/* NOTE(review): heavily mangled excerpt -- the `fndecl` and `caller`
   parameters, braces, many `if`/`else` heads (fndecl vs. fntype paths,
   TARGET_64BIT splits, the caller/callee distinction for the AVX256
   bookkeeping) and several early returns are not visible.  Text kept
   verbatim; compare with upstream before modifying.  */
5860 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
5861 tree fntype
, /* tree ptr for function decl */
5862 rtx libname
, /* SYMBOL_REF of library name or 0 */
5866 struct cgraph_local_info
*i
;
5869 memset (cum
, 0, sizeof (*cum
));
5871 /* Initialize for the current callee. */
5874 cfun
->machine
->callee_pass_avx256_p
= false;
5875 cfun
->machine
->callee_return_avx256_p
= false;
5880 i
= cgraph_local_info (fndecl
);
5881 cum
->call_abi
= ix86_function_abi (fndecl
);
5882 fnret_type
= TREE_TYPE (TREE_TYPE (fndecl
));
5887 cum
->call_abi
= ix86_function_type_abi (fntype
);
5889 fnret_type
= TREE_TYPE (fntype
);
5894 if (TARGET_VZEROUPPER
&& fnret_type
)
5896 rtx fnret_value
= ix86_function_value (fnret_type
, fntype
,
5898 if (function_pass_avx256_p (fnret_value
))
5900 /* The return value of this function uses 256bit AVX modes. */
5902 cfun
->machine
->callee_return_avx256_p
= true;
5904 cfun
->machine
->caller_return_avx256_p
= true;
5908 cum
->caller
= caller
;
5910 /* Set up the number of registers to use for passing arguments. */
5912 if (TARGET_64BIT
&& cum
->call_abi
== MS_ABI
&& !ACCUMULATE_OUTGOING_ARGS
)
5913 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5914 "or subtarget optimization implying it");
5915 cum
->nregs
= ix86_regparm
;
5918 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
5919 ? X86_64_REGPARM_MAX
5920 : X86_64_MS_REGPARM_MAX
);
5924 cum
->sse_nregs
= SSE_REGPARM_MAX
;
5927 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
5928 ? X86_64_SSE_REGPARM_MAX
5929 : X86_64_MS_SSE_REGPARM_MAX
);
5933 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
5934 cum
->warn_avx
= true;
5935 cum
->warn_sse
= true;
5936 cum
->warn_mmx
= true;
5938 /* Because type might mismatch in between caller and callee, we need to
5939 use actual type of function for local calls.
5940 FIXME: cgraph_analyze can be told to actually record if function uses
5941 va_start so for local functions maybe_vaarg can be made aggressive
5943 FIXME: once typesytem is fixed, we won't need this code anymore. */
5944 if (i
&& i
->local
&& i
->can_change_signature
)
5945 fntype
= TREE_TYPE (fndecl
);
5946 cum
->maybe_vaarg
= (fntype
5947 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
5952 /* If there are variable arguments, then we won't pass anything
5953 in registers in 32-bit mode. */
5954 if (stdarg_p (fntype
))
5965 /* Use ecx and edx registers if function has fastcall attribute,
5966 else look for regparm information. */
5969 unsigned int ccvt
= ix86_get_callcvt (fntype
);
5970 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
5973 cum
->fastcall
= 1; /* Same first register as in fastcall. */
5975 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
5981 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
5984 /* Set up the number of SSE registers used for passing SFmode
5985 and DFmode arguments. Warn for mismatching ABI. */
5986 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
5990 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5991 But in the case of vector types, it is some vector mode.
5993 When we have only some of our vector isa extensions enabled, then there
5994 are some modes for which vector_mode_supported_p is false. For these
5995 modes, the generic vector support in gcc will choose some non-vector mode
5996 in order to implement the type. By computing the natural mode, we'll
5997 select the proper ABI location for the operand and not depend on whatever
5998 the middle-end decides to do with these vector types.
6000 The midde-end can't deal with the vector types > 16 bytes. In this
6001 case, we return the original mode and warn ABI change if CUM isn't
/* NOTE(review): mangled excerpt; braces, the warnedavx/warnedsse guard
   conditions (cum && cum->warn_*), `return mode;` in the found-mode case,
   a gcc_unreachable and the final `return mode;` are not visible.  */
6004 static enum machine_mode
6005 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
)
6007 enum machine_mode mode
= TYPE_MODE (type
);
6009 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
6011 HOST_WIDE_INT size
= int_size_in_bytes (type
);
6012 if ((size
== 8 || size
== 16 || size
== 32)
6013 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6014 && TYPE_VECTOR_SUBPARTS (type
) > 1)
6016 enum machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
6018 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
6019 mode
= MIN_MODE_VECTOR_FLOAT
;
6021 mode
= MIN_MODE_VECTOR_INT
;
6023 /* Get the mode which has this inner mode and number of units. */
6024 for (; mode
!= VOIDmode
; mode
= GET_MODE_WIDER_MODE (mode
))
6025 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
6026 && GET_MODE_INNER (mode
) == innermode
)
6028 if (size
== 32 && !TARGET_AVX
)
6030 static bool warnedavx
;
6037 warning (0, "AVX vector argument without AVX "
6038 "enabled changes the ABI");
6040 return TYPE_MODE (type
);
6042 else if ((size
== 8 || size
== 16) && !TARGET_SSE
)
6044 static bool warnedsse
;
6051 warning (0, "SSE vector argument without SSE "
6052 "enabled changes the ABI");
6067 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6068 this may not agree with the mode that the type system has chosen for the
6069 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6070 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): mangled excerpt; the return type (static rtx), the REGNO
   parameter, braces, the `else` and the trailing `return tmp;` are not
   visible.  */
6073 gen_reg_or_parallel (enum machine_mode mode
, enum machine_mode orig_mode
,
6078 if (orig_mode
!= BLKmode
)
6079 tmp
= gen_rtx_REG (orig_mode
, regno
);
6082 tmp
= gen_rtx_REG (mode
, regno
);
6083 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
6084 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
6090 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6091 of this code is to classify each 8bytes of incoming argument by the register
6092 class and assign registers accordingly. */
6094 /* Return the union class of CLASS1 and CLASS2.
6095 See the x86-64 PS ABI for details. */
/* NOTE(review): mangled excerpt; the `return class1;` / `return class2;`
   bodies of rules #1 and #2 and the opening brace are not visible.  The
   rule numbering follows the SysV AMD64 psABI classification-merge table.  */
6097 static enum x86_64_reg_class
6098 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
6100 /* Rule #1: If both classes are equal, this is the resulting class. */
6101 if (class1
== class2
)
6104 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6106 if (class1
== X86_64_NO_CLASS
)
6108 if (class2
== X86_64_NO_CLASS
)
6111 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6112 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
6113 return X86_64_MEMORY_CLASS
;
6115 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6116 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
6117 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
6118 return X86_64_INTEGERSI_CLASS
;
6119 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
6120 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
6121 return X86_64_INTEGER_CLASS
;
6123 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6125 if (class1
== X86_64_X87_CLASS
6126 || class1
== X86_64_X87UP_CLASS
6127 || class1
== X86_64_COMPLEX_X87_CLASS
6128 || class2
== X86_64_X87_CLASS
6129 || class2
== X86_64_X87UP_CLASS
6130 || class2
== X86_64_COMPLEX_X87_CLASS
)
6131 return X86_64_MEMORY_CLASS
;
6133 /* Rule #6: Otherwise class SSE is used. */
6134 return X86_64_SSE_CLASS
;
6137 /* Classify the argument of type TYPE and mode MODE.
6138 CLASSES will be filled by the register class used to pass each word
6139 of the operand. The number of words is returned. In case the parameter
6140 should be passed in memory, 0 is returned. As a special case for zero
6141 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6143 BIT_OFFSET is used internally for handling records and specifies offset
6144 of the offset in bits modulo 256 to avoid overflow cases.
6146 See the x86-64 PS ABI for details.
6150 classify_argument (enum machine_mode mode
, const_tree type
,
6151 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
6153 HOST_WIDE_INT bytes
=
6154 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6156 = (bytes
+ (bit_offset
% 64) / 8 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6158 /* Variable sized entities are always passed/returned in memory. */
6162 if (mode
!= VOIDmode
6163 && targetm
.calls
.must_pass_in_stack (mode
, type
))
6166 if (type
&& AGGREGATE_TYPE_P (type
))
6170 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
6172 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
6176 for (i
= 0; i
< words
; i
++)
6177 classes
[i
] = X86_64_NO_CLASS
;
6179 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6180 signalize memory class, so handle it as special case. */
6183 classes
[0] = X86_64_NO_CLASS
;
6187 /* Classify each field of record and merge classes. */
6188 switch (TREE_CODE (type
))
6191 /* And now merge the fields of structure. */
6192 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6194 if (TREE_CODE (field
) == FIELD_DECL
)
6198 if (TREE_TYPE (field
) == error_mark_node
)
6201 /* Bitfields are always classified as integer. Handle them
6202 early, since later code would consider them to be
6203 misaligned integers. */
6204 if (DECL_BIT_FIELD (field
))
6206 for (i
= (int_bit_position (field
)
6207 + (bit_offset
% 64)) / 8 / 8;
6208 i
< ((int_bit_position (field
) + (bit_offset
% 64))
6209 + tree_low_cst (DECL_SIZE (field
), 0)
6212 merge_classes (X86_64_INTEGER_CLASS
,
6219 type
= TREE_TYPE (field
);
6221 /* Flexible array member is ignored. */
6222 if (TYPE_MODE (type
) == BLKmode
6223 && TREE_CODE (type
) == ARRAY_TYPE
6224 && TYPE_SIZE (type
) == NULL_TREE
6225 && TYPE_DOMAIN (type
) != NULL_TREE
6226 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
6231 if (!warned
&& warn_psabi
)
6234 inform (input_location
,
6235 "the ABI of passing struct with"
6236 " a flexible array member has"
6237 " changed in GCC 4.4");
6241 num
= classify_argument (TYPE_MODE (type
), type
,
6243 (int_bit_position (field
)
6244 + bit_offset
) % 256);
6247 pos
= (int_bit_position (field
)
6248 + (bit_offset
% 64)) / 8 / 8;
6249 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
6251 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
6258 /* Arrays are handled as small records. */
6261 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
6262 TREE_TYPE (type
), subclasses
, bit_offset
);
6266 /* The partial classes are now full classes. */
6267 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
6268 subclasses
[0] = X86_64_SSE_CLASS
;
6269 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
6270 && !((bit_offset
% 64) == 0 && bytes
== 4))
6271 subclasses
[0] = X86_64_INTEGER_CLASS
;
6273 for (i
= 0; i
< words
; i
++)
6274 classes
[i
] = subclasses
[i
% num
];
6279 case QUAL_UNION_TYPE
:
6280 /* Unions are similar to RECORD_TYPE but offset is always 0.
6282 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6284 if (TREE_CODE (field
) == FIELD_DECL
)
6288 if (TREE_TYPE (field
) == error_mark_node
)
6291 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
6292 TREE_TYPE (field
), subclasses
,
6296 for (i
= 0; i
< num
; i
++)
6297 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
6308 /* When size > 16 bytes, if the first one isn't
6309 X86_64_SSE_CLASS or any other ones aren't
6310 X86_64_SSEUP_CLASS, everything should be passed in
6312 if (classes
[0] != X86_64_SSE_CLASS
)
6315 for (i
= 1; i
< words
; i
++)
6316 if (classes
[i
] != X86_64_SSEUP_CLASS
)
6320 /* Final merger cleanup. */
6321 for (i
= 0; i
< words
; i
++)
6323 /* If one class is MEMORY, everything should be passed in
6325 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6328 /* The X86_64_SSEUP_CLASS should be always preceded by
6329 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6330 if (classes
[i
] == X86_64_SSEUP_CLASS
6331 && classes
[i
- 1] != X86_64_SSE_CLASS
6332 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6334 /* The first one should never be X86_64_SSEUP_CLASS. */
6335 gcc_assert (i
!= 0);
6336 classes
[i
] = X86_64_SSE_CLASS
;
6339 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6340 everything should be passed in memory. */
6341 if (classes
[i
] == X86_64_X87UP_CLASS
6342 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6346 /* The first one should never be X86_64_X87UP_CLASS. */
6347 gcc_assert (i
!= 0);
6348 if (!warned
&& warn_psabi
)
6351 inform (input_location
,
6352 "the ABI of passing union with long double"
6353 " has changed in GCC 4.4");
6361 /* Compute alignment needed. We align all types to natural boundaries with
6362 exception of XFmode that is aligned to 64bits. */
6363 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6365 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6368 mode_alignment
= 128;
6369 else if (mode
== XCmode
)
6370 mode_alignment
= 256;
6371 if (COMPLEX_MODE_P (mode
))
6372 mode_alignment
/= 2;
6373 /* Misaligned fields are always returned in memory. */
6374 if (bit_offset
% mode_alignment
)
6378 /* for V1xx modes, just use the base mode */
6379 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6380 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6381 mode
= GET_MODE_INNER (mode
);
6383 /* Classification of atomic types. */
6388 classes
[0] = X86_64_SSE_CLASS
;
6391 classes
[0] = X86_64_SSE_CLASS
;
6392 classes
[1] = X86_64_SSEUP_CLASS
;
6402 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6406 classes
[0] = X86_64_INTEGERSI_CLASS
;
6409 else if (size
<= 64)
6411 classes
[0] = X86_64_INTEGER_CLASS
;
6414 else if (size
<= 64+32)
6416 classes
[0] = X86_64_INTEGER_CLASS
;
6417 classes
[1] = X86_64_INTEGERSI_CLASS
;
6420 else if (size
<= 64+64)
6422 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6430 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6434 /* OImode shouldn't be used directly. */
6439 if (!(bit_offset
% 64))
6440 classes
[0] = X86_64_SSESF_CLASS
;
6442 classes
[0] = X86_64_SSE_CLASS
;
6445 classes
[0] = X86_64_SSEDF_CLASS
;
6448 classes
[0] = X86_64_X87_CLASS
;
6449 classes
[1] = X86_64_X87UP_CLASS
;
6452 classes
[0] = X86_64_SSE_CLASS
;
6453 classes
[1] = X86_64_SSEUP_CLASS
;
6456 classes
[0] = X86_64_SSE_CLASS
;
6457 if (!(bit_offset
% 64))
6463 if (!warned
&& warn_psabi
)
6466 inform (input_location
,
6467 "the ABI of passing structure with complex float"
6468 " member has changed in GCC 4.4");
6470 classes
[1] = X86_64_SSESF_CLASS
;
6474 classes
[0] = X86_64_SSEDF_CLASS
;
6475 classes
[1] = X86_64_SSEDF_CLASS
;
6478 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
6481 /* This modes is larger than 16 bytes. */
6489 classes
[0] = X86_64_SSE_CLASS
;
6490 classes
[1] = X86_64_SSEUP_CLASS
;
6491 classes
[2] = X86_64_SSEUP_CLASS
;
6492 classes
[3] = X86_64_SSEUP_CLASS
;
6500 classes
[0] = X86_64_SSE_CLASS
;
6501 classes
[1] = X86_64_SSEUP_CLASS
;
6509 classes
[0] = X86_64_SSE_CLASS
;
6515 gcc_assert (VECTOR_MODE_P (mode
));
6520 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6522 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6523 classes
[0] = X86_64_INTEGERSI_CLASS
;
6525 classes
[0] = X86_64_INTEGER_CLASS
;
6526 classes
[1] = X86_64_INTEGER_CLASS
;
6527 return 1 + (bytes
> 8);
6531 /* Examine the argument and return set number of register required in each
6532 class. Return 0 iff parameter should be passed in memory. */
6534 examine_argument (enum machine_mode mode
, const_tree type
, int in_return
,
6535 int *int_nregs
, int *sse_nregs
)
6537 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6538 int n
= classify_argument (mode
, type
, regclass
, 0);
6544 for (n
--; n
>= 0; n
--)
6545 switch (regclass
[n
])
6547 case X86_64_INTEGER_CLASS
:
6548 case X86_64_INTEGERSI_CLASS
:
6551 case X86_64_SSE_CLASS
:
6552 case X86_64_SSESF_CLASS
:
6553 case X86_64_SSEDF_CLASS
:
6556 case X86_64_NO_CLASS
:
6557 case X86_64_SSEUP_CLASS
:
6559 case X86_64_X87_CLASS
:
6560 case X86_64_X87UP_CLASS
:
6564 case X86_64_COMPLEX_X87_CLASS
:
6565 return in_return
? 2 : 0;
6566 case X86_64_MEMORY_CLASS
:
6572 /* Construct container for the argument used by GCC interface. See
6573 FUNCTION_ARG for the detailed description. */
6576 construct_container (enum machine_mode mode
, enum machine_mode orig_mode
,
6577 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
6578 const int *intreg
, int sse_regno
)
6580 /* The following variables hold the static issued_error state. */
6581 static bool issued_sse_arg_error
;
6582 static bool issued_sse_ret_error
;
6583 static bool issued_x87_ret_error
;
6585 enum machine_mode tmpmode
;
6587 (mode
== BLKmode
) ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
6588 enum x86_64_reg_class regclass
[MAX_CLASSES
];
6592 int needed_sseregs
, needed_intregs
;
6593 rtx exp
[MAX_CLASSES
];
6596 n
= classify_argument (mode
, type
, regclass
, 0);
6599 if (!examine_argument (mode
, type
, in_return
, &needed_intregs
,
6602 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
6605 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6606 some less clueful developer tries to use floating-point anyway. */
6607 if (needed_sseregs
&& !TARGET_SSE
)
6611 if (!issued_sse_ret_error
)
6613 error ("SSE register return with SSE disabled");
6614 issued_sse_ret_error
= true;
6617 else if (!issued_sse_arg_error
)
6619 error ("SSE register argument with SSE disabled");
6620 issued_sse_arg_error
= true;
6625 /* Likewise, error if the ABI requires us to return values in the
6626 x87 registers and the user specified -mno-80387. */
6627 if (!TARGET_80387
&& in_return
)
6628 for (i
= 0; i
< n
; i
++)
6629 if (regclass
[i
] == X86_64_X87_CLASS
6630 || regclass
[i
] == X86_64_X87UP_CLASS
6631 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6633 if (!issued_x87_ret_error
)
6635 error ("x87 register return with x87 disabled");
6636 issued_x87_ret_error
= true;
6641 /* First construct simple cases. Avoid SCmode, since we want to use
6642 single register to pass this type. */
6643 if (n
== 1 && mode
!= SCmode
)
6644 switch (regclass
[0])
6646 case X86_64_INTEGER_CLASS
:
6647 case X86_64_INTEGERSI_CLASS
:
6648 return gen_rtx_REG (mode
, intreg
[0]);
6649 case X86_64_SSE_CLASS
:
6650 case X86_64_SSESF_CLASS
:
6651 case X86_64_SSEDF_CLASS
:
6652 if (mode
!= BLKmode
)
6653 return gen_reg_or_parallel (mode
, orig_mode
,
6654 SSE_REGNO (sse_regno
));
6656 case X86_64_X87_CLASS
:
6657 case X86_64_COMPLEX_X87_CLASS
:
6658 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6659 case X86_64_NO_CLASS
:
6660 /* Zero sized array, struct or class. */
6666 && regclass
[0] == X86_64_SSE_CLASS
6667 && regclass
[1] == X86_64_SSEUP_CLASS
6669 return gen_reg_or_parallel (mode
, orig_mode
,
6670 SSE_REGNO (sse_regno
));
6672 && regclass
[0] == X86_64_SSE_CLASS
6673 && regclass
[1] == X86_64_SSEUP_CLASS
6674 && regclass
[2] == X86_64_SSEUP_CLASS
6675 && regclass
[3] == X86_64_SSEUP_CLASS
6677 return gen_reg_or_parallel (mode
, orig_mode
,
6678 SSE_REGNO (sse_regno
));
6680 && regclass
[0] == X86_64_X87_CLASS
6681 && regclass
[1] == X86_64_X87UP_CLASS
)
6682 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6685 && regclass
[0] == X86_64_INTEGER_CLASS
6686 && regclass
[1] == X86_64_INTEGER_CLASS
6687 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6688 && intreg
[0] + 1 == intreg
[1])
6689 return gen_rtx_REG (mode
, intreg
[0]);
6691 /* Otherwise figure out the entries of the PARALLEL. */
6692 for (i
= 0; i
< n
; i
++)
6696 switch (regclass
[i
])
6698 case X86_64_NO_CLASS
:
6700 case X86_64_INTEGER_CLASS
:
6701 case X86_64_INTEGERSI_CLASS
:
6702 /* Merge TImodes on aligned occasions here too. */
6703 if (i
* 8 + 8 > bytes
)
6705 = mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6706 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6710 /* We've requested 24 bytes we
6711 don't have mode for. Use DImode. */
6712 if (tmpmode
== BLKmode
)
6715 = gen_rtx_EXPR_LIST (VOIDmode
,
6716 gen_rtx_REG (tmpmode
, *intreg
),
6720 case X86_64_SSESF_CLASS
:
6722 = gen_rtx_EXPR_LIST (VOIDmode
,
6723 gen_rtx_REG (SFmode
,
6724 SSE_REGNO (sse_regno
)),
6728 case X86_64_SSEDF_CLASS
:
6730 = gen_rtx_EXPR_LIST (VOIDmode
,
6731 gen_rtx_REG (DFmode
,
6732 SSE_REGNO (sse_regno
)),
6736 case X86_64_SSE_CLASS
:
6744 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6754 && regclass
[1] == X86_64_SSEUP_CLASS
6755 && regclass
[2] == X86_64_SSEUP_CLASS
6756 && regclass
[3] == X86_64_SSEUP_CLASS
);
6764 = gen_rtx_EXPR_LIST (VOIDmode
,
6765 gen_rtx_REG (tmpmode
,
6766 SSE_REGNO (sse_regno
)),
6775 /* Empty aligned struct, union or class. */
6779 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6780 for (i
= 0; i
< nexps
; i
++)
6781 XVECEXP (ret
, 0, i
) = exp
[i
];
6785 /* Update the data in CUM to advance over an argument of mode MODE
6786 and data type TYPE. (TYPE is null for libcalls where that information
6787 may not be available.) */
6790 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6791 const_tree type
, HOST_WIDE_INT bytes
,
6792 HOST_WIDE_INT words
)
6808 cum
->words
+= words
;
6809 cum
->nregs
-= words
;
6810 cum
->regno
+= words
;
6812 if (cum
->nregs
<= 0)
6820 /* OImode shouldn't be used directly. */
6824 if (cum
->float_in_sse
< 2)
6827 if (cum
->float_in_sse
< 1)
6844 if (!type
|| !AGGREGATE_TYPE_P (type
))
6846 cum
->sse_words
+= words
;
6847 cum
->sse_nregs
-= 1;
6848 cum
->sse_regno
+= 1;
6849 if (cum
->sse_nregs
<= 0)
6863 if (!type
|| !AGGREGATE_TYPE_P (type
))
6865 cum
->mmx_words
+= words
;
6866 cum
->mmx_nregs
-= 1;
6867 cum
->mmx_regno
+= 1;
6868 if (cum
->mmx_nregs
<= 0)
6879 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6880 const_tree type
, HOST_WIDE_INT words
, bool named
)
6882 int int_nregs
, sse_nregs
;
6884 /* Unnamed 256bit vector mode parameters are passed on stack. */
6885 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6888 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6889 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6891 cum
->nregs
-= int_nregs
;
6892 cum
->sse_nregs
-= sse_nregs
;
6893 cum
->regno
+= int_nregs
;
6894 cum
->sse_regno
+= sse_nregs
;
6898 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6899 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6900 cum
->words
+= words
;
6905 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6906 HOST_WIDE_INT words
)
6908 /* Otherwise, this should be passed indirect. */
6909 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6911 cum
->words
+= words
;
6919 /* Update the data in CUM to advance over an argument of mode MODE and
6920 data type TYPE. (TYPE is null for libcalls where that information
6921 may not be available.) */
6924 ix86_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
6925 const_tree type
, bool named
)
6927 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6928 HOST_WIDE_INT bytes
, words
;
6930 if (mode
== BLKmode
)
6931 bytes
= int_size_in_bytes (type
);
6933 bytes
= GET_MODE_SIZE (mode
);
6934 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
6937 mode
= type_natural_mode (type
, NULL
);
6939 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
6940 function_arg_advance_ms_64 (cum
, bytes
, words
);
6941 else if (TARGET_64BIT
)
6942 function_arg_advance_64 (cum
, mode
, type
, words
, named
);
6944 function_arg_advance_32 (cum
, mode
, type
, bytes
, words
);
6947 /* Define where to put the arguments to a function.
6948 Value is zero to push the argument on the stack,
6949 or a hard register in which to store the argument.
6951 MODE is the argument's machine mode.
6952 TYPE is the data type of the argument (as a tree).
6953 This is null for libcalls where that information may
6955 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6956 the preceding args and about the function being called.
6957 NAMED is nonzero if this argument is a named parameter
6958 (otherwise it is an extra parameter matching an ellipsis). */
6961 function_arg_32 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6962 enum machine_mode orig_mode
, const_tree type
,
6963 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
6965 static bool warnedsse
, warnedmmx
;
6967 /* Avoid the AL settings for the Unix64 ABI. */
6968 if (mode
== VOIDmode
)
6984 if (words
<= cum
->nregs
)
6986 int regno
= cum
->regno
;
6988 /* Fastcall allocates the first two DWORD (SImode) or
6989 smaller arguments to ECX and EDX if it isn't an
6995 || (type
&& AGGREGATE_TYPE_P (type
)))
6998 /* ECX not EAX is the first allocated register. */
6999 if (regno
== AX_REG
)
7002 return gen_rtx_REG (mode
, regno
);
7007 if (cum
->float_in_sse
< 2)
7010 if (cum
->float_in_sse
< 1)
7014 /* In 32bit, we pass TImode in xmm registers. */
7021 if (!type
|| !AGGREGATE_TYPE_P (type
))
7023 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
7026 warning (0, "SSE vector argument without SSE enabled "
7030 return gen_reg_or_parallel (mode
, orig_mode
,
7031 cum
->sse_regno
+ FIRST_SSE_REG
);
7036 /* OImode shouldn't be used directly. */
7045 if (!type
|| !AGGREGATE_TYPE_P (type
))
7048 return gen_reg_or_parallel (mode
, orig_mode
,
7049 cum
->sse_regno
+ FIRST_SSE_REG
);
7059 if (!type
|| !AGGREGATE_TYPE_P (type
))
7061 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
7064 warning (0, "MMX vector argument without MMX enabled "
7068 return gen_reg_or_parallel (mode
, orig_mode
,
7069 cum
->mmx_regno
+ FIRST_MMX_REG
);
7078 function_arg_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7079 enum machine_mode orig_mode
, const_tree type
, bool named
)
7081 /* Handle a hidden AL argument containing number of registers
7082 for varargs x86-64 functions. */
7083 if (mode
== VOIDmode
)
7084 return GEN_INT (cum
->maybe_vaarg
7085 ? (cum
->sse_nregs
< 0
7086 ? X86_64_SSE_REGPARM_MAX
7101 /* Unnamed 256bit vector mode parameters are passed on stack. */
7107 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
7109 &x86_64_int_parameter_registers
[cum
->regno
],
7114 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
7115 enum machine_mode orig_mode
, bool named
,
7116 HOST_WIDE_INT bytes
)
7120 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
7121 We use value of -2 to specify that current function call is MSABI. */
7122 if (mode
== VOIDmode
)
7123 return GEN_INT (-2);
7125 /* If we've run out of registers, it goes on the stack. */
7126 if (cum
->nregs
== 0)
7129 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
7131 /* Only floating point modes are passed in anything but integer regs. */
7132 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
7135 regno
= cum
->regno
+ FIRST_SSE_REG
;
7140 /* Unnamed floating parameters are passed in both the
7141 SSE and integer registers. */
7142 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
7143 t2
= gen_rtx_REG (mode
, regno
);
7144 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
7145 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
7146 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
7149 /* Handle aggregated types passed in register. */
7150 if (orig_mode
== BLKmode
)
7152 if (bytes
> 0 && bytes
<= 8)
7153 mode
= (bytes
> 4 ? DImode
: SImode
);
7154 if (mode
== BLKmode
)
7158 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
7161 /* Return where to put the arguments to a function.
7162 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7164 MODE is the argument's machine mode. TYPE is the data type of the
7165 argument. It is null for libcalls where that information may not be
7166 available. CUM gives information about the preceding args and about
7167 the function being called. NAMED is nonzero if this argument is a
7168 named parameter (otherwise it is an extra parameter matching an
7172 ix86_function_arg (cumulative_args_t cum_v
, enum machine_mode omode
,
7173 const_tree type
, bool named
)
7175 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7176 enum machine_mode mode
= omode
;
7177 HOST_WIDE_INT bytes
, words
;
7180 if (mode
== BLKmode
)
7181 bytes
= int_size_in_bytes (type
);
7183 bytes
= GET_MODE_SIZE (mode
);
7184 words
= (bytes
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7186 /* To simplify the code below, represent vector types with a vector mode
7187 even if MMX/SSE are not active. */
7188 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7189 mode
= type_natural_mode (type
, cum
);
7191 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7192 arg
= function_arg_ms_64 (cum
, mode
, omode
, named
, bytes
);
7193 else if (TARGET_64BIT
)
7194 arg
= function_arg_64 (cum
, mode
, omode
, type
, named
);
7196 arg
= function_arg_32 (cum
, mode
, omode
, type
, bytes
, words
);
7198 if (TARGET_VZEROUPPER
&& function_pass_avx256_p (arg
))
7200 /* This argument uses 256bit AVX modes. */
7202 cfun
->machine
->callee_pass_avx256_p
= true;
7204 cfun
->machine
->caller_pass_avx256_p
= true;
7210 /* A C expression that indicates when an argument must be passed by
7211 reference. If nonzero for an argument, a copy of that argument is
7212 made in memory and a pointer to the argument is passed instead of
7213 the argument itself. The pointer is passed in whatever way is
7214 appropriate for passing a pointer to that type. */
7217 ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED
,
7218 enum machine_mode mode ATTRIBUTE_UNUSED
,
7219 const_tree type
, bool named ATTRIBUTE_UNUSED
)
7221 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7223 /* See Windows x64 Software Convention. */
7224 if (TARGET_64BIT
&& (cum
? cum
->call_abi
: ix86_abi
) == MS_ABI
)
7226 int msize
= (int) GET_MODE_SIZE (mode
);
7229 /* Arrays are passed by reference. */
7230 if (TREE_CODE (type
) == ARRAY_TYPE
)
7233 if (AGGREGATE_TYPE_P (type
))
7235 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7236 are passed by reference. */
7237 msize
= int_size_in_bytes (type
);
7241 /* __m128 is passed by reference. */
7243 case 1: case 2: case 4: case 8:
7249 else if (TARGET_64BIT
&& type
&& int_size_in_bytes (type
) == -1)
7255 /* Return true when TYPE should be 128bit aligned for 32bit argument
7256 passing ABI. XXX: This function is obsolete and is only used for
7257 checking psABI compatibility with previous versions of GCC. */
7260 ix86_compat_aligned_value_p (const_tree type
)
7262 enum machine_mode mode
= TYPE_MODE (type
);
7263 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
7267 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
7269 if (TYPE_ALIGN (type
) < 128)
7272 if (AGGREGATE_TYPE_P (type
))
7274 /* Walk the aggregates recursively. */
7275 switch (TREE_CODE (type
))
7279 case QUAL_UNION_TYPE
:
7283 /* Walk all the structure fields. */
7284 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
7286 if (TREE_CODE (field
) == FIELD_DECL
7287 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
7294 /* Just for use if some languages passes arrays by value. */
7295 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
7306 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7307 XXX: This function is obsolete and is only used for checking psABI
7308 compatibility with previous versions of GCC. */
7311 ix86_compat_function_arg_boundary (enum machine_mode mode
,
7312 const_tree type
, unsigned int align
)
7314 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7315 natural boundaries. */
7316 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
7318 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7319 make an exception for SSE modes since these require 128bit
7322 The handling here differs from field_alignment. ICC aligns MMX
7323 arguments to 4 byte boundaries, while structure fields are aligned
7324 to 8 byte boundaries. */
7327 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
7328 align
= PARM_BOUNDARY
;
7332 if (!ix86_compat_aligned_value_p (type
))
7333 align
= PARM_BOUNDARY
;
7336 if (align
> BIGGEST_ALIGNMENT
)
7337 align
= BIGGEST_ALIGNMENT
;
7341 /* Return true when TYPE should be 128bit aligned for 32bit argument
7345 ix86_contains_aligned_value_p (const_tree type
)
7347 enum machine_mode mode
= TYPE_MODE (type
);
7349 if (mode
== XFmode
|| mode
== XCmode
)
7352 if (TYPE_ALIGN (type
) < 128)
7355 if (AGGREGATE_TYPE_P (type
))
7357 /* Walk the aggregates recursively. */
7358 switch (TREE_CODE (type
))
7362 case QUAL_UNION_TYPE
:
7366 /* Walk all the structure fields. */
7367 for (field
= TYPE_FIELDS (type
);
7369 field
= DECL_CHAIN (field
))
7371 if (TREE_CODE (field
) == FIELD_DECL
7372 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
7379 /* Just for use if some languages passes arrays by value. */
7380 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7389 return TYPE_ALIGN (type
) >= 128;
7394 /* Gives the alignment boundary, in bits, of an argument with the
7395 specified mode and type. */
7398 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7403 /* Since the main variant type is used for call, we convert it to
7404 the main variant type. */
7405 type
= TYPE_MAIN_VARIANT (type
);
7406 align
= TYPE_ALIGN (type
);
7409 align
= GET_MODE_ALIGNMENT (mode
);
7410 if (align
< PARM_BOUNDARY
)
7411 align
= PARM_BOUNDARY
;
7415 unsigned int saved_align
= align
;
7419 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7422 if (mode
== XFmode
|| mode
== XCmode
)
7423 align
= PARM_BOUNDARY
;
7425 else if (!ix86_contains_aligned_value_p (type
))
7426 align
= PARM_BOUNDARY
;
7429 align
= PARM_BOUNDARY
;
7434 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7438 inform (input_location
,
7439 "The ABI for passing parameters with %d-byte"
7440 " alignment has changed in GCC 4.6",
7441 align
/ BITS_PER_UNIT
);
7448 /* Return true if N is a possible register number of function value. */
7451 ix86_function_value_regno_p (const unsigned int regno
)
7458 case FIRST_FLOAT_REG
:
7459 /* TODO: The function should depend on current function ABI but
7460 builtins.c would need updating then. Therefore we use the
7462 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7464 return TARGET_FLOAT_RETURNS_IN_80387
;
7470 if (TARGET_MACHO
|| TARGET_64BIT
)
7478 /* Define how to find the value returned by a function.
7479 VALTYPE is the data type of the value (as a tree).
7480 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7481 otherwise, FUNC is 0. */
7484 function_value_32 (enum machine_mode orig_mode
, enum machine_mode mode
,
7485 const_tree fntype
, const_tree fn
)
7489 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7490 we normally prevent this case when mmx is not available. However
7491 some ABIs may require the result to be returned like DImode. */
7492 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7493 regno
= FIRST_MMX_REG
;
7495 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7496 we prevent this case when sse is not available. However some ABIs
7497 may require the result to be returned like integer TImode. */
7498 else if (mode
== TImode
7499 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7500 regno
= FIRST_SSE_REG
;
7502 /* 32-byte vector modes in %ymm0. */
7503 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7504 regno
= FIRST_SSE_REG
;
7506 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7507 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7508 regno
= FIRST_FLOAT_REG
;
7510 /* Most things go in %eax. */
7513 /* Override FP return register with %xmm0 for local functions when
7514 SSE math is enabled or for functions with sseregparm attribute. */
7515 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7517 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7518 if ((sse_level
>= 1 && mode
== SFmode
)
7519 || (sse_level
== 2 && mode
== DFmode
))
7520 regno
= FIRST_SSE_REG
;
7523 /* OImode shouldn't be used directly. */
7524 gcc_assert (mode
!= OImode
);
7526 return gen_rtx_REG (orig_mode
, regno
);
7530 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7535 /* Handle libcalls, which don't provide a type node. */
7536 if (valtype
== NULL
)
7550 regno
= FIRST_SSE_REG
;
7554 regno
= FIRST_FLOAT_REG
;
7562 return gen_rtx_REG (mode
, regno
);
7564 else if (POINTER_TYPE_P (valtype
))
7566 /* Pointers are always returned in word_mode. */
7570 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7571 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7572 x86_64_int_return_registers
, 0);
7574 /* For zero sized structures, construct_container returns NULL, but we
7575 need to keep rest of compiler happy by returning meaningful value. */
7577 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7583 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7585 unsigned int regno
= AX_REG
;
7589 switch (GET_MODE_SIZE (mode
))
7592 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7593 && !COMPLEX_MODE_P (mode
))
7594 regno
= FIRST_SSE_REG
;
7598 if (mode
== SFmode
|| mode
== DFmode
)
7599 regno
= FIRST_SSE_REG
;
7605 return gen_rtx_REG (orig_mode
, regno
);
7609 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
7610 enum machine_mode orig_mode
, enum machine_mode mode
)
7612 const_tree fn
, fntype
;
7615 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
7616 fn
= fntype_or_decl
;
7617 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
7619 if (TARGET_64BIT
&& ix86_function_type_abi (fntype
) == MS_ABI
)
7620 return function_value_ms_64 (orig_mode
, mode
);
7621 else if (TARGET_64BIT
)
7622 return function_value_64 (orig_mode
, mode
, valtype
);
7624 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
7628 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
,
7629 bool outgoing ATTRIBUTE_UNUSED
)
7631 enum machine_mode mode
, orig_mode
;
7633 orig_mode
= TYPE_MODE (valtype
);
7634 mode
= type_natural_mode (valtype
, NULL
);
7635 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
7638 /* Pointer function arguments and return values are promoted to
7641 static enum machine_mode
7642 ix86_promote_function_mode (const_tree type
, enum machine_mode mode
,
7643 int *punsignedp
, const_tree fntype
,
7646 if (type
!= NULL_TREE
&& POINTER_TYPE_P (type
))
7648 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
7651 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
7655 /* Return true if a structure, union or array with MODE containing FIELD
7656 should be accessed using BLKmode. */
7659 ix86_member_type_forces_blk (const_tree field
, enum machine_mode mode
)
7661 /* Union with XFmode must be in BLKmode. */
7662 return (mode
== XFmode
7663 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
7664 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
7668 ix86_libcall_value (enum machine_mode mode
)
7670 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
7673 /* Return true iff type is returned in memory. */
7675 static bool ATTRIBUTE_UNUSED
7676 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7680 if (mode
== BLKmode
)
7683 size
= int_size_in_bytes (type
);
7685 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7688 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7690 /* User-created vectors small enough to fit in EAX. */
7694 /* MMX/3dNow values are returned in MM0,
7695 except when it doesn't exits or the ABI prescribes otherwise. */
7697 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7699 /* SSE values are returned in XMM0, except when it doesn't exist. */
7703 /* AVX values are returned in YMM0, except when it doesn't exist. */
7714 /* OImode shouldn't be used directly. */
7715 gcc_assert (mode
!= OImode
);
7720 static bool ATTRIBUTE_UNUSED
7721 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7723 int needed_intregs
, needed_sseregs
;
7724 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7727 static bool ATTRIBUTE_UNUSED
7728 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7730 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7732 /* __m128 is returned in xmm0. */
7733 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7734 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7737 /* Otherwise, the size must be exactly in [1248]. */
7738 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7742 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7744 #ifdef SUBTARGET_RETURN_IN_MEMORY
7745 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7747 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7751 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7752 return return_in_memory_ms_64 (type
, mode
);
7754 return return_in_memory_64 (type
, mode
);
7757 return return_in_memory_32 (type
, mode
);
7761 /* When returning SSE vector types, we have a choice of either
7762 (1) being abi incompatible with a -march switch, or
7763 (2) generating an error.
7764 Given no good solution, I think the safest thing is one warning.
7765 The user won't be able to use -Werror, but....
7767 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
7768 called in response to actually generating a caller or callee that
7769 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
7770 via aggregate_value_p for general type probing from tree-ssa. */
7773 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7775 static bool warnedsse
, warnedmmx
;
7777 if (!TARGET_64BIT
&& type
)
7779 /* Look at the return type of the function, not the function type. */
7780 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7782 if (!TARGET_SSE
&& !warnedsse
)
7785 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7788 warning (0, "SSE vector return without SSE enabled "
7793 if (!TARGET_MMX
&& !warnedmmx
)
7795 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7798 warning (0, "MMX vector return without MMX enabled "
7808 /* Create the va_list data type. */
7810 /* Returns the calling convention specific va_list date type.
7811 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
7814 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7816 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7818 /* For i386 we use plain pointer to argument area. */
7819 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7820 return build_pointer_type (char_type_node
);
7822 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7823 type_decl
= build_decl (BUILTINS_LOCATION
,
7824 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7826 f_gpr
= build_decl (BUILTINS_LOCATION
,
7827 FIELD_DECL
, get_identifier ("gp_offset"),
7828 unsigned_type_node
);
7829 f_fpr
= build_decl (BUILTINS_LOCATION
,
7830 FIELD_DECL
, get_identifier ("fp_offset"),
7831 unsigned_type_node
);
7832 f_ovf
= build_decl (BUILTINS_LOCATION
,
7833 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7835 f_sav
= build_decl (BUILTINS_LOCATION
,
7836 FIELD_DECL
, get_identifier ("reg_save_area"),
7839 va_list_gpr_counter_field
= f_gpr
;
7840 va_list_fpr_counter_field
= f_fpr
;
7842 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7843 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7844 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7845 DECL_FIELD_CONTEXT (f_sav
) = record
;
7847 TYPE_STUB_DECL (record
) = type_decl
;
7848 TYPE_NAME (record
) = type_decl
;
7849 TYPE_FIELDS (record
) = f_gpr
;
7850 DECL_CHAIN (f_gpr
) = f_fpr
;
7851 DECL_CHAIN (f_fpr
) = f_ovf
;
7852 DECL_CHAIN (f_ovf
) = f_sav
;
7854 layout_type (record
);
7856 /* The correct type is an array type of one element. */
7857 return build_array_type (record
, build_index_type (size_zero_node
));
7860 /* Setup the builtin va_list data type and for 64-bit the additional
7861 calling convention specific va_list data types. */
7864 ix86_build_builtin_va_list (void)
7866 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7868 /* Initialize abi specific va_list builtin types. */
7872 if (ix86_abi
== MS_ABI
)
7874 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7875 if (TREE_CODE (t
) != RECORD_TYPE
)
7876 t
= build_variant_type_copy (t
);
7877 sysv_va_list_type_node
= t
;
7882 if (TREE_CODE (t
) != RECORD_TYPE
)
7883 t
= build_variant_type_copy (t
);
7884 sysv_va_list_type_node
= t
;
7886 if (ix86_abi
!= MS_ABI
)
7888 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7889 if (TREE_CODE (t
) != RECORD_TYPE
)
7890 t
= build_variant_type_copy (t
);
7891 ms_va_list_type_node
= t
;
7896 if (TREE_CODE (t
) != RECORD_TYPE
)
7897 t
= build_variant_type_copy (t
);
7898 ms_va_list_type_node
= t
;
7905 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7908 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7914 /* GPR size of varargs save area. */
7915 if (cfun
->va_list_gpr_size
)
7916 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7918 ix86_varargs_gpr_size
= 0;
7920 /* FPR size of varargs save area. We don't need it if we don't pass
7921 anything in SSE registers. */
7922 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7923 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7925 ix86_varargs_fpr_size
= 0;
7927 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7930 save_area
= frame_pointer_rtx
;
7931 set
= get_varargs_alias_set ();
7933 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7934 if (max
> X86_64_REGPARM_MAX
)
7935 max
= X86_64_REGPARM_MAX
;
7937 for (i
= cum
->regno
; i
< max
; i
++)
7939 mem
= gen_rtx_MEM (word_mode
,
7940 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
7941 MEM_NOTRAP_P (mem
) = 1;
7942 set_mem_alias_set (mem
, set
);
7943 emit_move_insn (mem
,
7944 gen_rtx_REG (word_mode
,
7945 x86_64_int_parameter_registers
[i
]));
7948 if (ix86_varargs_fpr_size
)
7950 enum machine_mode smode
;
7953 /* Now emit code to save SSE registers. The AX parameter contains number
7954 of SSE parameter registers used to call this function, though all we
7955 actually check here is the zero/non-zero status. */
7957 label
= gen_label_rtx ();
7958 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7959 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
7962 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7963 we used movdqa (i.e. TImode) instead? Perhaps even better would
7964 be if we could determine the real mode of the data, via a hook
7965 into pass_stdarg. Ignore all that for now. */
7967 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7968 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7970 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7971 if (max
> X86_64_SSE_REGPARM_MAX
)
7972 max
= X86_64_SSE_REGPARM_MAX
;
7974 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7976 mem
= plus_constant (Pmode
, save_area
,
7977 i
* 16 + ix86_varargs_gpr_size
);
7978 mem
= gen_rtx_MEM (smode
, mem
);
7979 MEM_NOTRAP_P (mem
) = 1;
7980 set_mem_alias_set (mem
, set
);
7981 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7983 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7991 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7993 alias_set_type set
= get_varargs_alias_set ();
7996 /* Reset to zero, as there might be a sysv vaarg used
7998 ix86_varargs_gpr_size
= 0;
7999 ix86_varargs_fpr_size
= 0;
8001 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
8005 mem
= gen_rtx_MEM (Pmode
,
8006 plus_constant (Pmode
, virtual_incoming_args_rtx
,
8007 i
* UNITS_PER_WORD
));
8008 MEM_NOTRAP_P (mem
) = 1;
8009 set_mem_alias_set (mem
, set
);
8011 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
8012 emit_move_insn (mem
, reg
);
8017 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
8018 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
8021 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
8022 CUMULATIVE_ARGS next_cum
;
8025 /* This argument doesn't appear to be used anymore. Which is good,
8026 because the old code here didn't suppress rtl generation. */
8027 gcc_assert (!no_rtl
);
8032 fntype
= TREE_TYPE (current_function_decl
);
8034 /* For varargs, we do not want to skip the dummy va_dcl argument.
8035 For stdargs, we do want to skip the last named argument. */
8037 if (stdarg_p (fntype
))
8038 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
8041 if (cum
->call_abi
== MS_ABI
)
8042 setup_incoming_varargs_ms_64 (&next_cum
);
8044 setup_incoming_varargs_64 (&next_cum
);
8047 /* Checks if TYPE is of kind va_list char *. */
8050 is_va_list_char_pointer (tree type
)
8054 /* For 32-bit it is always true. */
8057 canonic
= ix86_canonical_va_list_type (type
);
8058 return (canonic
== ms_va_list_type_node
8059 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
8062 /* Implement va_start. */
8065 ix86_va_start (tree valist
, rtx nextarg
)
8067 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
8068 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8069 tree gpr
, fpr
, ovf
, sav
, t
;
8073 if (flag_split_stack
8074 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8076 unsigned int scratch_regno
;
8078 /* When we are splitting the stack, we can't refer to the stack
8079 arguments using internal_arg_pointer, because they may be on
8080 the old stack. The split stack prologue will arrange to
8081 leave a pointer to the old stack arguments in a scratch
8082 register, which we here copy to a pseudo-register. The split
8083 stack prologue can't set the pseudo-register directly because
8084 it (the prologue) runs before any registers have been saved. */
8086 scratch_regno
= split_stack_prologue_scratch_regno ();
8087 if (scratch_regno
!= INVALID_REGNUM
)
8091 reg
= gen_reg_rtx (Pmode
);
8092 cfun
->machine
->split_stack_varargs_pointer
= reg
;
8095 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
8099 push_topmost_sequence ();
8100 emit_insn_after (seq
, entry_of_function ());
8101 pop_topmost_sequence ();
8105 /* Only 64bit target needs something special. */
8106 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8108 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8109 std_expand_builtin_va_start (valist
, nextarg
);
8114 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
8115 next
= expand_binop (ptr_mode
, add_optab
,
8116 cfun
->machine
->split_stack_varargs_pointer
,
8117 crtl
->args
.arg_offset_rtx
,
8118 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
8119 convert_move (va_r
, next
, 0);
8124 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8125 f_fpr
= DECL_CHAIN (f_gpr
);
8126 f_ovf
= DECL_CHAIN (f_fpr
);
8127 f_sav
= DECL_CHAIN (f_ovf
);
8129 valist
= build_simple_mem_ref (valist
);
8130 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
8131 /* The following should be folded into the MEM_REF offset. */
8132 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
8134 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
8136 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
8138 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
8141 /* Count number of gp and fp argument registers used. */
8142 words
= crtl
->args
.info
.words
;
8143 n_gpr
= crtl
->args
.info
.regno
;
8144 n_fpr
= crtl
->args
.info
.sse_regno
;
8146 if (cfun
->va_list_gpr_size
)
8148 type
= TREE_TYPE (gpr
);
8149 t
= build2 (MODIFY_EXPR
, type
,
8150 gpr
, build_int_cst (type
, n_gpr
* 8));
8151 TREE_SIDE_EFFECTS (t
) = 1;
8152 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8155 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
8157 type
= TREE_TYPE (fpr
);
8158 t
= build2 (MODIFY_EXPR
, type
, fpr
,
8159 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
8160 TREE_SIDE_EFFECTS (t
) = 1;
8161 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8164 /* Find the overflow area. */
8165 type
= TREE_TYPE (ovf
);
8166 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
8167 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
8169 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
8170 t
= make_tree (type
, ovf_rtx
);
8172 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
8173 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
8174 TREE_SIDE_EFFECTS (t
) = 1;
8175 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8177 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
8179 /* Find the register save area.
8180 Prologue of the function save it right above stack frame. */
8181 type
= TREE_TYPE (sav
);
8182 t
= make_tree (type
, frame_pointer_rtx
);
8183 if (!ix86_varargs_gpr_size
)
8184 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
8185 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
8186 TREE_SIDE_EFFECTS (t
) = 1;
8187 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
8191 /* Implement va_arg. */
8194 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
8197 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
8198 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
8199 tree gpr
, fpr
, ovf
, sav
, t
;
8201 tree lab_false
, lab_over
= NULL_TREE
;
8206 enum machine_mode nat_mode
;
8207 unsigned int arg_boundary
;
8209 /* Only 64bit target needs something special. */
8210 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
8211 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
8213 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
8214 f_fpr
= DECL_CHAIN (f_gpr
);
8215 f_ovf
= DECL_CHAIN (f_fpr
);
8216 f_sav
= DECL_CHAIN (f_ovf
);
8218 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
8219 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
8220 valist
= build_va_arg_indirect_ref (valist
);
8221 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
8222 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
8223 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
8225 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
8227 type
= build_pointer_type (type
);
8228 size
= int_size_in_bytes (type
);
8229 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
8231 nat_mode
= type_natural_mode (type
, NULL
);
8240 /* Unnamed 256bit vector mode parameters are passed on stack. */
8241 if (!TARGET_64BIT_MS_ABI
)
8248 container
= construct_container (nat_mode
, TYPE_MODE (type
),
8249 type
, 0, X86_64_REGPARM_MAX
,
8250 X86_64_SSE_REGPARM_MAX
, intreg
,
8255 /* Pull the value out of the saved registers. */
8257 addr
= create_tmp_var (ptr_type_node
, "addr");
8261 int needed_intregs
, needed_sseregs
;
8263 tree int_addr
, sse_addr
;
8265 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
8266 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
8268 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
8270 need_temp
= (!REG_P (container
)
8271 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
8272 || TYPE_ALIGN (type
) > 128));
8274 /* In case we are passing structure, verify that it is consecutive block
8275 on the register save area. If not we need to do moves. */
8276 if (!need_temp
&& !REG_P (container
))
8278 /* Verify that all registers are strictly consecutive */
8279 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
8283 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8285 rtx slot
= XVECEXP (container
, 0, i
);
8286 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
8287 || INTVAL (XEXP (slot
, 1)) != i
* 16)
8295 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
8297 rtx slot
= XVECEXP (container
, 0, i
);
8298 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
8299 || INTVAL (XEXP (slot
, 1)) != i
* 8)
8311 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
8312 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
8315 /* First ensure that we fit completely in registers. */
8318 t
= build_int_cst (TREE_TYPE (gpr
),
8319 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
8320 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
8321 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8322 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8323 gimplify_and_add (t
, pre_p
);
8327 t
= build_int_cst (TREE_TYPE (fpr
),
8328 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
8329 + X86_64_REGPARM_MAX
* 8);
8330 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
8331 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
8332 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
8333 gimplify_and_add (t
, pre_p
);
8336 /* Compute index to start of area used for integer regs. */
8339 /* int_addr = gpr + sav; */
8340 t
= fold_build_pointer_plus (sav
, gpr
);
8341 gimplify_assign (int_addr
, t
, pre_p
);
8345 /* sse_addr = fpr + sav; */
8346 t
= fold_build_pointer_plus (sav
, fpr
);
8347 gimplify_assign (sse_addr
, t
, pre_p
);
8351 int i
, prev_size
= 0;
8352 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8355 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8356 gimplify_assign (addr
, t
, pre_p
);
8358 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8360 rtx slot
= XVECEXP (container
, 0, i
);
8361 rtx reg
= XEXP (slot
, 0);
8362 enum machine_mode mode
= GET_MODE (reg
);
8368 tree dest_addr
, dest
;
8369 int cur_size
= GET_MODE_SIZE (mode
);
8371 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8372 prev_size
= INTVAL (XEXP (slot
, 1));
8373 if (prev_size
+ cur_size
> size
)
8375 cur_size
= size
- prev_size
;
8376 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8377 if (mode
== BLKmode
)
8380 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8381 if (mode
== GET_MODE (reg
))
8382 addr_type
= build_pointer_type (piece_type
);
8384 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8386 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8389 if (SSE_REGNO_P (REGNO (reg
)))
8391 src_addr
= sse_addr
;
8392 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8396 src_addr
= int_addr
;
8397 src_offset
= REGNO (reg
) * 8;
8399 src_addr
= fold_convert (addr_type
, src_addr
);
8400 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8402 dest_addr
= fold_convert (daddr_type
, addr
);
8403 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8404 if (cur_size
== GET_MODE_SIZE (mode
))
8406 src
= build_va_arg_indirect_ref (src_addr
);
8407 dest
= build_va_arg_indirect_ref (dest_addr
);
8409 gimplify_assign (dest
, src
, pre_p
);
8414 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8415 3, dest_addr
, src_addr
,
8416 size_int (cur_size
));
8417 gimplify_and_add (copy
, pre_p
);
8419 prev_size
+= cur_size
;
8425 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8426 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8427 gimplify_assign (gpr
, t
, pre_p
);
8432 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8433 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8434 gimplify_assign (fpr
, t
, pre_p
);
8437 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8439 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8442 /* ... otherwise out of the overflow area. */
8444 /* When we align parameter on stack for caller, if the parameter
8445 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8446 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8447 here with caller. */
8448 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8449 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8450 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8452 /* Care for on-stack alignment if needed. */
8453 if (arg_boundary
<= 64 || size
== 0)
8457 HOST_WIDE_INT align
= arg_boundary
/ 8;
8458 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8459 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8460 build_int_cst (TREE_TYPE (t
), -align
));
8463 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8464 gimplify_assign (addr
, t
, pre_p
);
8466 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8467 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8470 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8472 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8473 addr
= fold_convert (ptrtype
, addr
);
8476 addr
= build_va_arg_indirect_ref (addr
);
8477 return build_va_arg_indirect_ref (addr
);
8480 /* Return true if OPNUM's MEM should be matched
8481 in movabs* patterns. */
8484 ix86_check_movabs (rtx insn
, int opnum
)
8488 set
= PATTERN (insn
);
8489 if (GET_CODE (set
) == PARALLEL
)
8490 set
= XVECEXP (set
, 0, 0);
8491 gcc_assert (GET_CODE (set
) == SET
);
8492 mem
= XEXP (set
, opnum
);
8493 while (GET_CODE (mem
) == SUBREG
)
8494 mem
= SUBREG_REG (mem
);
8495 gcc_assert (MEM_P (mem
));
8496 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
8499 /* Initialize the table of extra 80387 mathematical constants. */
8502 init_ext_80387_constants (void)
8504 static const char * cst
[5] =
8506 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8507 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8508 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8509 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8510 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8514 for (i
= 0; i
< 5; i
++)
8516 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
8517 /* Ensure each constant is rounded to XFmode precision. */
8518 real_convert (&ext_80387_constants_table
[i
],
8519 XFmode
, &ext_80387_constants_table
[i
]);
8522 ext_80387_constants_init
= 1;
8525 /* Return non-zero if the constant is something that
8526 can be loaded with a special instruction. */
8529 standard_80387_constant_p (rtx x
)
8531 enum machine_mode mode
= GET_MODE (x
);
8535 if (!(X87_FLOAT_MODE_P (mode
) && (GET_CODE (x
) == CONST_DOUBLE
)))
8538 if (x
== CONST0_RTX (mode
))
8540 if (x
== CONST1_RTX (mode
))
8543 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8545 /* For XFmode constants, try to find a special 80387 instruction when
8546 optimizing for size or on those CPUs that benefit from them. */
8548 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
8552 if (! ext_80387_constants_init
)
8553 init_ext_80387_constants ();
8555 for (i
= 0; i
< 5; i
++)
8556 if (real_identical (&r
, &ext_80387_constants_table
[i
]))
8560 /* Load of the constant -0.0 or -1.0 will be split as
8561 fldz;fchs or fld1;fchs sequence. */
8562 if (real_isnegzero (&r
))
8564 if (real_identical (&r
, &dconstm1
))
8570 /* Return the opcode of the special instruction to be used to load
8574 standard_80387_constant_opcode (rtx x
)
8576 switch (standard_80387_constant_p (x
))
8600 /* Return the CONST_DOUBLE representing the 80387 constant that is
8601 loaded by the specified special instruction. The argument IDX
8602 matches the return value from standard_80387_constant_p. */
8605 standard_80387_constant_rtx (int idx
)
8609 if (! ext_80387_constants_init
)
8610 init_ext_80387_constants ();
8626 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table
[i
],
8630 /* Return 1 if X is all 0s and 2 if x is all 1s
8631 in supported SSE/AVX vector mode. */
8634 standard_sse_constant_p (rtx x
)
8636 enum machine_mode mode
= GET_MODE (x
);
8638 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
8640 if (vector_all_ones_operand (x
, mode
))
8662 /* Return the opcode of the special instruction to be used to load
8666 standard_sse_constant_opcode (rtx insn
, rtx x
)
8668 switch (standard_sse_constant_p (x
))
8671 switch (get_attr_mode (insn
))
8674 return "%vpxor\t%0, %d0";
8676 return "%vxorpd\t%0, %d0";
8678 return "%vxorps\t%0, %d0";
8681 return "vpxor\t%x0, %x0, %x0";
8683 return "vxorpd\t%x0, %x0, %x0";
8685 return "vxorps\t%x0, %x0, %x0";
8693 return "vpcmpeqd\t%0, %0, %0";
8695 return "pcmpeqd\t%0, %0";
8703 /* Returns true if OP contains a symbol reference */
8706 symbolic_reference_mentioned_p (rtx op
)
8711 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
8714 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
8715 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
8721 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
8722 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
8726 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
8733 /* Return true if it is appropriate to emit `ret' instructions in the
8734 body of a function. Do this only if the epilogue is simple, needing a
8735 couple of insns. Prior to reloading, we can't tell how many registers
8736 must be saved, so return false then. Return false if there is no frame
8737 marker to de-allocate. */
8740 ix86_can_use_return_insn_p (void)
8742 struct ix86_frame frame
;
8744 if (! reload_completed
|| frame_pointer_needed
)
8747 /* Don't allow more than 32k pop, since that's all we can do
8748 with one instruction. */
8749 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
8752 ix86_compute_frame_layout (&frame
);
8753 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
8754 && (frame
.nregs
+ frame
.nsseregs
) == 0);
8757 /* Value should be nonzero if functions must have frame pointers.
8758 Zero means the frame pointer need not be set up (and parms may
8759 be accessed via the stack pointer) in functions that seem suitable. */
8762 ix86_frame_pointer_required (void)
8764 /* If we accessed previous frames, then the generated code expects
8765 to be able to access the saved ebp value in our frame. */
8766 if (cfun
->machine
->accesses_prev_frame
)
8769 /* Several x86 os'es need a frame pointer for other reasons,
8770 usually pertaining to setjmp. */
8771 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
8774 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
8775 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
8778 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
8779 allocation is 4GB. */
8780 if (TARGET_64BIT_MS_ABI
&& get_frame_size () > SEH_MAX_FRAME_SIZE
)
8783 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
8784 turns off the frame pointer by default. Turn it back on now if
8785 we've not got a leaf function. */
8786 if (TARGET_OMIT_LEAF_FRAME_POINTER
8788 || ix86_current_function_calls_tls_descriptor
))
8791 if (crtl
->profile
&& !flag_fentry
)
8797 /* Record that the current function accesses previous call frames. */
8800 ix86_setup_frame_addresses (void)
8802 cfun
->machine
->accesses_prev_frame
= 1;
8805 #ifndef USE_HIDDEN_LINKONCE
8806 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
8807 # define USE_HIDDEN_LINKONCE 1
8809 # define USE_HIDDEN_LINKONCE 0
8813 static int pic_labels_used
;
8815 /* Fills in the label name that should be used for a pc thunk for
8816 the given register. */
8819 get_pc_thunk_name (char name
[32], unsigned int regno
)
8821 gcc_assert (!TARGET_64BIT
);
8823 if (USE_HIDDEN_LINKONCE
)
8824 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
8826 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
8830 /* This function generates code for -fpic that loads %ebx with
8831 the return address of the caller and then returns. */
8834 ix86_code_end (void)
8839 for (regno
= AX_REG
; regno
<= SP_REG
; regno
++)
8844 if (!(pic_labels_used
& (1 << regno
)))
8847 get_pc_thunk_name (name
, regno
);
8849 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
8850 get_identifier (name
),
8851 build_function_type_list (void_type_node
, NULL_TREE
));
8852 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
8853 NULL_TREE
, void_type_node
);
8854 TREE_PUBLIC (decl
) = 1;
8855 TREE_STATIC (decl
) = 1;
8856 DECL_IGNORED_P (decl
) = 1;
8861 switch_to_section (darwin_sections
[text_coal_section
]);
8862 fputs ("\t.weak_definition\t", asm_out_file
);
8863 assemble_name (asm_out_file
, name
);
8864 fputs ("\n\t.private_extern\t", asm_out_file
);
8865 assemble_name (asm_out_file
, name
);
8866 putc ('\n', asm_out_file
);
8867 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8868 DECL_WEAK (decl
) = 1;
8872 if (USE_HIDDEN_LINKONCE
)
8874 DECL_COMDAT_GROUP (decl
) = DECL_ASSEMBLER_NAME (decl
);
8876 targetm
.asm_out
.unique_section (decl
, 0);
8877 switch_to_section (get_named_section (decl
, NULL
, 0));
8879 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
8880 fputs ("\t.hidden\t", asm_out_file
);
8881 assemble_name (asm_out_file
, name
);
8882 putc ('\n', asm_out_file
);
8883 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
8887 switch_to_section (text_section
);
8888 ASM_OUTPUT_LABEL (asm_out_file
, name
);
8891 DECL_INITIAL (decl
) = make_node (BLOCK
);
8892 current_function_decl
= decl
;
8893 init_function_start (decl
);
8894 first_function_block_is_cold
= false;
8895 /* Make sure unwind info is emitted for the thunk if needed. */
8896 final_start_function (emit_barrier (), asm_out_file
, 1);
8898 /* Pad stack IP move with 4 instructions (two NOPs count
8899 as one instruction). */
8900 if (TARGET_PAD_SHORT_FUNCTION
)
8905 fputs ("\tnop\n", asm_out_file
);
8908 xops
[0] = gen_rtx_REG (Pmode
, regno
);
8909 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
8910 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
8911 fputs ("\tret\n", asm_out_file
);
8912 final_end_function ();
8913 init_insn_lengths ();
8914 free_after_compilation (cfun
);
8916 current_function_decl
= NULL
;
8919 if (flag_split_stack
)
8920 file_end_indicate_split_stack ();
8923 /* Emit code for the SET_GOT patterns. */
8926 output_set_got (rtx dest
, rtx label ATTRIBUTE_UNUSED
)
8932 if (TARGET_VXWORKS_RTP
&& flag_pic
)
8934 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8935 xops
[2] = gen_rtx_MEM (Pmode
,
8936 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
8937 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
8939 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8940 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8941 an unadorned address. */
8942 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
8943 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
8944 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
8948 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
8952 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
8954 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
8957 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8958 is what will be referenced by the Mach-O PIC subsystem. */
8960 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8963 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8964 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
8969 get_pc_thunk_name (name
, REGNO (dest
));
8970 pic_labels_used
|= 1 << REGNO (dest
);
8972 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
8973 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
8974 output_asm_insn ("call\t%X2", xops
);
8975 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8976 is what will be referenced by the Mach-O PIC subsystem. */
8979 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
8981 targetm
.asm_out
.internal_label (asm_out_file
, "L",
8982 CODE_LABEL_NUMBER (label
));
8987 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
8992 /* Generate an "push" pattern for input ARG. */
8997 struct machine_function
*m
= cfun
->machine
;
8999 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
9000 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
9001 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
9003 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9004 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9006 return gen_rtx_SET (VOIDmode
,
9007 gen_rtx_MEM (word_mode
,
9008 gen_rtx_PRE_DEC (Pmode
,
9009 stack_pointer_rtx
)),
9013 /* Generate an "pop" pattern for input ARG. */
9018 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
9019 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
9021 return gen_rtx_SET (VOIDmode
,
9023 gen_rtx_MEM (word_mode
,
9024 gen_rtx_POST_INC (Pmode
,
9025 stack_pointer_rtx
)));
9028 /* Return >= 0 if there is an unused call-clobbered register available
9029 for the entire function. */
9032 ix86_select_alt_pic_regnum (void)
9036 && !ix86_current_function_calls_tls_descriptor
)
9039 /* Can't use the same register for both PIC and DRAP. */
9041 drap
= REGNO (crtl
->drap_reg
);
9044 for (i
= 2; i
>= 0; --i
)
9045 if (i
!= drap
&& !df_regs_ever_live_p (i
))
9049 return INVALID_REGNUM
;
9052 /* Return TRUE if we need to save REGNO. */
9055 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
)
9057 if (pic_offset_table_rtx
9058 && regno
== REAL_PIC_OFFSET_TABLE_REGNUM
9059 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
9061 || crtl
->calls_eh_return
9062 || crtl
->uses_const_pool
))
9063 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
9065 if (crtl
->calls_eh_return
&& maybe_eh_return
)
9070 unsigned test
= EH_RETURN_DATA_REGNO (i
);
9071 if (test
== INVALID_REGNUM
)
9078 if (crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
9081 return (df_regs_ever_live_p (regno
)
9082 && !call_used_regs
[regno
]
9083 && !fixed_regs
[regno
]
9084 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
9087 /* Return number of saved general prupose registers. */
9090 ix86_nsaved_regs (void)
9095 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9096 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9101 /* Return number of saved SSE registrers. */
9104 ix86_nsaved_sseregs (void)
9109 if (!TARGET_64BIT_MS_ABI
)
9111 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9112 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9117 /* Given FROM and TO register numbers, say whether this elimination is
9118 allowed. If stack alignment is needed, we can only replace argument
9119 pointer with hard frame pointer, or replace frame pointer with stack
9120 pointer. Otherwise, frame pointer elimination is automatically
9121 handled and all other eliminations are valid. */
9124 ix86_can_eliminate (const int from
, const int to
)
9126 if (stack_realign_fp
)
9127 return ((from
== ARG_POINTER_REGNUM
9128 && to
== HARD_FRAME_POINTER_REGNUM
)
9129 || (from
== FRAME_POINTER_REGNUM
9130 && to
== STACK_POINTER_REGNUM
));
9132 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
9135 /* Return the offset between two registers, one to be eliminated, and the other
9136 its replacement, at the start of a routine. */
9139 ix86_initial_elimination_offset (int from
, int to
)
9141 struct ix86_frame frame
;
9142 ix86_compute_frame_layout (&frame
);
9144 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
9145 return frame
.hard_frame_pointer_offset
;
9146 else if (from
== FRAME_POINTER_REGNUM
9147 && to
== HARD_FRAME_POINTER_REGNUM
)
9148 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
9151 gcc_assert (to
== STACK_POINTER_REGNUM
);
9153 if (from
== ARG_POINTER_REGNUM
)
9154 return frame
.stack_pointer_offset
;
9156 gcc_assert (from
== FRAME_POINTER_REGNUM
);
9157 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
9161 /* In a dynamically-aligned function, we can't know the offset from
9162 stack pointer to frame pointer, so we must ensure that setjmp
9163 eliminates fp against the hard fp (%ebp) rather than trying to
9164 index from %esp up to the top of the frame across a gap that is
9165 of unknown (at compile-time) size. */
9167 ix86_builtin_setjmp_frame_value (void)
9169 return stack_realign_fp
? hard_frame_pointer_rtx
: virtual_stack_vars_rtx
;
9172 /* When using -fsplit-stack, the allocation routines set a field in
9173 the TCB to the bottom of the stack plus this much space, measured
9176 #define SPLIT_STACK_AVAILABLE 256
9178 /* Fill structure ix86_frame about frame of currently computed function. */
9181 ix86_compute_frame_layout (struct ix86_frame
*frame
)
9183 unsigned HOST_WIDE_INT stack_alignment_needed
;
9184 HOST_WIDE_INT offset
;
9185 unsigned HOST_WIDE_INT preferred_alignment
;
9186 HOST_WIDE_INT size
= get_frame_size ();
9187 HOST_WIDE_INT to_allocate
;
9189 frame
->nregs
= ix86_nsaved_regs ();
9190 frame
->nsseregs
= ix86_nsaved_sseregs ();
9192 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
9193 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
9195 /* 64-bit MS ABI seem to require stack alignment to be always 16 except for
9196 function prologues and leaf. */
9197 if ((TARGET_64BIT_MS_ABI
&& preferred_alignment
< 16)
9198 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
9199 || ix86_current_function_calls_tls_descriptor
))
9201 preferred_alignment
= 16;
9202 stack_alignment_needed
= 16;
9203 crtl
->preferred_stack_boundary
= 128;
9204 crtl
->stack_alignment_needed
= 128;
9207 gcc_assert (!size
|| stack_alignment_needed
);
9208 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
9209 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
9211 /* For SEH we have to limit the amount of code movement into the prologue.
9212 At present we do this via a BLOCKAGE, at which point there's very little
9213 scheduling that can be done, which means that there's very little point
9214 in doing anything except PUSHs. */
9216 cfun
->machine
->use_fast_prologue_epilogue
= false;
9218 /* During reload iteration the amount of registers saved can change.
9219 Recompute the value as needed. Do not recompute when amount of registers
9220 didn't change as reload does multiple calls to the function and does not
9221 expect the decision to change within single iteration. */
9222 else if (!optimize_function_for_size_p (cfun
)
9223 && cfun
->machine
->use_fast_prologue_epilogue_nregs
!= frame
->nregs
)
9225 int count
= frame
->nregs
;
9226 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
9228 cfun
->machine
->use_fast_prologue_epilogue_nregs
= count
;
9230 /* The fast prologue uses move instead of push to save registers. This
9231 is significantly longer, but also executes faster as modern hardware
9232 can execute the moves in parallel, but can't do that for push/pop.
9234 Be careful about choosing what prologue to emit: When function takes
9235 many instructions to execute we may use slow version as well as in
9236 case function is known to be outside hot spot (this is known with
9237 feedback only). Weight the size of function by number of registers
9238 to save as it is cheap to use one or two push instructions but very
9239 slow to use many of them. */
9241 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
9242 if (node
->frequency
< NODE_FREQUENCY_NORMAL
9243 || (flag_branch_probabilities
9244 && node
->frequency
< NODE_FREQUENCY_HOT
))
9245 cfun
->machine
->use_fast_prologue_epilogue
= false;
9247 cfun
->machine
->use_fast_prologue_epilogue
9248 = !expensive_function_p (count
);
9251 frame
->save_regs_using_mov
9252 = (TARGET_PROLOGUE_USING_MOVE
&& cfun
->machine
->use_fast_prologue_epilogue
9253 /* If static stack checking is enabled and done with probes,
9254 the registers need to be saved before allocating the frame. */
9255 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
9257 /* Skip return address. */
9258 offset
= UNITS_PER_WORD
;
9260 /* Skip pushed static chain. */
9261 if (ix86_static_chain_on_stack
)
9262 offset
+= UNITS_PER_WORD
;
9264 /* Skip saved base pointer. */
9265 if (frame_pointer_needed
)
9266 offset
+= UNITS_PER_WORD
;
9267 frame
->hfp_save_offset
= offset
;
9269 /* The traditional frame pointer location is at the top of the frame. */
9270 frame
->hard_frame_pointer_offset
= offset
;
9272 /* Register save area */
9273 offset
+= frame
->nregs
* UNITS_PER_WORD
;
9274 frame
->reg_save_offset
= offset
;
9276 /* On SEH target, registers are pushed just before the frame pointer
9279 frame
->hard_frame_pointer_offset
= offset
;
9281 /* Align and set SSE register save area. */
9282 if (frame
->nsseregs
)
9284 /* The only ABI that has saved SSE registers (Win64) also has a
9285 16-byte aligned default stack, and thus we don't need to be
9286 within the re-aligned local stack frame to save them. */
9287 gcc_assert (INCOMING_STACK_BOUNDARY
>= 128);
9288 offset
= (offset
+ 16 - 1) & -16;
9289 offset
+= frame
->nsseregs
* 16;
9291 frame
->sse_reg_save_offset
= offset
;
9293 /* The re-aligned stack starts here. Values before this point are not
9294 directly comparable with values below this point. In order to make
9295 sure that no value happens to be the same before and after, force
9296 the alignment computation below to add a non-zero value. */
9297 if (stack_realign_fp
)
9298 offset
= (offset
+ stack_alignment_needed
) & -stack_alignment_needed
;
9301 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
9302 offset
+= frame
->va_arg_size
;
9304 /* Align start of frame for local function. */
9305 if (stack_realign_fp
9306 || offset
!= frame
->sse_reg_save_offset
9309 || cfun
->calls_alloca
9310 || ix86_current_function_calls_tls_descriptor
)
9311 offset
= (offset
+ stack_alignment_needed
- 1) & -stack_alignment_needed
;
9313 /* Frame pointer points here. */
9314 frame
->frame_pointer_offset
= offset
;
9318 /* Add outgoing arguments area. Can be skipped if we eliminated
9319 all the function calls as dead code.
9320 Skipping is however impossible when function calls alloca. Alloca
9321 expander assumes that last crtl->outgoing_args_size
9322 of stack frame are unused. */
9323 if (ACCUMULATE_OUTGOING_ARGS
9324 && (!crtl
->is_leaf
|| cfun
->calls_alloca
9325 || ix86_current_function_calls_tls_descriptor
))
9327 offset
+= crtl
->outgoing_args_size
;
9328 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
9331 frame
->outgoing_arguments_size
= 0;
9333 /* Align stack boundary. Only needed if we're calling another function
9335 if (!crtl
->is_leaf
|| cfun
->calls_alloca
9336 || ix86_current_function_calls_tls_descriptor
)
9337 offset
= (offset
+ preferred_alignment
- 1) & -preferred_alignment
;
9339 /* We've reached end of stack frame. */
9340 frame
->stack_pointer_offset
= offset
;
9342 /* Size prologue needs to allocate. */
9343 to_allocate
= offset
- frame
->sse_reg_save_offset
;
9345 if ((!to_allocate
&& frame
->nregs
<= 1)
9346 || (TARGET_64BIT
&& to_allocate
>= (HOST_WIDE_INT
) 0x80000000))
9347 frame
->save_regs_using_mov
= false;
9349 if (ix86_using_red_zone ()
9350 && crtl
->sp_is_unchanging
9352 && !ix86_current_function_calls_tls_descriptor
)
9354 frame
->red_zone_size
= to_allocate
;
9355 if (frame
->save_regs_using_mov
)
9356 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
9357 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
9358 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
9361 frame
->red_zone_size
= 0;
9362 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
9364 /* The SEH frame pointer location is near the bottom of the frame.
9365 This is enforced by the fact that the difference between the
9366 stack pointer and the frame pointer is limited to 240 bytes in
9367 the unwind data structure. */
9372 /* If we can leave the frame pointer where it is, do so. Also, returns
9373 the establisher frame for __builtin_frame_address (0). */
9374 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
9375 if (diff
<= SEH_MAX_FRAME_SIZE
9376 && (diff
> 240 || (diff
& 15) != 0)
9377 && !crtl
->accesses_prior_frames
)
9379 /* Ideally we'd determine what portion of the local stack frame
9380 (within the constraint of the lowest 240) is most heavily used.
9381 But without that complication, simply bias the frame pointer
9382 by 128 bytes so as to maximize the amount of the local stack
9383 frame that is addressable with 8-bit offsets. */
9384 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
9389 /* This is semi-inlined memory_address_length, but simplified
9390 since we know that we're always dealing with reg+offset, and
9391 to avoid having to create and discard all that rtl. */
9394 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
9400 /* EBP and R13 cannot be encoded without an offset. */
9401 len
= (regno
== BP_REG
|| regno
== R13_REG
);
9403 else if (IN_RANGE (offset
, -128, 127))
9406 /* ESP and R12 must be encoded with a SIB byte. */
9407 if (regno
== SP_REG
|| regno
== R12_REG
)
9413 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9414 The valid base registers are taken from CFUN->MACHINE->FS. */
9417 choose_baseaddr (HOST_WIDE_INT cfa_offset
)
9419 const struct machine_function
*m
= cfun
->machine
;
9420 rtx base_reg
= NULL
;
9421 HOST_WIDE_INT base_offset
= 0;
9423 if (m
->use_fast_prologue_epilogue
)
9425 /* Choose the base register most likely to allow the most scheduling
9426 opportunities. Generally FP is valid throughout the function,
9427 while DRAP must be reloaded within the epilogue. But choose either
9428 over the SP due to increased encoding size. */
9432 base_reg
= hard_frame_pointer_rtx
;
9433 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
9435 else if (m
->fs
.drap_valid
)
9437 base_reg
= crtl
->drap_reg
;
9438 base_offset
= 0 - cfa_offset
;
9440 else if (m
->fs
.sp_valid
)
9442 base_reg
= stack_pointer_rtx
;
9443 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9448 HOST_WIDE_INT toffset
;
9451 /* Choose the base register with the smallest address encoding.
9452 With a tie, choose FP > DRAP > SP. */
9455 base_reg
= stack_pointer_rtx
;
9456 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
9457 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
9459 if (m
->fs
.drap_valid
)
9461 toffset
= 0 - cfa_offset
;
9462 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
9465 base_reg
= crtl
->drap_reg
;
9466 base_offset
= toffset
;
9472 toffset
= m
->fs
.fp_offset
- cfa_offset
;
9473 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
9476 base_reg
= hard_frame_pointer_rtx
;
9477 base_offset
= toffset
;
9482 gcc_assert (base_reg
!= NULL
);
9484 return plus_constant (Pmode
, base_reg
, base_offset
);
9487 /* Emit code to save registers in the prologue. */
9490 ix86_emit_save_regs (void)
9495 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
9496 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9498 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
9499 RTX_FRAME_RELATED_P (insn
) = 1;
9503 /* Emit a single register save at CFA - CFA_OFFSET. */
9506 ix86_emit_save_reg_using_mov (enum machine_mode mode
, unsigned int regno
,
9507 HOST_WIDE_INT cfa_offset
)
9509 struct machine_function
*m
= cfun
->machine
;
9510 rtx reg
= gen_rtx_REG (mode
, regno
);
9511 rtx mem
, addr
, base
, insn
;
9513 addr
= choose_baseaddr (cfa_offset
);
9514 mem
= gen_frame_mem (mode
, addr
);
9516 /* For SSE saves, we need to indicate the 128-bit alignment. */
9517 set_mem_align (mem
, GET_MODE_ALIGNMENT (mode
));
9519 insn
= emit_move_insn (mem
, reg
);
9520 RTX_FRAME_RELATED_P (insn
) = 1;
9523 if (GET_CODE (base
) == PLUS
)
9524 base
= XEXP (base
, 0);
9525 gcc_checking_assert (REG_P (base
));
9527 /* When saving registers into a re-aligned local stack frame, avoid
9528 any tricky guessing by dwarf2out. */
9529 if (m
->fs
.realigned
)
9531 gcc_checking_assert (stack_realign_drap
);
9533 if (regno
== REGNO (crtl
->drap_reg
))
9535 /* A bit of a hack. We force the DRAP register to be saved in
9536 the re-aligned stack frame, which provides us with a copy
9537 of the CFA that will last past the prologue. Install it. */
9538 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9539 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9540 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9541 mem
= gen_rtx_MEM (mode
, addr
);
9542 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
9546 /* The frame pointer is a stable reference within the
9547 aligned frame. Use it. */
9548 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
9549 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
9550 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
9551 mem
= gen_rtx_MEM (mode
, addr
);
9552 add_reg_note (insn
, REG_CFA_EXPRESSION
,
9553 gen_rtx_SET (VOIDmode
, mem
, reg
));
9557 /* The memory may not be relative to the current CFA register,
9558 which means that we may need to generate a new pattern for
9559 use by the unwind info. */
9560 else if (base
!= m
->fs
.cfa_reg
)
9562 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
9563 m
->fs
.cfa_offset
- cfa_offset
);
9564 mem
= gen_rtx_MEM (mode
, addr
);
9565 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (VOIDmode
, mem
, reg
));
9569 /* Emit code to save registers using MOV insns.
9570 First register is stored at CFA - CFA_OFFSET. */
9572 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9576 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9577 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9579 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
9580 cfa_offset
-= UNITS_PER_WORD
;
9584 /* Emit code to save SSE registers using MOV insns.
9585 First register is stored at CFA - CFA_OFFSET. */
9587 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
9591 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
9592 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true))
9594 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
9599 static GTY(()) rtx queued_cfa_restores
;
9601 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9602 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9603 Don't add the note if the previously saved value will be left untouched
9604 within stack red-zone till return, as unwinders can find the same value
9605 in the register and on the stack. */
9608 ix86_add_cfa_restore_note (rtx insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
9610 if (!crtl
->shrink_wrapped
9611 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
9616 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
9617 RTX_FRAME_RELATED_P (insn
) = 1;
9621 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
9624 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9627 ix86_add_queued_cfa_restore_notes (rtx insn
)
9630 if (!queued_cfa_restores
)
9632 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
9634 XEXP (last
, 1) = REG_NOTES (insn
);
9635 REG_NOTES (insn
) = queued_cfa_restores
;
9636 queued_cfa_restores
= NULL_RTX
;
9637 RTX_FRAME_RELATED_P (insn
) = 1;
9640 /* Expand prologue or epilogue stack adjustment.
9641 The pattern exist to put a dependency on all ebp-based memory accesses.
9642 STYLE should be negative if instructions should be marked as frame related,
9643 zero if %r11 register is live and cannot be freely used and positive
9647 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
9648 int style
, bool set_cfa
)
9650 struct machine_function
*m
= cfun
->machine
;
9652 bool add_frame_related_expr
= false;
9654 if (Pmode
== SImode
)
9655 insn
= gen_pro_epilogue_adjust_stack_si_add (dest
, src
, offset
);
9656 else if (x86_64_immediate_operand (offset
, DImode
))
9657 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, offset
);
9661 /* r11 is used by indirect sibcall return as well, set before the
9662 epilogue and used after the epilogue. */
9664 tmp
= gen_rtx_REG (DImode
, R11_REG
);
9667 gcc_assert (src
!= hard_frame_pointer_rtx
9668 && dest
!= hard_frame_pointer_rtx
);
9669 tmp
= hard_frame_pointer_rtx
;
9671 insn
= emit_insn (gen_rtx_SET (DImode
, tmp
, offset
));
9673 add_frame_related_expr
= true;
9675 insn
= gen_pro_epilogue_adjust_stack_di_add (dest
, src
, tmp
);
9678 insn
= emit_insn (insn
);
9680 ix86_add_queued_cfa_restore_notes (insn
);
9686 gcc_assert (m
->fs
.cfa_reg
== src
);
9687 m
->fs
.cfa_offset
+= INTVAL (offset
);
9688 m
->fs
.cfa_reg
= dest
;
9690 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9691 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9692 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
9693 RTX_FRAME_RELATED_P (insn
) = 1;
9697 RTX_FRAME_RELATED_P (insn
) = 1;
9698 if (add_frame_related_expr
)
9700 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
9701 r
= gen_rtx_SET (VOIDmode
, dest
, r
);
9702 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
9706 if (dest
== stack_pointer_rtx
)
9708 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
9709 bool valid
= m
->fs
.sp_valid
;
9711 if (src
== hard_frame_pointer_rtx
)
9713 valid
= m
->fs
.fp_valid
;
9714 ooffset
= m
->fs
.fp_offset
;
9716 else if (src
== crtl
->drap_reg
)
9718 valid
= m
->fs
.drap_valid
;
9723 /* Else there are two possibilities: SP itself, which we set
9724 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
9725 taken care of this by hand along the eh_return path. */
9726 gcc_checking_assert (src
== stack_pointer_rtx
9727 || offset
== const0_rtx
);
9730 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
9731 m
->fs
.sp_valid
= valid
;
9735 /* Find an available register to be used as dynamic realign argument
9736 pointer regsiter. Such a register will be written in prologue and
9737 used in begin of body, so it must not be
9738 1. parameter passing register.
9740 We reuse static-chain register if it is available. Otherwise, we
9741 use DI for i386 and R13 for x86-64. We chose R13 since it has
9744 Return: the regno of chosen register. */
9747 find_drap_reg (void)
9749 tree decl
= cfun
->decl
;
9753 /* Use R13 for nested function or function need static chain.
9754 Since function with tail call may use any caller-saved
9755 registers in epilogue, DRAP must not use caller-saved
9756 register in such case. */
9757 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9764 /* Use DI for nested function or function need static chain.
9765 Since function with tail call may use any caller-saved
9766 registers in epilogue, DRAP must not use caller-saved
9767 register in such case. */
9768 if (DECL_STATIC_CHAIN (decl
) || crtl
->tail_call_emit
)
9771 /* Reuse static chain register if it isn't used for parameter
9773 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
9775 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
9776 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
9783 /* Return minimum incoming stack alignment. */
9786 ix86_minimum_incoming_stack_boundary (bool sibcall
)
9788 unsigned int incoming_stack_boundary
;
9790 /* Prefer the one specified at command line. */
9791 if (ix86_user_incoming_stack_boundary
)
9792 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
9793 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
9794 if -mstackrealign is used, it isn't used for sibcall check and
9795 estimated stack alignment is 128bit. */
9798 && ix86_force_align_arg_pointer
9799 && crtl
->stack_alignment_estimated
== 128)
9800 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9802 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
9804 /* Incoming stack alignment can be changed on individual functions
9805 via force_align_arg_pointer attribute. We use the smallest
9806 incoming stack boundary. */
9807 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
9808 && lookup_attribute (ix86_force_align_arg_pointer_string
,
9809 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
9810 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
9812 /* The incoming stack frame has to be aligned at least at
9813 parm_stack_boundary. */
9814 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
9815 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
9817 /* Stack at entrance of main is aligned by runtime. We use the
9818 smallest incoming stack boundary. */
9819 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
9820 && DECL_NAME (current_function_decl
)
9821 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
9822 && DECL_FILE_SCOPE_P (current_function_decl
))
9823 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
9825 return incoming_stack_boundary
;
9828 /* Update incoming stack boundary and estimated stack alignment. */
9831 ix86_update_stack_boundary (void)
9833 ix86_incoming_stack_boundary
9834 = ix86_minimum_incoming_stack_boundary (false);
9836 /* x86_64 vararg needs 16byte stack alignment for register save
9840 && crtl
->stack_alignment_estimated
< 128)
9841 crtl
->stack_alignment_estimated
= 128;
9844 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9845 needed or an rtx for DRAP otherwise. */
9848 ix86_get_drap_rtx (void)
9850 if (ix86_force_drap
|| !ACCUMULATE_OUTGOING_ARGS
)
9851 crtl
->need_drap
= true;
9853 if (stack_realign_drap
)
9855 /* Assign DRAP to vDRAP and returns vDRAP */
9856 unsigned int regno
= find_drap_reg ();
9861 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
9862 crtl
->drap_reg
= arg_ptr
;
9865 drap_vreg
= copy_to_reg (arg_ptr
);
9869 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
9872 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
9873 RTX_FRAME_RELATED_P (insn
) = 1;
9881 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9884 ix86_internal_arg_pointer (void)
9886 return virtual_incoming_args_rtx
;
9889 struct scratch_reg
{
9894 /* Return a short-lived scratch register for use on function entry.
9895 In 32-bit mode, it is valid only after the registers are saved
9896 in the prologue. This register must be released by means of
9897 release_scratch_register_on_entry once it is dead. */
9900 get_scratch_register_on_entry (struct scratch_reg
*sr
)
9908 /* We always use R11 in 64-bit mode. */
9913 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
9915 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
9916 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
9917 int regparm
= ix86_function_regparm (fntype
, decl
);
9919 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
9921 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9922 for the static chain register. */
9923 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
9924 && drap_regno
!= AX_REG
)
9926 else if (regparm
< 2 && drap_regno
!= DX_REG
)
9928 /* ecx is the static chain register. */
9929 else if (regparm
< 3 && !fastcall_p
&& !static_chain_p
9930 && drap_regno
!= CX_REG
)
9932 else if (ix86_save_reg (BX_REG
, true))
9934 /* esi is the static chain register. */
9935 else if (!(regparm
== 3 && static_chain_p
)
9936 && ix86_save_reg (SI_REG
, true))
9938 else if (ix86_save_reg (DI_REG
, true))
9942 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
9947 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
9950 rtx insn
= emit_insn (gen_push (sr
->reg
));
9951 RTX_FRAME_RELATED_P (insn
) = 1;
9955 /* Release a scratch register obtained from the preceding function. */
9958 release_scratch_register_on_entry (struct scratch_reg
*sr
)
9962 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
9964 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9965 RTX_FRAME_RELATED_P (insn
) = 1;
9966 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
9967 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
9968 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
9972 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9974 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9977 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size
)
9979 /* We skip the probe for the first interval + a small dope of 4 words and
9980 probe that many bytes past the specified size to maintain a protection
9981 area at the botton of the stack. */
9982 const int dope
= 4 * UNITS_PER_WORD
;
9983 rtx size_rtx
= GEN_INT (size
), last
;
9985 /* See if we have a constant small number of probes to generate. If so,
9986 that's the easy case. The run-time loop is made up of 11 insns in the
9987 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9988 for n # of intervals. */
9989 if (size
<= 5 * PROBE_INTERVAL
)
9991 HOST_WIDE_INT i
, adjust
;
9992 bool first_probe
= true;
9994 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9995 values of N from 1 until it exceeds SIZE. If only one probe is
9996 needed, this will not generate any code. Then adjust and probe
9997 to PROBE_INTERVAL + SIZE. */
9998 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10002 adjust
= 2 * PROBE_INTERVAL
+ dope
;
10003 first_probe
= false;
10006 adjust
= PROBE_INTERVAL
;
10008 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10009 plus_constant (Pmode
, stack_pointer_rtx
,
10011 emit_stack_probe (stack_pointer_rtx
);
10015 adjust
= size
+ PROBE_INTERVAL
+ dope
;
10017 adjust
= size
+ PROBE_INTERVAL
- i
;
10019 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10020 plus_constant (Pmode
, stack_pointer_rtx
,
10022 emit_stack_probe (stack_pointer_rtx
);
10024 /* Adjust back to account for the additional first interval. */
10025 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10026 plus_constant (Pmode
, stack_pointer_rtx
,
10027 PROBE_INTERVAL
+ dope
)));
10030 /* Otherwise, do the same as above, but in a loop. Note that we must be
10031 extra careful with variables wrapping around because we might be at
10032 the very top (or the very bottom) of the address space and we have
10033 to be able to handle this case properly; in particular, we use an
10034 equality test for the loop condition. */
10037 HOST_WIDE_INT rounded_size
;
10038 struct scratch_reg sr
;
10040 get_scratch_register_on_entry (&sr
);
10043 /* Step 1: round SIZE to the previous multiple of the interval. */
10045 rounded_size
= size
& -PROBE_INTERVAL
;
10048 /* Step 2: compute initial and final value of the loop counter. */
10050 /* SP = SP_0 + PROBE_INTERVAL. */
10051 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10052 plus_constant (Pmode
, stack_pointer_rtx
,
10053 - (PROBE_INTERVAL
+ dope
))));
10055 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10056 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
10057 emit_insn (gen_rtx_SET (VOIDmode
, sr
.reg
,
10058 gen_rtx_PLUS (Pmode
, sr
.reg
,
10059 stack_pointer_rtx
)));
10062 /* Step 3: the loop
10064 while (SP != LAST_ADDR)
10066 SP = SP + PROBE_INTERVAL
10070 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10071 values of N from 1 until it is equal to ROUNDED_SIZE. */
10073 emit_insn (ix86_gen_adjust_stack_and_probe (sr
.reg
, sr
.reg
, size_rtx
));
10076 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10077 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10079 if (size
!= rounded_size
)
10081 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10082 plus_constant (Pmode
, stack_pointer_rtx
,
10083 rounded_size
- size
)));
10084 emit_stack_probe (stack_pointer_rtx
);
10087 /* Adjust back to account for the additional first interval. */
10088 last
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10089 plus_constant (Pmode
, stack_pointer_rtx
,
10090 PROBE_INTERVAL
+ dope
)));
10092 release_scratch_register_on_entry (&sr
);
10095 gcc_assert (cfun
->machine
->fs
.cfa_reg
!= stack_pointer_rtx
);
10097 /* Even if the stack pointer isn't the CFA register, we need to correctly
10098 describe the adjustments made to it, in particular differentiate the
10099 frame-related ones from the frame-unrelated ones. */
10102 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
10103 XVECEXP (expr
, 0, 0)
10104 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10105 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
10106 XVECEXP (expr
, 0, 1)
10107 = gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10108 plus_constant (Pmode
, stack_pointer_rtx
,
10109 PROBE_INTERVAL
+ dope
+ size
));
10110 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
10111 RTX_FRAME_RELATED_P (last
) = 1;
10113 cfun
->machine
->fs
.sp_offset
+= size
;
10116 /* Make sure nothing is scheduled before we are done. */
10117 emit_insn (gen_blockage ());
10120 /* Adjust the stack pointer up to REG while probing it. */
/* Emits, at final-assembly time, a loop that repeatedly subtracts
   PROBE_INTERVAL from SP and touches (ORs 0 into) the new top of
   stack, until SP equals REG.  Uses assembler-local labels LPSRL
   (loop) / LPSRE (end).
   NOTE(review): the extraction dropped some source lines here (the
   return type, the xops[] declaration, xops[1] = reg, and the final
   return) — verify against the complete file.  */
10123 output_adjust_stack_and_probe (rtx reg
)
10125 static int labelno
= 0;
10126 char loop_lab
[32], end_lab
[32];
/* Generate a fresh pair of internal labels per invocation.  */
10129 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10130 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10132 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10134 /* Jump to END_LAB if SP == LAST_ADDR. */
10135 xops
[0] = stack_pointer_rtx
;
10137 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10138 fputs ("\tje\t", asm_out_file
);
10139 assemble_name_raw (asm_out_file
, end_lab
);
10140 fputc ('\n', asm_out_file
);
/* The stack grows downward, so the adjustment is a SUB.  */
10142 /* SP = SP + PROBE_INTERVAL. */
10143 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10144 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
/* Probe at SP: OR of zero writes the location without changing it.  */
10147 xops
[1] = const0_rtx
;
10148 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
10150 fprintf (asm_out_file
, "\tjmp\t");
10151 assemble_name_raw (asm_out_file
, loop_lab
);
10152 fputc ('\n', asm_out_file
);
10154 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10159 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10160 inclusive. These are offsets from the current stack pointer. */
/* NOTE(review): gaps in the embedded line numbers show that several
   source lines were dropped by extraction (loop counter declaration,
   probe offsets, the else that introduces the loop case, closing
   braces); do not assume the control flow shown here is complete.  */
10163 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
)
10165 /* See if we have a constant small number of probes to generate. If so,
10166 that's the easy case. The run-time loop is made up of 7 insns in the
10167 generic case while the compile-time loop is made up of n insns for n #
10169 if (size
<= 7 * PROBE_INTERVAL
)
10173 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10174 it exceeds SIZE. If only one probe is needed, this will not
10175 generate any code. Then probe at FIRST + SIZE. */
10176 for (i
= PROBE_INTERVAL
; i
< size
; i
+= PROBE_INTERVAL
)
10177 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10180 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
10184 /* Otherwise, do the same as above, but in a loop. Note that we must be
10185 extra careful with variables wrapping around because we might be at
10186 the very top (or the very bottom) of the address space and we have
10187 to be able to handle this case properly; in particular, we use an
10188 equality test for the loop condition. */
10191 HOST_WIDE_INT rounded_size
, last
;
10192 struct scratch_reg sr
;
10194 get_scratch_register_on_entry (&sr
);
10197 /* Step 1: round SIZE to the previous multiple of the interval. */
/* PROBE_INTERVAL is a power of two, so AND with its negation
   rounds down.  */
10199 rounded_size
= size
& -PROBE_INTERVAL
;
10202 /* Step 2: compute initial and final value of the loop counter. */
10204 /* TEST_OFFSET = FIRST. */
/* Offsets are negated: the scratch register holds the displacement
   below the stack pointer.  */
10205 emit_move_insn (sr
.reg
, GEN_INT (-first
));
10207 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10208 last
= first
+ rounded_size
;
10211 /* Step 3: the loop
10213 while (TEST_ADDR != LAST_ADDR)
10215 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10219 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10220 until it is equal to ROUNDED_SIZE. */
10222 emit_insn (ix86_gen_probe_stack_range (sr
.reg
, sr
.reg
, GEN_INT (-last
)));
10225 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10226 that SIZE is equal to ROUNDED_SIZE. */
10228 if (size
!= rounded_size
)
10229 emit_stack_probe (plus_constant (Pmode
,
10230 gen_rtx_PLUS (Pmode
,
10233 rounded_size
- size
));
10235 release_scratch_register_on_entry (&sr
);
10238 /* Make sure nothing is scheduled before we are done. */
10239 emit_insn (gen_blockage ());
10242 /* Probe a range of stack addresses from REG to END, inclusive. These are
10243 offsets from the current stack pointer. */
/* Assembly-output counterpart of ix86_emit_probe_stack_range: emits a
   compare/probe/jump loop that advances REG by PROBE_INTERVAL until it
   reaches END, probing SP+REG each iteration.
   NOTE(review): extraction dropped lines here (return type, xops[]
   declaration, the xops[0]/xops[1] setup before the cmp, and the final
   return) — verify against the complete file.  */
10246 output_probe_stack_range (rtx reg
, rtx end
)
10248 static int labelno
= 0;
10249 char loop_lab
[32], end_lab
[32];
10252 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
);
10253 ASM_GENERATE_INTERNAL_LABEL (end_lab
, "LPSRE", labelno
++);
10255 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
10257 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10260 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
10261 fputs ("\tje\t", asm_out_file
);
10262 assemble_name_raw (asm_out_file
, end_lab
);
10263 fputc ('\n', asm_out_file
);
10265 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10266 xops
[1] = GEN_INT (PROBE_INTERVAL
);
10267 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
10269 /* Probe at TEST_ADDR. */
/* The probe is an OR of zero into (SP + offset register): it touches
   the page without changing memory contents.  */
10270 xops
[0] = stack_pointer_rtx
;
10272 xops
[2] = const0_rtx
;
10273 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
10275 fprintf (asm_out_file
, "\tjmp\t");
10276 assemble_name_raw (asm_out_file
, loop_lab
);
10277 fputc ('\n', asm_out_file
);
10279 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, end_lab
);
10284 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10285 to be generated in correct form. */
/* NOTE(review): gaps in the embedded line numbers show dropped source
   lines (parts of the condition computing stack_realign, the early
   return, basic-block iteration setup, and several braces/gotos); the
   control flow below is incomplete from this view.  */
10287 ix86_finalize_stack_realign_flags (void)
10289 /* Check if stack realign is really needed after reload, and
10290 stores result in cfun */
10291 unsigned int incoming_stack_boundary
10292 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
10293 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
10294 unsigned int stack_realign
= (incoming_stack_boundary
10296 ? crtl
->max_used_stack_slot_alignment
10297 : crtl
->stack_alignment_needed
));
10299 if (crtl
->stack_realign_finalized
)
10301 /* After stack_realign_needed is finalized, we can no longer
10303 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
10307 /* If the only reason for frame_pointer_needed is that we conservatively
10308 assumed stack realignment might be needed, but in the end nothing that
10309 needed the stack alignment had been spilled, clear frame_pointer_needed
10310 and say we don't need stack realignment. */
10312 && !crtl
->need_drap
10313 && frame_pointer_needed
10315 && flag_omit_frame_pointer
10316 && crtl
->sp_is_unchanging
10317 && !ix86_current_function_calls_tls_descriptor
10318 && !crtl
->accesses_prior_frames
10319 && !cfun
->calls_alloca
10320 && !crtl
->calls_eh_return
10321 && !(flag_stack_check
&& STACK_CHECK_MOVING_SP
)
10322 && !ix86_frame_pointer_required ()
10323 && get_frame_size () == 0
10324 && ix86_nsaved_sseregs () == 0
10325 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
/* Scan the function body: if any insn still requires a stack frame
   beyond what the prologue sets up, keep the conservative answer.  */
10327 HARD_REG_SET set_up_by_prologue
, prologue_used
;
10330 CLEAR_HARD_REG_SET (prologue_used
);
10331 CLEAR_HARD_REG_SET (set_up_by_prologue
);
10332 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
10333 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
10334 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
10335 HARD_FRAME_POINTER_REGNUM
);
10339 FOR_BB_INSNS (bb
, insn
)
10340 if (NONDEBUG_INSN_P (insn
)
10341 && requires_stack_frame_p (insn
, prologue_used
,
10342 set_up_by_prologue
))
10344 crtl
->stack_realign_needed
= stack_realign
;
10345 crtl
->stack_realign_finalized
= true;
/* Nothing needed the alignment: drop the frame pointer and realignment,
   and clamp all recorded alignments to the incoming boundary.  */
10350 frame_pointer_needed
= false;
10351 stack_realign
= false;
10352 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
10353 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
10354 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
10355 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
10356 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
/* Recompute DF register-liveness after changing frame_pointer_needed.  */
10357 df_finish_pass (true);
10358 df_scan_alloc (NULL
);
10360 df_compute_regs_ever_live (true);
10364 crtl
->stack_realign_needed
= stack_realign
;
10365 crtl
->stack_realign_finalized
= true;
10368 /* Expand the prologue into a bunch of separate insns. */
/* Main x86 prologue expander: finalizes realignment flags, tracks the
   CFA/SP/FP state in cfun->machine->fs, handles the ms_hook prologue,
   static chain on stack, DRAP setup, register saves (push vs. mov),
   fp-based realignment, stack allocation with optional probing, and
   PIC register setup.
   NOTE(review): many source lines were dropped by extraction (gaps in
   the embedded line numbers: declarations such as insn/t/r10/bb,
   braces, else arms, probe offsets); do not treat the control flow
   shown here as complete.  */
10371 ix86_expand_prologue (void)
10373 struct machine_function
*m
= cfun
->machine
;
10376 struct ix86_frame frame
;
10377 HOST_WIDE_INT allocate
;
10378 bool int_registers_saved
;
10379 bool sse_registers_saved
;
10381 ix86_finalize_stack_realign_flags ();
10383 /* DRAP should not coexist with stack_realign_fp */
10384 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
10386 memset (&m
->fs
, 0, sizeof (m
->fs
));
10388 /* Initialize CFA state for before the prologue. */
10389 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10390 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
10392 /* Track SP offset to the CFA. We continue tracking this after we've
10393 swapped the CFA register away from SP. In the case of re-alignment
10394 this is fudged; we're interested to offsets within the local frame. */
10395 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10396 m
->fs
.sp_valid
= true;
10398 ix86_compute_frame_layout (&frame
);
10400 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
10402 /* We should have already generated an error for any use of
10403 ms_hook on a nested function. */
10404 gcc_checking_assert (!ix86_static_chain_on_stack
);
10406 /* Check if profiling is active and we shall use profiling before
10407 prologue variant. If so sorry. */
10408 if (crtl
->profile
&& flag_fentry
!= 0)
10409 sorry ("ms_hook_prologue attribute isn%'t compatible "
10410 "with -mfentry for 32-bit");
10412 /* In ix86_asm_output_function_label we emitted:
10413 8b ff movl.s %edi,%edi
10415 8b ec movl.s %esp,%ebp
10417 This matches the hookable function prologue in Win32 API
10418 functions in Microsoft Windows XP Service Pack 2 and newer.
10419 Wine uses this to enable Windows apps to hook the Win32 API
10420 functions provided by Wine.
10422 What that means is that we've already set up the frame pointer. */
10424 if (frame_pointer_needed
10425 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
10429 /* We've decided to use the frame pointer already set up.
10430 Describe this to the unwinder by pretending that both
10431 push and mov insns happen right here.
10433 Putting the unwind info here at the end of the ms_hook
10434 is done so that we can make absolutely certain we get
10435 the required byte sequence at the start of the function,
10436 rather than relying on an assembler that can produce
10437 the exact encoding required.
10439 However it does mean (in the unpatched case) that we have
10440 a 1 insn window where the asynchronous unwind info is
10441 incorrect. However, if we placed the unwind info at
10442 its correct location we would have incorrect unwind info
10443 in the patched case. Which is probably all moot since
10444 I don't expect Wine generates dwarf2 unwind info for the
10445 system libraries that use this feature. */
10447 insn
= emit_insn (gen_blockage ());
10449 push
= gen_push (hard_frame_pointer_rtx
);
10450 mov
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
10451 stack_pointer_rtx
);
10452 RTX_FRAME_RELATED_P (push
) = 1;
10453 RTX_FRAME_RELATED_P (mov
) = 1;
10455 RTX_FRAME_RELATED_P (insn
) = 1;
10456 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10457 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
10459 /* Note that gen_push incremented m->fs.cfa_offset, even
10460 though we didn't emit the push insn here. */
10461 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10462 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
10463 m
->fs
.fp_valid
= true;
10467 /* The frame pointer is not needed so pop %ebp again.
10468 This leaves us with a pristine state. */
10469 emit_insn (gen_pop (hard_frame_pointer_rtx
));
10473 /* The first insn of a function that accepts its static chain on the
10474 stack is to push the register that would be filled in by a direct
10475 call. This insn will be skipped by the trampoline. */
10476 else if (ix86_static_chain_on_stack
)
10478 insn
= emit_insn (gen_push (ix86_static_chain (cfun
->decl
, false)));
10479 emit_insn (gen_blockage ());
10481 /* We don't want to interpret this push insn as a register save,
10482 only as a stack adjustment. The real copy of the register as
10483 a save will be done later, if needed. */
10484 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
10485 t
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
);
10486 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
10487 RTX_FRAME_RELATED_P (insn
) = 1;
10490 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
10491 of DRAP is needed and stack realignment is really needed after reload */
10492 if (stack_realign_drap
)
10494 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10496 /* Only need to push parameter pointer reg if it is caller saved. */
10497 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10499 /* Push arg pointer reg */
10500 insn
= emit_insn (gen_push (crtl
->drap_reg
));
10501 RTX_FRAME_RELATED_P (insn
) = 1;
10504 /* Grab the argument pointer. */
10505 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
10506 insn
= emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10507 RTX_FRAME_RELATED_P (insn
) = 1;
/* From here on the DRAP register, not SP, defines the CFA.  */
10508 m
->fs
.cfa_reg
= crtl
->drap_reg
;
10509 m
->fs
.cfa_offset
= 0;
10511 /* Align the stack. */
10512 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10514 GEN_INT (-align_bytes
)));
10515 RTX_FRAME_RELATED_P (insn
) = 1;
10517 /* Replicate the return address on the stack so that return
10518 address can be reached via (argp - 1) slot. This is needed
10519 to implement macro RETURN_ADDR_RTX and intrinsic function
10520 expand_builtin_return_addr etc. */
10521 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
10522 t
= gen_frame_mem (word_mode
, t
);
10523 insn
= emit_insn (gen_push (t
));
10524 RTX_FRAME_RELATED_P (insn
) = 1;
10526 /* For the purposes of frame and register save area addressing,
10527 we've started over with a new frame. */
10528 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
10529 m
->fs
.realigned
= true;
10532 int_registers_saved
= (frame
.nregs
== 0);
10533 sse_registers_saved
= (frame
.nsseregs
== 0);
10535 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10537 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10538 slower on all targets. Also sdb doesn't like it. */
10539 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
10540 RTX_FRAME_RELATED_P (insn
) = 1;
10542 /* Push registers now, before setting the frame pointer
10544 if (!int_registers_saved
10546 && !frame
.save_regs_using_mov
)
10548 ix86_emit_save_regs ();
10549 int_registers_saved
= true;
10550 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10553 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
10555 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
10556 RTX_FRAME_RELATED_P (insn
) = 1;
10558 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10559 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10560 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
10561 m
->fs
.fp_valid
= true;
10565 if (!int_registers_saved
)
10567 /* If saving registers via PUSH, do so now. */
10568 if (!frame
.save_regs_using_mov
)
10570 ix86_emit_save_regs ();
10571 int_registers_saved
= true;
10572 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
10575 /* When using red zone we may start register saving before allocating
10576 the stack frame saving one cycle of the prologue. However, avoid
10577 doing this if we have to probe the stack; at least on x86_64 the
10578 stack probe can turn into a call that clobbers a red zone location. */
10579 else if (ix86_using_red_zone ()
10580 && (! TARGET_STACK_PROBE
10581 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
10583 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10584 int_registers_saved
= true;
10588 if (stack_realign_fp
)
10590 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10591 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
10593 /* The computation of the size of the re-aligned stack frame means
10594 that we must allocate the size of the register save area before
10595 performing the actual alignment. Otherwise we cannot guarantee
10596 that there's enough storage above the realignment point. */
10597 if (m
->fs
.sp_offset
!= frame
.sse_reg_save_offset
)
10598 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10599 GEN_INT (m
->fs
.sp_offset
10600 - frame
.sse_reg_save_offset
),
10603 /* Align the stack. */
10604 insn
= emit_insn (ix86_gen_andsp (stack_pointer_rtx
,
10606 GEN_INT (-align_bytes
)));
10608 /* For the purposes of register save area addressing, the stack
10609 pointer is no longer valid. As for the value of sp_offset,
10610 see ix86_compute_frame_layout, which we need to match in order
10611 to pass verification of stack_pointer_offset at the end. */
10612 m
->fs
.sp_offset
= (m
->fs
.sp_offset
+ align_bytes
) & -align_bytes
;
10613 m
->fs
.sp_valid
= false;
/* ALLOCATE = remaining frame space to carve out below the saves.  */
10616 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
10618 if (flag_stack_usage_info
)
10620 /* We start to count from ARG_POINTER. */
10621 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
10623 /* If it was realigned, take into account the fake frame. */
10624 if (stack_realign_drap
)
10626 if (ix86_static_chain_on_stack
)
10627 stack_size
+= UNITS_PER_WORD
;
10629 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
10630 stack_size
+= UNITS_PER_WORD
;
10632 /* This over-estimates by 1 minimal-stack-alignment-unit but
10633 mitigates that by counting in the new return address slot. */
10634 current_function_dynamic_stack_size
10635 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
10638 current_function_static_stack_size
= stack_size
;
10641 /* On SEH target with very large frame size, allocate an area to save
10642 SSE registers (as the very large allocation won't be described). */
10644 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
10645 && !sse_registers_saved
)
10647 HOST_WIDE_INT sse_size
=
10648 frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
10650 gcc_assert (int_registers_saved
);
10652 /* No need to do stack checking as the area will be immediately
10654 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10655 GEN_INT (-sse_size
), -1,
10656 m
->fs
.cfa_reg
== stack_pointer_rtx
);
10657 allocate
-= sse_size
;
10658 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
10659 sse_registers_saved
= true;
10662 /* The stack has already been decremented by the instruction calling us
10663 so probe if the size is non-negative to preserve the protection area. */
10664 if (allocate
>= 0 && flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
10666 /* We expect the registers to be saved when probes are used. */
10667 gcc_assert (int_registers_saved
);
10669 if (STACK_CHECK_MOVING_SP
)
10671 ix86_adjust_stack_and_probe (allocate
);
10676 HOST_WIDE_INT size
= allocate
;
/* Clamp the probed size on 64-bit so the probe offsets stay
   representable in 32-bit displacements.  */
10678 if (TARGET_64BIT
&& size
>= (HOST_WIDE_INT
) 0x80000000)
10679 size
= 0x80000000 - STACK_CHECK_PROTECT
- 1;
10681 if (TARGET_STACK_PROBE
)
10682 ix86_emit_probe_stack_range (0, size
+ STACK_CHECK_PROTECT
);
10684 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT
, size
);
10690 else if (!ix86_target_stack_probe ()
10691 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
10693 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
10694 GEN_INT (-allocate
), -1,
10695 m
->fs
.cfa_reg
== stack_pointer_rtx
)
;
/* Large allocation on a stack-probing target: call the allocation
   worker (e.g. ___chkstk) with the size in AX, preserving AX/R10 if
   they are live at function start.  */
10699 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
10701 rtx (*adjust_stack_insn
)(rtx
, rtx
, rtx
);
10703 bool eax_live
= false;
10704 bool r10_live
= false;
10707 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
10708 if (!TARGET_64BIT_MS_ABI
)
10709 eax_live
= ix86_eax_live_at_start_p ();
10713 emit_insn (gen_push (eax
));
10714 allocate
-= UNITS_PER_WORD
;
10718 r10
= gen_rtx_REG (Pmode
, R10_REG
);
10719 emit_insn (gen_push (r10
));
10720 allocate
-= UNITS_PER_WORD
;
10723 emit_move_insn (eax
, GEN_INT (allocate
));
10724 emit_insn (ix86_gen_allocate_stack_worker (eax
, eax
));
10726 /* Use the fact that AX still contains ALLOCATE. */
10727 adjust_stack_insn
= (Pmode
== DImode
10728 ? gen_pro_epilogue_adjust_stack_di_sub
10729 : gen_pro_epilogue_adjust_stack_si_sub
);
10731 insn
= emit_insn (adjust_stack_insn (stack_pointer_rtx
,
10732 stack_pointer_rtx
, eax
));
10734 /* Note that SEH directives need to continue tracking the stack
10735 pointer even after the frame pointer has been set up. */
10736 if (m
->fs
.cfa_reg
== stack_pointer_rtx
|| TARGET_SEH
)
10738 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10739 m
->fs
.cfa_offset
+= allocate
;
10741 RTX_FRAME_RELATED_P (insn
) = 1;
10742 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
10743 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
10744 plus_constant (Pmode
, stack_pointer_rtx
,
10747 m
->fs
.sp_offset
+= allocate
;
/* Reload AX/R10 from the slots pushed above, now below the new SP.  */
10749 if (r10_live
&& eax_live
)
10751 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10752 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
10753 gen_frame_mem (word_mode
, t
));
10754 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
- UNITS_PER_WORD
);
10755 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
10756 gen_frame_mem (word_mode
, t
));
10758 else if (eax_live
|| r10_live
)
10760 t
= choose_baseaddr (m
->fs
.sp_offset
- allocate
);
10761 emit_move_insn (gen_rtx_REG (word_mode
,
10762 (eax_live
? AX_REG
: R10_REG
)),
10763 gen_frame_mem (word_mode
, t
));
10766 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
10768 /* If we haven't already set up the frame pointer, do so now. */
10769 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
10771 insn
= ix86_gen_add3 (hard_frame_pointer_rtx
, stack_pointer_rtx
,
10772 GEN_INT (frame
.stack_pointer_offset
10773 - frame
.hard_frame_pointer_offset
));
10774 insn
= emit_insn (insn
);
10775 RTX_FRAME_RELATED_P (insn
) = 1;
10776 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
10778 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
10779 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
10780 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
10781 m
->fs
.fp_valid
= true;
10784 if (!int_registers_saved
)
10785 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
10786 if (!sse_registers_saved
)
10787 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
/* PIC register setup; presumably pic_reg_used is declared on a
   dropped line above — TODO confirm against the full file.  */
10789 pic_reg_used
= false;
10790 if (pic_offset_table_rtx
10791 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
10794 unsigned int alt_pic_reg_used
= ix86_select_alt_pic_regnum ();
10796 if (alt_pic_reg_used
!= INVALID_REGNUM
)
10797 SET_REGNO (pic_offset_table_rtx
, alt_pic_reg_used
);
10799 pic_reg_used
= true;
10806 if (ix86_cmodel
== CM_LARGE_PIC
)
10808 rtx label
, tmp_reg
;
10810 gcc_assert (Pmode
== DImode
);
10811 label
= gen_label_rtx ();
10812 emit_label (label
);
10813 LABEL_PRESERVE_P (label
) = 1;
10814 tmp_reg
= gen_rtx_REG (Pmode
, R11_REG
);
10815 gcc_assert (REGNO (pic_offset_table_rtx
) != REGNO (tmp_reg
));
10816 insn
= emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
10818 insn
= emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
10819 insn
= emit_insn (ix86_gen_add3 (pic_offset_table_rtx
,
10820 pic_offset_table_rtx
, tmp_reg
));
10823 insn
= emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
10827 insn
= emit_insn (gen_set_got (pic_offset_table_rtx
));
10828 RTX_FRAME_RELATED_P (insn
) = 1;
10829 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
10833 /* In the pic_reg_used case, make sure that the got load isn't deleted
10834 when mcount needs it. Blockage to avoid call movement across mcount
10835 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
10837 if (crtl
->profile
&& !flag_fentry
&& pic_reg_used
)
10838 emit_insn (gen_prologue_use (pic_offset_table_rtx
));
10840 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
10842 /* vDRAP is setup but after reload it turns out stack realign
10843 isn't necessary, here we will emit prologue to setup DRAP
10844 without stack realign adjustment */
10845 t
= choose_baseaddr (0);
10846 emit_insn (gen_rtx_SET (VOIDmode
, crtl
->drap_reg
, t
));
10849 /* Prevent instructions from being scheduled into register save push
10850 sequence when access to the redzone area is done through frame pointer.
10851 The offset between the frame pointer and the stack pointer is calculated
10852 relative to the value of the stack pointer at the end of the function
10853 prologue, and moving instructions that access redzone area via frame
10854 pointer inside push sequence violates this assumption. */
10855 if (frame_pointer_needed
&& frame
.red_zone_size
)
10856 emit_insn (gen_memory_blockage ());
10858 /* Emit cld instruction if stringops are used in the function. */
10859 if (TARGET_CLD
&& ix86_current_function_needs_cld
)
10860 emit_insn (gen_cld ());
10862 /* SEH requires that the prologue end within 256 bytes of the start of
10863 the function. Prevent instruction schedules that would extend that.
10864 Further, prevent alloca modifications to the stack pointer from being
10865 combined with prologue modifications. */
10867 emit_insn (gen_prologue_use (stack_pointer_rtx
));
10870 /* Emit code to restore REG using a POP insn. */
/* Pops REG from the stack, records a CFA-restore note, and updates the
   tracked frame state (sp_offset, cfa_offset, drap/fp validity) so the
   unwind info stays consistent with the emitted insn.
   NOTE(review): some lines were dropped by extraction (return type,
   braces, an else between the DRAP and SP cases); verify against the
   complete file.  */
10873 ix86_emit_restore_reg_using_pop (rtx reg
)
10875 struct machine_function
*m
= cfun
->machine
;
10876 rtx insn
= emit_insn (gen_pop (reg
));
10878 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
10879 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
10881 if (m
->fs
.cfa_reg
== crtl
->drap_reg
10882 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
10884 /* Previously we'd represented the CFA as an expression
10885 like *(%ebp - 8). We've just popped that value from
10886 the stack, which means we need to reset the CFA to
10887 the drap register. This will remain until we restore
10888 the stack pointer. */
10889 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10890 RTX_FRAME_RELATED_P (insn
) = 1;
10892 /* This means that the DRAP register is valid for addressing too. */
10893 m
->fs
.drap_valid
= true;
10897 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
/* CFA follows SP: the pop raised SP by one word, so emit a
   REG_CFA_ADJUST_CFA note and shrink the tracked offset.  */
10899 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
10900 x
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, x
);
10901 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
10902 RTX_FRAME_RELATED_P (insn
) = 1;
10904 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10907 /* When the frame pointer is the CFA, and we pop it, we are
10908 swapping back to the stack pointer as the CFA. This happens
10909 for stack frames that don't allocate other data, so we assume
10910 the stack pointer is now pointing at the return address, i.e.
10911 the function entry state, which makes the offset be 1 word. */
10912 if (reg
== hard_frame_pointer_rtx
)
10914 m
->fs
.fp_valid
= false;
10915 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
10917 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10918 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
10920 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10921 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
10922 GEN_INT (m
->fs
.cfa_offset
)));
10923 RTX_FRAME_RELATED_P (insn
) = 1;
10928 /* Emit code to restore saved registers using POP insns. */
10931 ix86_emit_restore_regs_using_pop (void)
10933 unsigned int regno
;
10935 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10936 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, false))
10937 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
10940 /* Emit code and notes for the LEAVE instruction. */
/* LEAVE restores SP from the frame pointer and pops the saved frame
   pointer, so afterwards SP is valid and FP is not; the tracked frame
   state and CFA notes are updated to match.
   NOTE(review): extraction dropped lines here (return type, braces,
   the constant in the plus_constant, and the tail of the final
   cfa-restore call); verify against the complete file.  */
10943 ix86_emit_leave (void)
10945 struct machine_function
*m
= cfun
->machine
;
10946 rtx insn
= emit_insn (ix86_gen_leave ());
10948 ix86_add_queued_cfa_restore_notes (insn
);
10950 gcc_assert (m
->fs
.fp_valid
);
10951 m
->fs
.sp_valid
= true;
10952 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
10953 m
->fs
.fp_valid
= false;
10955 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
/* The CFA was tracked via the frame pointer; switch it back to SP
   and describe the new CFA with a REG_CFA_DEF_CFA note.  */
10957 m
->fs
.cfa_reg
= stack_pointer_rtx
;
10958 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
10960 add_reg_note (insn
, REG_CFA_DEF_CFA
,
10961 plus_constant (Pmode
, stack_pointer_rtx
,
10963 RTX_FRAME_RELATED_P (insn
) = 1;
10965 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
10969 /* Emit code to restore saved registers using MOV insns.
10970 First register is restored from CFA - CFA_OFFSET. */
/* General-purpose counterpart of the SSE variant below: loads each
   saved non-SSE register from its frame slot, emitting CFA notes as
   needed.  MAYBE_EH_RETURN selects the eh_return save set.
   NOTE(review): extraction dropped lines here (return type, the
   mem/insn declarations, braces, an else arm); verify against the
   complete file.  */
10972 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
10973 bool maybe_eh_return
)
10975 struct machine_function
*m
= cfun
->machine
;
10976 unsigned int regno
;
10978 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
10979 if (!SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
10981 rtx reg
= gen_rtx_REG (word_mode
, regno
);
10984 mem
= choose_baseaddr (cfa_offset
);
10985 mem
= gen_frame_mem (word_mode
, mem
);
10986 insn
= emit_move_insn (reg
, mem
);
10988 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
10990 /* Previously we'd represented the CFA as an expression
10991 like *(%ebp - 8). We've just popped that value from
10992 the stack, which means we need to reset the CFA to
10993 the drap register. This will remain until we restore
10994 the stack pointer. */
10995 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
10996 RTX_FRAME_RELATED_P (insn
) = 1;
10998 /* This means that the DRAP register is valid for addressing. */
10999 m
->fs
.drap_valid
= true;
11002 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
/* Slots are laid out a word apart, walking down from CFA_OFFSET.  */
11004 cfa_offset
-= UNITS_PER_WORD
;
11008 /* Emit code to restore saved SSE registers using MOV insns.
11009 First register is restored from CFA - CFA_OFFSET. */
/* Loads each saved SSE register (as V4SF, 128-bit-aligned) from its
   frame slot and queues a CFA-restore note for it.
   NOTE(review): extraction dropped lines here (return type, the mem
   declaration, braces, and the per-iteration cfa_offset decrement);
   verify against the complete file.  */
11011 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
11012 bool maybe_eh_return
)
11014 unsigned int regno
;
11016 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
11017 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
))
11019 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
11022 mem
= choose_baseaddr (cfa_offset
);
11023 mem
= gen_rtx_MEM (V4SFmode
, mem
);
/* The save area is 16-byte aligned; record that on the MEM so an
   aligned vector move can be used.  */
11024 set_mem_align (mem
, 128);
11025 emit_move_insn (reg
, mem
);
11027 ix86_add_cfa_restore_note (NULL_RTX
, reg
, cfa_offset
);
11033 /* Emit vzeroupper if needed. */
11036 ix86_maybe_emit_epilogue_vzeroupper (void)
11038 if (TARGET_VZEROUPPER
11039 && !TREE_THIS_VOLATILE (cfun
->decl
)
11040 && !cfun
->machine
->caller_return_avx256_p
)
11041 emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256
)));
11044 /* Restore function stack, frame, and registers. */
11047 ix86_expand_epilogue (int style
)
11049 struct machine_function
*m
= cfun
->machine
;
11050 struct machine_frame_state frame_state_save
= m
->fs
;
11051 struct ix86_frame frame
;
11052 bool restore_regs_via_mov
;
11055 ix86_finalize_stack_realign_flags ();
11056 ix86_compute_frame_layout (&frame
);
11058 m
->fs
.sp_valid
= (!frame_pointer_needed
11059 || (crtl
->sp_is_unchanging
11060 && !stack_realign_fp
));
11061 gcc_assert (!m
->fs
.sp_valid
11062 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
11064 /* The FP must be valid if the frame pointer is present. */
11065 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
11066 gcc_assert (!m
->fs
.fp_valid
11067 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
11069 /* We must have *some* valid pointer to the stack frame. */
11070 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
11072 /* The DRAP is never valid at this point. */
11073 gcc_assert (!m
->fs
.drap_valid
);
11075 /* See the comment about red zone and frame
11076 pointer usage in ix86_expand_prologue. */
11077 if (frame_pointer_needed
&& frame
.red_zone_size
)
11078 emit_insn (gen_memory_blockage ());
11080 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
11081 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
11083 /* Determine the CFA offset of the end of the red-zone. */
11084 m
->fs
.red_zone_offset
= 0;
11085 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
11087 /* The red-zone begins below the return address. */
11088 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ UNITS_PER_WORD
;
11090 /* When the register save area is in the aligned portion of
11091 the stack, determine the maximum runtime displacement that
11092 matches up with the aligned frame. */
11093 if (stack_realign_drap
)
11094 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
11098 /* Special care must be taken for the normal return case of a function
11099 using eh_return: the eax and edx registers are marked as saved, but
11100 not restored along this path. Adjust the save location to match. */
11101 if (crtl
->calls_eh_return
&& style
!= 2)
11102 frame
.reg_save_offset
-= 2 * UNITS_PER_WORD
;
11104 /* EH_RETURN requires the use of moves to function properly. */
11105 if (crtl
->calls_eh_return
)
11106 restore_regs_via_mov
= true;
11107 /* SEH requires the use of pops to identify the epilogue. */
11108 else if (TARGET_SEH
)
11109 restore_regs_via_mov
= false;
11110 /* If we're only restoring one register and sp is not valid then
11111 using a move instruction to restore the register since it's
11112 less work than reloading sp and popping the register. */
11113 else if (!m
->fs
.sp_valid
&& frame
.nregs
<= 1)
11114 restore_regs_via_mov
= true;
11115 else if (TARGET_EPILOGUE_USING_MOVE
11116 && cfun
->machine
->use_fast_prologue_epilogue
11117 && (frame
.nregs
> 1
11118 || m
->fs
.sp_offset
!= frame
.reg_save_offset
))
11119 restore_regs_via_mov
= true;
11120 else if (frame_pointer_needed
11122 && m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11123 restore_regs_via_mov
= true;
11124 else if (frame_pointer_needed
11125 && TARGET_USE_LEAVE
11126 && cfun
->machine
->use_fast_prologue_epilogue
11127 && frame
.nregs
== 1)
11128 restore_regs_via_mov
= true;
11130 restore_regs_via_mov
= false;
11132 if (restore_regs_via_mov
|| frame
.nsseregs
)
11134 /* Ensure that the entire register save area is addressable via
11135 the stack pointer, if we will restore via sp. */
11137 && m
->fs
.sp_offset
> 0x7fffffff
11138 && !(m
->fs
.fp_valid
|| m
->fs
.drap_valid
)
11139 && (frame
.nsseregs
+ frame
.nregs
) != 0)
11141 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11142 GEN_INT (m
->fs
.sp_offset
11143 - frame
.sse_reg_save_offset
),
11145 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11149 /* If there are any SSE registers to restore, then we have to do it
11150 via moves, since there's obviously no pop for SSE regs. */
11151 if (frame
.nsseregs
)
11152 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
11155 if (restore_regs_via_mov
)
11160 ix86_emit_restore_regs_using_mov (frame
.reg_save_offset
, style
== 2);
11162 /* eh_return epilogues need %ecx added to the stack pointer. */
11165 rtx insn
, sa
= EH_RETURN_STACKADJ_RTX
;
11167 /* Stack align doesn't work with eh_return. */
11168 gcc_assert (!stack_realign_drap
);
11169 /* Neither does regparm nested functions. */
11170 gcc_assert (!ix86_static_chain_on_stack
);
11172 if (frame_pointer_needed
)
11174 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
11175 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
11176 emit_insn (gen_rtx_SET (VOIDmode
, sa
, t
));
11178 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
11179 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
11181 /* Note that we use SA as a temporary CFA, as the return
11182 address is at the proper place relative to it. We
11183 pretend this happens at the FP restore insn because
11184 prior to this insn the FP would be stored at the wrong
11185 offset relative to SA, and after this insn we have no
11186 other reasonable register to use for the CFA. We don't
11187 bother resetting the CFA to the SP for the duration of
11188 the return insn. */
11189 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11190 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
11191 ix86_add_queued_cfa_restore_notes (insn
);
11192 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
11193 RTX_FRAME_RELATED_P (insn
) = 1;
11195 m
->fs
.cfa_reg
= sa
;
11196 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11197 m
->fs
.fp_valid
= false;
11199 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
11200 const0_rtx
, style
, false);
11204 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
11205 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
11206 insn
= emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, t
));
11207 ix86_add_queued_cfa_restore_notes (insn
);
11209 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
11210 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
11212 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
11213 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11214 plus_constant (Pmode
, stack_pointer_rtx
,
11216 RTX_FRAME_RELATED_P (insn
) = 1;
11219 m
->fs
.sp_offset
= UNITS_PER_WORD
;
11220 m
->fs
.sp_valid
= true;
11225 /* SEH requires that the function end with (1) a stack adjustment
11226 if necessary, (2) a sequence of pops, and (3) a return or
11227 jump instruction. Prevent insns from the function body from
11228 being scheduled into this sequence. */
11231 /* Prevent a catch region from being adjacent to the standard
11232 epilogue sequence. Unfortuantely crtl->uses_eh_lsda nor
11233 several other flags that would be interesting to test are
11235 if (flag_non_call_exceptions
)
11236 emit_insn (gen_nops (const1_rtx
));
11238 emit_insn (gen_blockage ());
11241 /* First step is to deallocate the stack frame so that we can
11242 pop the registers. Also do it on SEH target for very large
11243 frame as the emitted instructions aren't allowed by the ABI in
11245 if (!m
->fs
.sp_valid
11247 && (m
->fs
.sp_offset
- frame
.reg_save_offset
11248 >= SEH_MAX_FRAME_SIZE
)))
11250 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
11251 GEN_INT (m
->fs
.fp_offset
11252 - frame
.reg_save_offset
),
11255 else if (m
->fs
.sp_offset
!= frame
.reg_save_offset
)
11257 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11258 GEN_INT (m
->fs
.sp_offset
11259 - frame
.reg_save_offset
),
11261 m
->fs
.cfa_reg
== stack_pointer_rtx
);
11264 ix86_emit_restore_regs_using_pop ();
11267 /* If we used a stack pointer and haven't already got rid of it,
11269 if (m
->fs
.fp_valid
)
11271 /* If the stack pointer is valid and pointing at the frame
11272 pointer store address, then we only need a pop. */
11273 if (m
->fs
.sp_valid
&& m
->fs
.sp_offset
== frame
.hfp_save_offset
)
11274 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11275 /* Leave results in shorter dependency chains on CPUs that are
11276 able to grok it fast. */
11277 else if (TARGET_USE_LEAVE
11278 || optimize_function_for_size_p (cfun
)
11279 || !cfun
->machine
->use_fast_prologue_epilogue
)
11280 ix86_emit_leave ();
11283 pro_epilogue_adjust_stack (stack_pointer_rtx
,
11284 hard_frame_pointer_rtx
,
11285 const0_rtx
, style
, !using_drap
);
11286 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
11292 int param_ptr_offset
= UNITS_PER_WORD
;
11295 gcc_assert (stack_realign_drap
);
11297 if (ix86_static_chain_on_stack
)
11298 param_ptr_offset
+= UNITS_PER_WORD
;
11299 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11300 param_ptr_offset
+= UNITS_PER_WORD
;
11302 insn
= emit_insn (gen_rtx_SET
11303 (VOIDmode
, stack_pointer_rtx
,
11304 gen_rtx_PLUS (Pmode
,
11306 GEN_INT (-param_ptr_offset
))));
11307 m
->fs
.cfa_reg
= stack_pointer_rtx
;
11308 m
->fs
.cfa_offset
= param_ptr_offset
;
11309 m
->fs
.sp_offset
= param_ptr_offset
;
11310 m
->fs
.realigned
= false;
11312 add_reg_note (insn
, REG_CFA_DEF_CFA
,
11313 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11314 GEN_INT (param_ptr_offset
)));
11315 RTX_FRAME_RELATED_P (insn
) = 1;
11317 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
11318 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
11321 /* At this point the stack pointer must be valid, and we must have
11322 restored all of the registers. We may not have deallocated the
11323 entire stack frame. We've delayed this until now because it may
11324 be possible to merge the local stack deallocation with the
11325 deallocation forced by ix86_static_chain_on_stack. */
11326 gcc_assert (m
->fs
.sp_valid
);
11327 gcc_assert (!m
->fs
.fp_valid
);
11328 gcc_assert (!m
->fs
.realigned
);
11329 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
11331 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11332 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
11336 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11338 /* Sibcall epilogues don't want a return instruction. */
11341 m
->fs
= frame_state_save
;
11345 /* Emit vzeroupper if needed. */
11346 ix86_maybe_emit_epilogue_vzeroupper ();
11348 if (crtl
->args
.pops_args
&& crtl
->args
.size
)
11350 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
11352 /* i386 can only pop 64K bytes. If asked to pop more, pop return
11353 address, do explicit add, and jump indirectly to the caller. */
11355 if (crtl
->args
.pops_args
>= 65536)
11357 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
11360 /* There is no "pascal" calling convention in any 64bit ABI. */
11361 gcc_assert (!TARGET_64BIT
);
11363 insn
= emit_insn (gen_pop (ecx
));
11364 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
11365 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
11367 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
11368 copy_rtx (XVECEXP (PATTERN (insn
), 0, 1)));
11369 add_reg_note (insn
, REG_CFA_REGISTER
,
11370 gen_rtx_SET (VOIDmode
, ecx
, pc_rtx
));
11371 RTX_FRAME_RELATED_P (insn
) = 1;
11373 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
11375 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
11378 emit_jump_insn (gen_simple_return_pop_internal (popc
));
11381 emit_jump_insn (gen_simple_return_internal ());
11383 /* Restore the state back to the state from the prologue,
11384 so that it's correct for the next epilogue. */
11385 m
->fs
= frame_state_save
;
11388 /* Reset from the function's potential modifications. */
11391 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11392 HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
11394 if (pic_offset_table_rtx
)
11395 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
11397 /* Mach-O doesn't support labels at the end of objects, so if
11398 it looks like we might want one, insert a NOP. */
11400 rtx insn
= get_last_insn ();
11401 rtx deleted_debug_label
= NULL_RTX
;
11404 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
11406 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11407 notes only, instead set their CODE_LABEL_NUMBER to -1,
11408 otherwise there would be code generation differences
11409 in between -g and -g0. */
11410 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11411 deleted_debug_label
= insn
;
11412 insn
= PREV_INSN (insn
);
11417 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
)))
11418 fputs ("\tnop\n", file
);
11419 else if (deleted_debug_label
)
11420 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
11421 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
11422 CODE_LABEL_NUMBER (insn
) = -1;
11428 /* Return a scratch register to use in the split stack prologue. The
11429 split stack prologue is used for -fsplit-stack. It is the first
11430 instructions in the function, even before the regular prologue.
11431 The scratch register can be any caller-saved register which is not
11432 used for parameters or for the static chain. */
11434 static unsigned int
11435 split_stack_prologue_scratch_regno (void)
11444 is_fastcall
= (lookup_attribute ("fastcall",
11445 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
11447 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
11451 if (DECL_STATIC_CHAIN (cfun
->decl
))
11453 sorry ("-fsplit-stack does not support fastcall with "
11454 "nested function");
11455 return INVALID_REGNUM
;
11459 else if (regparm
< 3)
11461 if (!DECL_STATIC_CHAIN (cfun
->decl
))
11467 sorry ("-fsplit-stack does not support 2 register "
11468 " parameters for a nested function");
11469 return INVALID_REGNUM
;
11476 /* FIXME: We could make this work by pushing a register
11477 around the addition and comparison. */
11478 sorry ("-fsplit-stack does not support 3 register parameters");
11479 return INVALID_REGNUM
;
11484 /* A SYMBOL_REF for the function which allocates new stackspace for
11487 static GTY(()) rtx split_stack_fn
;
11489 /* A SYMBOL_REF for the more stack function when using the large
11492 static GTY(()) rtx split_stack_fn_large
;
11494 /* Handle -fsplit-stack. These are the first instructions in the
11495 function, even before the regular prologue. */
11498 ix86_expand_split_stack_prologue (void)
11500 struct ix86_frame frame
;
11501 HOST_WIDE_INT allocate
;
11502 unsigned HOST_WIDE_INT args_size
;
11503 rtx label
, limit
, current
, jump_insn
, allocate_rtx
, call_insn
, call_fusage
;
11504 rtx scratch_reg
= NULL_RTX
;
11505 rtx varargs_label
= NULL_RTX
;
11508 gcc_assert (flag_split_stack
&& reload_completed
);
11510 ix86_finalize_stack_realign_flags ();
11511 ix86_compute_frame_layout (&frame
);
11512 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
11514 /* This is the label we will branch to if we have enough stack
11515 space. We expect the basic block reordering pass to reverse this
11516 branch if optimizing, so that we branch in the unlikely case. */
11517 label
= gen_label_rtx ();
11519 /* We need to compare the stack pointer minus the frame size with
11520 the stack boundary in the TCB. The stack boundary always gives
11521 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11522 can compare directly. Otherwise we need to do an addition. */
11524 limit
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
11525 UNSPEC_STACK_CHECK
);
11526 limit
= gen_rtx_CONST (Pmode
, limit
);
11527 limit
= gen_rtx_MEM (Pmode
, limit
);
11528 if (allocate
< SPLIT_STACK_AVAILABLE
)
11529 current
= stack_pointer_rtx
;
11532 unsigned int scratch_regno
;
11535 /* We need a scratch register to hold the stack pointer minus
11536 the required frame size. Since this is the very start of the
11537 function, the scratch register can be any caller-saved
11538 register which is not used for parameters. */
11539 offset
= GEN_INT (- allocate
);
11540 scratch_regno
= split_stack_prologue_scratch_regno ();
11541 if (scratch_regno
== INVALID_REGNUM
)
11543 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11544 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
11546 /* We don't use ix86_gen_add3 in this case because it will
11547 want to split to lea, but when not optimizing the insn
11548 will not be split after this point. */
11549 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11550 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11555 emit_move_insn (scratch_reg
, offset
);
11556 emit_insn (ix86_gen_add3 (scratch_reg
, scratch_reg
,
11557 stack_pointer_rtx
));
11559 current
= scratch_reg
;
11562 ix86_expand_branch (GEU
, current
, limit
, label
);
11563 jump_insn
= get_last_insn ();
11564 JUMP_LABEL (jump_insn
) = label
;
11566 /* Mark the jump as very likely to be taken. */
11567 add_reg_note (jump_insn
, REG_BR_PROB
,
11568 GEN_INT (REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100));
11570 if (split_stack_fn
== NULL_RTX
)
11571 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
11572 fn
= split_stack_fn
;
11574 /* Get more stack space. We pass in the desired stack space and the
11575 size of the arguments to copy to the new stack. In 32-bit mode
11576 we push the parameters; __morestack will return on a new stack
11577 anyhow. In 64-bit mode we pass the parameters in r10 and
11579 allocate_rtx
= GEN_INT (allocate
);
11580 args_size
= crtl
->args
.size
>= 0 ? crtl
->args
.size
: 0;
11581 call_fusage
= NULL_RTX
;
11586 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
11587 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
11589 /* If this function uses a static chain, it will be in %r10.
11590 Preserve it across the call to __morestack. */
11591 if (DECL_STATIC_CHAIN (cfun
->decl
))
11595 rax
= gen_rtx_REG (word_mode
, AX_REG
);
11596 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
11597 use_reg (&call_fusage
, rax
);
11600 if (ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
11602 HOST_WIDE_INT argval
;
11604 gcc_assert (Pmode
== DImode
);
11605 /* When using the large model we need to load the address
11606 into a register, and we've run out of registers. So we
11607 switch to a different calling convention, and we call a
11608 different function: __morestack_large. We pass the
11609 argument size in the upper 32 bits of r10 and pass the
11610 frame size in the lower 32 bits. */
11611 gcc_assert ((allocate
& (HOST_WIDE_INT
) 0xffffffff) == allocate
);
11612 gcc_assert ((args_size
& 0xffffffff) == args_size
);
11614 if (split_stack_fn_large
== NULL_RTX
)
11615 split_stack_fn_large
=
11616 gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
11618 if (ix86_cmodel
== CM_LARGE_PIC
)
11622 label
= gen_label_rtx ();
11623 emit_label (label
);
11624 LABEL_PRESERVE_P (label
) = 1;
11625 emit_insn (gen_set_rip_rex64 (reg10
, label
));
11626 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
11627 emit_insn (ix86_gen_add3 (reg10
, reg10
, reg11
));
11628 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
11630 x
= gen_rtx_CONST (Pmode
, x
);
11631 emit_move_insn (reg11
, x
);
11632 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
11633 x
= gen_const_mem (Pmode
, x
);
11634 emit_move_insn (reg11
, x
);
11637 emit_move_insn (reg11
, split_stack_fn_large
);
11641 argval
= ((args_size
<< 16) << 16) + allocate
;
11642 emit_move_insn (reg10
, GEN_INT (argval
));
11646 emit_move_insn (reg10
, allocate_rtx
);
11647 emit_move_insn (reg11
, GEN_INT (args_size
));
11648 use_reg (&call_fusage
, reg11
);
11651 use_reg (&call_fusage
, reg10
);
11655 emit_insn (gen_push (GEN_INT (args_size
)));
11656 emit_insn (gen_push (allocate_rtx
));
11658 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
11659 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
11661 add_function_usage_to (call_insn
, call_fusage
);
11663 /* In order to make call/return prediction work right, we now need
11664 to execute a return instruction. See
11665 libgcc/config/i386/morestack.S for the details on how this works.
11667 For flow purposes gcc must not see this as a return
11668 instruction--we need control flow to continue at the subsequent
11669 label. Therefore, we use an unspec. */
11670 gcc_assert (crtl
->args
.pops_args
< 65536);
11671 emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
11673 /* If we are in 64-bit mode and this function uses a static chain,
11674 we saved %r10 in %rax before calling _morestack. */
11675 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
11676 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
11677 gen_rtx_REG (word_mode
, AX_REG
));
11679 /* If this function calls va_start, we need to store a pointer to
11680 the arguments on the old stack, because they may not have been
11681 all copied to the new stack. At this point the old stack can be
11682 found at the frame pointer value used by __morestack, because
11683 __morestack has set that up before calling back to us. Here we
11684 store that pointer in a scratch register, and in
11685 ix86_expand_prologue we store the scratch register in a stack
11687 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11689 unsigned int scratch_regno
;
11693 scratch_regno
= split_stack_prologue_scratch_regno ();
11694 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
11695 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
11699 return address within this function
11700 return address of caller of this function
11702 So we add three words to get to the stack arguments.
11706 return address within this function
11707 first argument to __morestack
11708 second argument to __morestack
11709 return address of caller of this function
11711 So we add five words to get to the stack arguments.
11713 words
= TARGET_64BIT
? 3 : 5;
11714 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11715 gen_rtx_PLUS (Pmode
, frame_reg
,
11716 GEN_INT (words
* UNITS_PER_WORD
))));
11718 varargs_label
= gen_label_rtx ();
11719 emit_jump_insn (gen_jump (varargs_label
));
11720 JUMP_LABEL (get_last_insn ()) = varargs_label
;
11725 emit_label (label
);
11726 LABEL_NUSES (label
) = 1;
11728 /* If this function calls va_start, we now have to set the scratch
11729 register for the case where we do not call __morestack. In this
11730 case we need to set it based on the stack pointer. */
11731 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11733 emit_insn (gen_rtx_SET (VOIDmode
, scratch_reg
,
11734 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
11735 GEN_INT (UNITS_PER_WORD
))));
11737 emit_label (varargs_label
);
11738 LABEL_NUSES (varargs_label
) = 1;
11742 /* We may have to tell the dataflow pass that the split stack prologue
11743 is initializing a scratch register. */
11746 ix86_live_on_entry (bitmap regs
)
11748 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
11750 gcc_assert (flag_split_stack
);
11751 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
11755 /* Determine if op is suitable SUBREG RTX for address. */
11758 ix86_address_subreg_operand (rtx op
)
11760 enum machine_mode mode
;
11765 mode
= GET_MODE (op
);
11767 if (GET_MODE_CLASS (mode
) != MODE_INT
)
11770 /* Don't allow SUBREGs that span more than a word. It can lead to spill
11771 failures when the register is one word out of a two word structure. */
11772 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
11775 /* simplify_subreg does not handle stack pointer. */
11776 if (REGNO (op
) == STACK_POINTER_REGNUM
)
11779 /* Allow only SUBREGs of non-eliminable hard registers. */
11780 return register_no_elim_operand (op
, mode
);
11783 /* Extract the parts of an RTL expression that is a valid memory address
11784 for an instruction. Return 0 if the structure of the address is
11785 grossly off. Return -1 if the address contains ASHIFT, so it is not
11786 strictly valid, but still used for computing length of lea instruction. */
11789 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
11791 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
11792 rtx base_reg
, index_reg
;
11793 HOST_WIDE_INT scale
= 1;
11794 rtx scale_rtx
= NULL_RTX
;
11797 enum ix86_address_seg seg
= SEG_DEFAULT
;
11799 /* Allow zero-extended SImode addresses,
11800 they will be emitted with addr32 prefix. */
11801 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
11803 if (GET_CODE (addr
) == ZERO_EXTEND
11804 && GET_MODE (XEXP (addr
, 0)) == SImode
)
11806 addr
= XEXP (addr
, 0);
11807 if (CONST_INT_P (addr
))
11810 else if (GET_CODE (addr
) == AND
11811 && const_32bit_mask (XEXP (addr
, 1), DImode
))
11813 addr
= simplify_gen_subreg (SImode
, XEXP (addr
, 0), DImode
, 0);
11814 if (addr
== NULL_RTX
)
11817 if (CONST_INT_P (addr
))
11822 /* Allow SImode subregs of DImode addresses,
11823 they will be emitted with addr32 prefix. */
11824 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
11826 if (GET_CODE (addr
) == SUBREG
11827 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
11829 addr
= SUBREG_REG (addr
);
11830 if (CONST_INT_P (addr
))
11837 else if (GET_CODE (addr
) == SUBREG
)
11839 if (ix86_address_subreg_operand (SUBREG_REG (addr
)))
11844 else if (GET_CODE (addr
) == PLUS
)
11846 rtx addends
[4], op
;
11854 addends
[n
++] = XEXP (op
, 1);
11857 while (GET_CODE (op
) == PLUS
);
11862 for (i
= n
; i
>= 0; --i
)
11865 switch (GET_CODE (op
))
11870 index
= XEXP (op
, 0);
11871 scale_rtx
= XEXP (op
, 1);
11877 index
= XEXP (op
, 0);
11878 tmp
= XEXP (op
, 1);
11879 if (!CONST_INT_P (tmp
))
11881 scale
= INTVAL (tmp
);
11882 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11884 scale
= 1 << scale
;
11889 if (GET_CODE (op
) != UNSPEC
)
11894 if (XINT (op
, 1) == UNSPEC_TP
11895 && TARGET_TLS_DIRECT_SEG_REFS
11896 && seg
== SEG_DEFAULT
)
11897 seg
= TARGET_64BIT
? SEG_FS
: SEG_GS
;
11903 if (!ix86_address_subreg_operand (SUBREG_REG (op
)))
11930 else if (GET_CODE (addr
) == MULT
)
11932 index
= XEXP (addr
, 0); /* index*scale */
11933 scale_rtx
= XEXP (addr
, 1);
11935 else if (GET_CODE (addr
) == ASHIFT
)
11937 /* We're called for lea too, which implements ashift on occasion. */
11938 index
= XEXP (addr
, 0);
11939 tmp
= XEXP (addr
, 1);
11940 if (!CONST_INT_P (tmp
))
11942 scale
= INTVAL (tmp
);
11943 if ((unsigned HOST_WIDE_INT
) scale
> 3)
11945 scale
= 1 << scale
;
11948 else if (CONST_INT_P (addr
))
11950 if (!x86_64_immediate_operand (addr
, VOIDmode
))
11953 /* Constant addresses are sign extended to 64bit, we have to
11954 prevent addresses from 0x80000000 to 0xffffffff in x32 mode. */
11956 && val_signbit_known_set_p (SImode
, INTVAL (addr
)))
11962 disp
= addr
; /* displacement */
11968 else if (GET_CODE (index
) == SUBREG
11969 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11975 /* Address override works only on the (%reg) part of %fs:(%reg). */
11976 if (seg
!= SEG_DEFAULT
11977 && ((base
&& GET_MODE (base
) != word_mode
)
11978 || (index
&& GET_MODE (index
) != word_mode
)))
11981 /* Extract the integral value of scale. */
11984 if (!CONST_INT_P (scale_rtx
))
11986 scale
= INTVAL (scale_rtx
);
11989 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11990 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11992 /* Avoid useless 0 displacement. */
11993 if (disp
== const0_rtx
&& (base
|| index
))
11996 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11997 if (base_reg
&& index_reg
&& scale
== 1
11998 && (index_reg
== arg_pointer_rtx
11999 || index_reg
== frame_pointer_rtx
12000 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
12003 tmp
= base
, base
= index
, index
= tmp
;
12004 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
12007 /* Special case: %ebp cannot be encoded as a base without a displacement.
12011 && (base_reg
== hard_frame_pointer_rtx
12012 || base_reg
== frame_pointer_rtx
12013 || base_reg
== arg_pointer_rtx
12014 || (REG_P (base_reg
)
12015 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
12016 || REGNO (base_reg
) == R13_REG
))))
12019 /* Special case: on K6, [%esi] makes the instruction vector decoded.
12020 Avoid this by transforming to [%esi+0].
12021 Reload calls address legitimization without cfun defined, so we need
12022 to test cfun for being non-NULL. */
12023 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
12024 && base_reg
&& !index_reg
&& !disp
12025 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
12028 /* Special case: encode reg+reg instead of reg*2. */
12029 if (!base
&& index
&& scale
== 2)
12030 base
= index
, base_reg
= index_reg
, scale
= 1;
12032 /* Special case: scaling cannot be encoded without base or displacement. */
12033 if (!base
&& !disp
&& index
&& scale
!= 1)
12037 out
->index
= index
;
12039 out
->scale
= scale
;
12045 /* Return cost of the memory address x.
12046 For i386, it is better to use a complex address than let gcc copy
12047 the address into a reg and make a new pseudo. But not if the address
12048 requires to two regs - that would mean more pseudos with longer
12051 ix86_address_cost (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
,
12052 addr_space_t as ATTRIBUTE_UNUSED
,
12053 bool speed ATTRIBUTE_UNUSED
)
12055 struct ix86_address parts
;
12057 int ok
= ix86_decompose_address (x
, &parts
);
12061 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
12062 parts
.base
= SUBREG_REG (parts
.base
);
12063 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
12064 parts
.index
= SUBREG_REG (parts
.index
);
12066 /* Attempt to minimize number of registers in the address. */
12068 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
))
12070 && (!REG_P (parts
.index
)
12071 || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)))
12075 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
12077 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
12078 && parts
.base
!= parts
.index
)
12081 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
12082 since it's predecode logic can't detect the length of instructions
12083 and it degenerates to vector decoded. Increase cost of such
12084 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
12085 to split such addresses or even refuse such addresses at all.
12087 Following addressing modes are affected:
12092 The first and last case may be avoidable by explicitly coding the zero in
12093 memory address, but I don't have AMD-K6 machine handy to check this
12097 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12098 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
12099 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
12105 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12106 this is used for to form addresses to local data when -fPIC is in
12110 darwin_local_data_pic (rtx disp
)
12112 return (GET_CODE (disp
) == UNSPEC
12113 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
12116 /* Determine if a given RTX is a valid constant. We already know this
12117 satisfies CONSTANT_P. */
12120 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
12122 switch (GET_CODE (x
))
12127 if (GET_CODE (x
) == PLUS
)
12129 if (!CONST_INT_P (XEXP (x
, 1)))
12134 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
12137 /* Only some unspecs are valid as "constants". */
12138 if (GET_CODE (x
) == UNSPEC
)
12139 switch (XINT (x
, 1))
12142 case UNSPEC_GOTOFF
:
12143 case UNSPEC_PLTOFF
:
12144 return TARGET_64BIT
;
12146 case UNSPEC_NTPOFF
:
12147 x
= XVECEXP (x
, 0, 0);
12148 return (GET_CODE (x
) == SYMBOL_REF
12149 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12150 case UNSPEC_DTPOFF
:
12151 x
= XVECEXP (x
, 0, 0);
12152 return (GET_CODE (x
) == SYMBOL_REF
12153 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
12158 /* We must have drilled down to a symbol. */
12159 if (GET_CODE (x
) == LABEL_REF
)
12161 if (GET_CODE (x
) != SYMBOL_REF
)
12166 /* TLS symbols are never valid. */
12167 if (SYMBOL_REF_TLS_MODEL (x
))
12170 /* DLLIMPORT symbols are never valid. */
12171 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12172 && SYMBOL_REF_DLLIMPORT_P (x
))
12176 /* mdynamic-no-pic */
12177 if (MACHO_DYNAMIC_NO_PIC_P
)
12178 return machopic_symbol_defined_p (x
);
12183 if (GET_MODE (x
) == TImode
12184 && x
!= CONST0_RTX (TImode
)
12190 if (!standard_sse_constant_p (x
))
12197 /* Otherwise we handle everything else in the move patterns. */
12201 /* Determine if it's legal to put X into the constant pool. This
12202 is not possible for the address of thread-local symbols, which
12203 is checked above. */
12206 ix86_cannot_force_const_mem (enum machine_mode mode
, rtx x
)
12208 /* We can always put integral constants and vectors in memory. */
12209 switch (GET_CODE (x
))
12219 return !ix86_legitimate_constant_p (mode
, x
);
12223 /* Nonzero if the constant value X is a legitimate general operand
12224 when generating PIC code. It is given that flag_pic is on and
12225 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12228 legitimate_pic_operand_p (rtx x
)
12232 switch (GET_CODE (x
))
12235 inner
= XEXP (x
, 0);
12236 if (GET_CODE (inner
) == PLUS
12237 && CONST_INT_P (XEXP (inner
, 1)))
12238 inner
= XEXP (inner
, 0);
12240 /* Only some unspecs are valid as "constants". */
12241 if (GET_CODE (inner
) == UNSPEC
)
12242 switch (XINT (inner
, 1))
12245 case UNSPEC_GOTOFF
:
12246 case UNSPEC_PLTOFF
:
12247 return TARGET_64BIT
;
12249 x
= XVECEXP (inner
, 0, 0);
12250 return (GET_CODE (x
) == SYMBOL_REF
12251 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
12252 case UNSPEC_MACHOPIC_OFFSET
:
12253 return legitimate_pic_address_disp_p (x
);
12261 return legitimate_pic_address_disp_p (x
);
12268 /* Determine if a given CONST RTX is a valid memory displacement
12272 legitimate_pic_address_disp_p (rtx disp
)
12276 /* In 64bit mode we can allow direct addresses of symbols and labels
12277 when they are not dynamic symbols. */
12280 rtx op0
= disp
, op1
;
12282 switch (GET_CODE (disp
))
12288 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
12290 op0
= XEXP (XEXP (disp
, 0), 0);
12291 op1
= XEXP (XEXP (disp
, 0), 1);
12292 if (!CONST_INT_P (op1
)
12293 || INTVAL (op1
) >= 16*1024*1024
12294 || INTVAL (op1
) < -16*1024*1024)
12296 if (GET_CODE (op0
) == LABEL_REF
)
12298 if (GET_CODE (op0
) == CONST
12299 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
12300 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
12302 if (GET_CODE (op0
) == UNSPEC
12303 && XINT (op0
, 1) == UNSPEC_PCREL
)
12305 if (GET_CODE (op0
) != SYMBOL_REF
)
12310 /* TLS references should always be enclosed in UNSPEC. */
12311 if (SYMBOL_REF_TLS_MODEL (op0
))
12313 if (!SYMBOL_REF_FAR_ADDR_P (op0
) && SYMBOL_REF_LOCAL_P (op0
)
12314 && ix86_cmodel
!= CM_LARGE_PIC
)
12322 if (GET_CODE (disp
) != CONST
)
12324 disp
= XEXP (disp
, 0);
12328 /* We are unsafe to allow PLUS expressions. This limit allowed distance
12329 of GOT tables. We should not need these anyway. */
12330 if (GET_CODE (disp
) != UNSPEC
12331 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
12332 && XINT (disp
, 1) != UNSPEC_GOTOFF
12333 && XINT (disp
, 1) != UNSPEC_PCREL
12334 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
12337 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
12338 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
12344 if (GET_CODE (disp
) == PLUS
)
12346 if (!CONST_INT_P (XEXP (disp
, 1)))
12348 disp
= XEXP (disp
, 0);
12352 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
12355 if (GET_CODE (disp
) != UNSPEC
)
12358 switch (XINT (disp
, 1))
12363 /* We need to check for both symbols and labels because VxWorks loads
12364 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12366 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12367 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
12368 case UNSPEC_GOTOFF
:
12369 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12370 While ABI specify also 32bit relocation but we don't produce it in
12371 small PIC model at all. */
12372 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
12373 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
12375 return gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
12377 case UNSPEC_GOTTPOFF
:
12378 case UNSPEC_GOTNTPOFF
:
12379 case UNSPEC_INDNTPOFF
:
12382 disp
= XVECEXP (disp
, 0, 0);
12383 return (GET_CODE (disp
) == SYMBOL_REF
12384 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
12385 case UNSPEC_NTPOFF
:
12386 disp
= XVECEXP (disp
, 0, 0);
12387 return (GET_CODE (disp
) == SYMBOL_REF
12388 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
12389 case UNSPEC_DTPOFF
:
12390 disp
= XVECEXP (disp
, 0, 0);
12391 return (GET_CODE (disp
) == SYMBOL_REF
12392 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
12398 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12399 replace the input X, or the original X if no replacement is called for.
12400 The output parameter *WIN is 1 if the calling macro should goto WIN,
12401 0 if it should not. */
12404 ix86_legitimize_reload_address (rtx x
,
12405 enum machine_mode mode ATTRIBUTE_UNUSED
,
12406 int opnum
, int type
,
12407 int ind_levels ATTRIBUTE_UNUSED
)
12409 /* Reload can generate:
12411 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12415 This RTX is rejected from ix86_legitimate_address_p due to
12416 non-strictness of base register 97. Following this rejection,
12417 reload pushes all three components into separate registers,
12418 creating invalid memory address RTX.
12420 Following code reloads only the invalid part of the
12421 memory address RTX. */
12423 if (GET_CODE (x
) == PLUS
12424 && REG_P (XEXP (x
, 1))
12425 && GET_CODE (XEXP (x
, 0)) == PLUS
12426 && REG_P (XEXP (XEXP (x
, 0), 1)))
12429 bool something_reloaded
= false;
12431 base
= XEXP (XEXP (x
, 0), 1);
12432 if (!REG_OK_FOR_BASE_STRICT_P (base
))
12434 push_reload (base
, NULL_RTX
, &XEXP (XEXP (x
, 0), 1), NULL
,
12435 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12436 opnum
, (enum reload_type
) type
);
12437 something_reloaded
= true;
12440 index
= XEXP (x
, 1);
12441 if (!REG_OK_FOR_INDEX_STRICT_P (index
))
12443 push_reload (index
, NULL_RTX
, &XEXP (x
, 1), NULL
,
12444 INDEX_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
12445 opnum
, (enum reload_type
) type
);
12446 something_reloaded
= true;
12449 gcc_assert (something_reloaded
);
12456 /* Recognizes RTL expressions that are valid memory addresses for an
12457 instruction. The MODE argument is the machine mode for the MEM
12458 expression that wants to use this address.
12460 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
12461 convert common non-canonical forms to canonical form so that they will
12465 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
12466 rtx addr
, bool strict
)
12468 struct ix86_address parts
;
12469 rtx base
, index
, disp
;
12470 HOST_WIDE_INT scale
;
12472 if (ix86_decompose_address (addr
, &parts
) <= 0)
12473 /* Decomposition failed. */
12477 index
= parts
.index
;
12479 scale
= parts
.scale
;
12481 /* Validate base register. */
12488 else if (GET_CODE (base
) == SUBREG
&& REG_P (SUBREG_REG (base
)))
12489 reg
= SUBREG_REG (base
);
12491 /* Base is not a register. */
12494 if (GET_MODE (base
) != SImode
&& GET_MODE (base
) != DImode
)
12497 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
12498 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
12499 /* Base is not valid. */
12503 /* Validate index register. */
12510 else if (GET_CODE (index
) == SUBREG
&& REG_P (SUBREG_REG (index
)))
12511 reg
= SUBREG_REG (index
);
12513 /* Index is not a register. */
12516 if (GET_MODE (index
) != SImode
&& GET_MODE (index
) != DImode
)
12519 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
12520 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
12521 /* Index is not valid. */
12525 /* Index and base should have the same mode. */
12527 && GET_MODE (base
) != GET_MODE (index
))
12530 /* Validate scale factor. */
12534 /* Scale without index. */
12537 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
12538 /* Scale is not a valid multiplier. */
12542 /* Validate displacement. */
12545 if (GET_CODE (disp
) == CONST
12546 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
12547 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
12548 switch (XINT (XEXP (disp
, 0), 1))
12550 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12551 used. While ABI specify also 32bit relocations, we don't produce
12552 them at all and use IP relative instead. */
12554 case UNSPEC_GOTOFF
:
12555 gcc_assert (flag_pic
);
12557 goto is_legitimate_pic
;
12559 /* 64bit address unspec. */
12562 case UNSPEC_GOTPCREL
:
12564 gcc_assert (flag_pic
);
12565 goto is_legitimate_pic
;
12567 case UNSPEC_GOTTPOFF
:
12568 case UNSPEC_GOTNTPOFF
:
12569 case UNSPEC_INDNTPOFF
:
12570 case UNSPEC_NTPOFF
:
12571 case UNSPEC_DTPOFF
:
12574 case UNSPEC_STACK_CHECK
:
12575 gcc_assert (flag_split_stack
);
12579 /* Invalid address unspec. */
12583 else if (SYMBOLIC_CONST (disp
)
12587 && MACHOPIC_INDIRECT
12588 && !machopic_operand_p (disp
)
12594 if (TARGET_64BIT
&& (index
|| base
))
12596 /* foo@dtpoff(%rX) is ok. */
12597 if (GET_CODE (disp
) != CONST
12598 || GET_CODE (XEXP (disp
, 0)) != PLUS
12599 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
12600 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
12601 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
12602 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
12603 /* Non-constant pic memory reference. */
12606 else if ((!TARGET_MACHO
|| flag_pic
)
12607 && ! legitimate_pic_address_disp_p (disp
))
12608 /* Displacement is an invalid pic construct. */
12611 else if (MACHO_DYNAMIC_NO_PIC_P
12612 && !ix86_legitimate_constant_p (Pmode
, disp
))
12613 /* displacment must be referenced via non_lazy_pointer */
12617 /* This code used to verify that a symbolic pic displacement
12618 includes the pic_offset_table_rtx register.
12620 While this is good idea, unfortunately these constructs may
12621 be created by "adds using lea" optimization for incorrect
12630 This code is nonsensical, but results in addressing
12631 GOT table with pic_offset_table_rtx base. We can't
12632 just refuse it easily, since it gets matched by
12633 "addsi3" pattern, that later gets split to lea in the
12634 case output register differs from input. While this
12635 can be handled by separate addsi pattern for this case
12636 that never results in lea, this seems to be easier and
12637 correct fix for crash to disable this test. */
12639 else if (GET_CODE (disp
) != LABEL_REF
12640 && !CONST_INT_P (disp
)
12641 && (GET_CODE (disp
) != CONST
12642 || !ix86_legitimate_constant_p (Pmode
, disp
))
12643 && (GET_CODE (disp
) != SYMBOL_REF
12644 || !ix86_legitimate_constant_p (Pmode
, disp
)))
12645 /* Displacement is not constant. */
12647 else if (TARGET_64BIT
12648 && !x86_64_immediate_operand (disp
, VOIDmode
))
12649 /* Displacement is out of range. */
12653 /* Everything looks valid. */
12657 /* Determine if a given RTX is a valid constant address. */
12660 constant_address_p (rtx x
)
12662 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
12665 /* Return a unique alias set for the GOT. */
12667 static alias_set_type
12668 ix86_GOT_alias_set (void)
12670 static alias_set_type set
= -1;
12672 set
= new_alias_set ();
12676 /* Return a legitimate reference for ORIG (an address) using the
12677 register REG. If REG is 0, a new pseudo is generated.
12679 There are two types of references that must be handled:
12681 1. Global data references must load the address from the GOT, via
12682 the PIC reg. An insn is emitted to do this load, and the reg is
12685 2. Static data references, constant pool addresses, and code labels
12686 compute the address as an offset from the GOT, whose base is in
12687 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12688 differentiate them from global data objects. The returned
12689 address is the PIC reg + an unspec constant.
12691 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12692 reg also appears in the address. */
12695 legitimize_pic_address (rtx orig
, rtx reg
)
12698 rtx new_rtx
= orig
;
12702 if (TARGET_MACHO
&& !TARGET_64BIT
)
12705 reg
= gen_reg_rtx (Pmode
);
12706 /* Use the generic Mach-O PIC machinery. */
12707 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
12711 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
12713 else if (TARGET_64BIT
12714 && ix86_cmodel
!= CM_SMALL_PIC
12715 && gotoff_operand (addr
, Pmode
))
12718 /* This symbol may be referenced via a displacement from the PIC
12719 base address (@GOTOFF). */
12721 if (reload_in_progress
)
12722 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12723 if (GET_CODE (addr
) == CONST
)
12724 addr
= XEXP (addr
, 0);
12725 if (GET_CODE (addr
) == PLUS
)
12727 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12729 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12732 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12733 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12735 tmpreg
= gen_reg_rtx (Pmode
);
12738 emit_move_insn (tmpreg
, new_rtx
);
12742 new_rtx
= expand_simple_binop (Pmode
, PLUS
, reg
, pic_offset_table_rtx
,
12743 tmpreg
, 1, OPTAB_DIRECT
);
12746 else new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, tmpreg
);
12748 else if (!TARGET_64BIT
&& gotoff_operand (addr
, Pmode
))
12750 /* This symbol may be referenced via a displacement from the PIC
12751 base address (@GOTOFF). */
12753 if (reload_in_progress
)
12754 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12755 if (GET_CODE (addr
) == CONST
)
12756 addr
= XEXP (addr
, 0);
12757 if (GET_CODE (addr
) == PLUS
)
12759 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
12761 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
12764 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
12765 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12766 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12770 emit_move_insn (reg
, new_rtx
);
12774 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
12775 /* We can't use @GOTOFF for text labels on VxWorks;
12776 see gotoff_operand. */
12777 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
12779 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
12781 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
12782 return legitimize_dllimport_symbol (addr
, true);
12783 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
12784 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
12785 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
12787 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), true);
12788 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
12792 /* For x64 PE-COFF there is no GOT table. So we use address
12794 if (TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
12796 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
12797 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12800 reg
= gen_reg_rtx (Pmode
);
12801 emit_move_insn (reg
, new_rtx
);
12804 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
12806 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTPCREL
);
12807 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12808 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12809 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12812 reg
= gen_reg_rtx (Pmode
);
12813 /* Use directly gen_movsi, otherwise the address is loaded
12814 into register for CSE. We don't want to CSE this addresses,
12815 instead we CSE addresses from the GOT table, so skip this. */
12816 emit_insn (gen_movsi (reg
, new_rtx
));
12821 /* This symbol must be referenced via a load from the
12822 Global Offset Table (@GOT). */
12824 if (reload_in_progress
)
12825 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12826 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
12827 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12829 new_rtx
= force_reg (Pmode
, new_rtx
);
12830 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12831 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
12832 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
12835 reg
= gen_reg_rtx (Pmode
);
12836 emit_move_insn (reg
, new_rtx
);
12842 if (CONST_INT_P (addr
)
12843 && !x86_64_immediate_operand (addr
, VOIDmode
))
12847 emit_move_insn (reg
, addr
);
12851 new_rtx
= force_reg (Pmode
, addr
);
12853 else if (GET_CODE (addr
) == CONST
)
12855 addr
= XEXP (addr
, 0);
12857 /* We must match stuff we generate before. Assume the only
12858 unspecs that can get here are ours. Not that we could do
12859 anything with them anyway.... */
12860 if (GET_CODE (addr
) == UNSPEC
12861 || (GET_CODE (addr
) == PLUS
12862 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
12864 gcc_assert (GET_CODE (addr
) == PLUS
);
12866 if (GET_CODE (addr
) == PLUS
)
12868 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
12870 /* Check first to see if this is a constant offset from a @GOTOFF
12871 symbol reference. */
12872 if (gotoff_operand (op0
, Pmode
)
12873 && CONST_INT_P (op1
))
12877 if (reload_in_progress
)
12878 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
12879 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
12881 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
12882 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
12883 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
12887 emit_move_insn (reg
, new_rtx
);
12893 if (INTVAL (op1
) < -16*1024*1024
12894 || INTVAL (op1
) >= 16*1024*1024)
12896 if (!x86_64_immediate_operand (op1
, Pmode
))
12897 op1
= force_reg (Pmode
, op1
);
12898 new_rtx
= gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
12904 base
= legitimize_pic_address (XEXP (addr
, 0), reg
);
12905 new_rtx
= legitimize_pic_address (XEXP (addr
, 1),
12906 base
== reg
? NULL_RTX
: reg
);
12908 if (CONST_INT_P (new_rtx
))
12909 new_rtx
= plus_constant (Pmode
, base
, INTVAL (new_rtx
));
12912 if (GET_CODE (new_rtx
) == PLUS
&& CONSTANT_P (XEXP (new_rtx
, 1)))
12914 base
= gen_rtx_PLUS (Pmode
, base
, XEXP (new_rtx
, 0));
12915 new_rtx
= XEXP (new_rtx
, 1);
12917 new_rtx
= gen_rtx_PLUS (Pmode
, base
, new_rtx
);
12925 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12928 get_thread_pointer (enum machine_mode tp_mode
, bool to_reg
)
12930 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
12932 if (GET_MODE (tp
) != tp_mode
)
12934 gcc_assert (GET_MODE (tp
) == SImode
);
12935 gcc_assert (tp_mode
== DImode
);
12937 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
12941 tp
= copy_to_mode_reg (tp_mode
, tp
);
12946 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12948 static GTY(()) rtx ix86_tls_symbol
;
12951 ix86_tls_get_addr (void)
12953 if (!ix86_tls_symbol
)
12956 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
12957 ? "___tls_get_addr" : "__tls_get_addr");
12959 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
12962 return ix86_tls_symbol
;
12965 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12967 static GTY(()) rtx ix86_tls_module_base_symbol
;
12970 ix86_tls_module_base (void)
12972 if (!ix86_tls_module_base_symbol
)
12974 ix86_tls_module_base_symbol
12975 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
12977 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
12978 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
12981 return ix86_tls_module_base_symbol
;
12984 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12985 false if we expect this to be used for a memory address and true if
12986 we expect to load the address into a register. */
12989 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
12991 rtx dest
, base
, off
;
12992 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
12993 enum machine_mode tp_mode
= Pmode
;
12998 case TLS_MODEL_GLOBAL_DYNAMIC
:
12999 dest
= gen_reg_rtx (Pmode
);
13004 pic
= pic_offset_table_rtx
;
13007 pic
= gen_reg_rtx (Pmode
);
13008 emit_insn (gen_set_got (pic
));
13012 if (TARGET_GNU2_TLS
)
13015 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
13017 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
13019 tp
= get_thread_pointer (Pmode
, true);
13020 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
13022 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13026 rtx caddr
= ix86_tls_get_addr ();
13030 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
;
13033 emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax
, x
,
13035 insns
= get_insns ();
13038 RTL_CONST_CALL_P (insns
) = 1;
13039 emit_libcall_block (insns
, dest
, rax
, x
);
13042 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
13046 case TLS_MODEL_LOCAL_DYNAMIC
:
13047 base
= gen_reg_rtx (Pmode
);
13052 pic
= pic_offset_table_rtx
;
13055 pic
= gen_reg_rtx (Pmode
);
13056 emit_insn (gen_set_got (pic
));
13060 if (TARGET_GNU2_TLS
)
13062 rtx tmp
= ix86_tls_module_base ();
13065 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
13067 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
13069 tp
= get_thread_pointer (Pmode
, true);
13070 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
13071 gen_rtx_MINUS (Pmode
, tmp
, tp
));
13075 rtx caddr
= ix86_tls_get_addr ();
13079 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
), insns
, eqv
;
13082 emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax
,
13084 insns
= get_insns ();
13087 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13088 share the LD_BASE result with other LD model accesses. */
13089 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
13090 UNSPEC_TLS_LD_BASE
);
13092 RTL_CONST_CALL_P (insns
) = 1;
13093 emit_libcall_block (insns
, base
, rax
, eqv
);
13096 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
13099 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
13100 off
= gen_rtx_CONST (Pmode
, off
);
13102 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
13104 if (TARGET_GNU2_TLS
)
13106 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
13108 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
13112 case TLS_MODEL_INITIAL_EXEC
:
13115 if (TARGET_SUN_TLS
&& !TARGET_X32
)
13117 /* The Sun linker took the AMD64 TLS spec literally
13118 and can only handle %rax as destination of the
13119 initial executable code sequence. */
13121 dest
= gen_reg_rtx (DImode
);
13122 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
13126 /* Generate DImode references to avoid %fs:(%reg32)
13127 problems and linker IE->LE relaxation bug. */
13130 type
= UNSPEC_GOTNTPOFF
;
13134 if (reload_in_progress
)
13135 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM
, true);
13136 pic
= pic_offset_table_rtx
;
13137 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
13139 else if (!TARGET_ANY_GNU_TLS
)
13141 pic
= gen_reg_rtx (Pmode
);
13142 emit_insn (gen_set_got (pic
));
13143 type
= UNSPEC_GOTTPOFF
;
13148 type
= UNSPEC_INDNTPOFF
;
13151 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
13152 off
= gen_rtx_CONST (tp_mode
, off
);
13154 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
13155 off
= gen_const_mem (tp_mode
, off
);
13156 set_mem_alias_set (off
, ix86_GOT_alias_set ());
13158 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13160 base
= get_thread_pointer (tp_mode
,
13161 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13162 off
= force_reg (tp_mode
, off
);
13163 return gen_rtx_PLUS (tp_mode
, base
, off
);
13167 base
= get_thread_pointer (Pmode
, true);
13168 dest
= gen_reg_rtx (Pmode
);
13169 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13173 case TLS_MODEL_LOCAL_EXEC
:
13174 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
13175 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13176 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
13177 off
= gen_rtx_CONST (Pmode
, off
);
13179 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
13181 base
= get_thread_pointer (Pmode
,
13182 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
13183 return gen_rtx_PLUS (Pmode
, base
, off
);
13187 base
= get_thread_pointer (Pmode
, true);
13188 dest
= gen_reg_rtx (Pmode
);
13189 emit_insn (ix86_gen_sub3 (dest
, base
, off
));
13194 gcc_unreachable ();
13200 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13203 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map
)))
13204 htab_t dllimport_map
;
13207 get_dllimport_decl (tree decl
)
13209 struct tree_map
*h
, in
;
13212 const char *prefix
;
13213 size_t namelen
, prefixlen
;
13218 if (!dllimport_map
)
13219 dllimport_map
= htab_create_ggc (512, tree_map_hash
, tree_map_eq
, 0);
13221 in
.hash
= htab_hash_pointer (decl
);
13222 in
.base
.from
= decl
;
13223 loc
= htab_find_slot_with_hash (dllimport_map
, &in
, in
.hash
, INSERT
);
13224 h
= (struct tree_map
*) *loc
;
13228 *loc
= h
= ggc_alloc_tree_map ();
13230 h
->base
.from
= decl
;
13231 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
13232 VAR_DECL
, NULL
, ptr_type_node
);
13233 DECL_ARTIFICIAL (to
) = 1;
13234 DECL_IGNORED_P (to
) = 1;
13235 DECL_EXTERNAL (to
) = 1;
13236 TREE_READONLY (to
) = 1;
13238 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
13239 name
= targetm
.strip_name_encoding (name
);
13240 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
13241 ? "*__imp_" : "*__imp__";
13242 namelen
= strlen (name
);
13243 prefixlen
= strlen (prefix
);
13244 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
13245 memcpy (imp_name
, prefix
, prefixlen
);
13246 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
13248 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
13249 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
13250 SET_SYMBOL_REF_DECL (rtl
, to
);
13251 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
;
13253 rtl
= gen_const_mem (Pmode
, rtl
);
13254 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
13256 SET_DECL_RTL (to
, rtl
);
13257 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
13262 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13263 true if we require the result be a register. */
13266 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
13271 gcc_assert (SYMBOL_REF_DECL (symbol
));
13272 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
));
13274 x
= DECL_RTL (imp_decl
);
13276 x
= force_reg (Pmode
, x
);
13280 /* Try machine-dependent ways of modifying an illegitimate address
13281 to be legitimate. If we find one, return the new, valid address.
13282 This macro is used in only one place: `memory_address' in explow.c.
13284 OLDX is the address as it was before break_out_memory_refs was called.
13285 In some cases it is useful to look at this to decide what needs to be done.
13287 It is always safe for this macro to do nothing. It exists to recognize
13288 opportunities to optimize the output.
13290 For the 80386, we handle X+REG by loading X into a register R and
13291 using R+REG. R will go in a general reg and indexing will be used.
13292 However, if REG is a broken-out memory address or multiplication,
13293 nothing needs to be done because REG can certainly go in a general reg.
13295 When -fpic is used, special handling is needed for symbolic references.
13296 See comments by legitimize_pic_address in i386.c for details. */
13299 ix86_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
13300 enum machine_mode mode
)
13305 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
13307 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
13308 if (GET_CODE (x
) == CONST
13309 && GET_CODE (XEXP (x
, 0)) == PLUS
13310 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13311 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
13313 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
13314 (enum tls_model
) log
, false);
13315 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13318 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
13320 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (x
))
13321 return legitimize_dllimport_symbol (x
, true);
13322 if (GET_CODE (x
) == CONST
13323 && GET_CODE (XEXP (x
, 0)) == PLUS
13324 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
13325 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x
, 0), 0)))
13327 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (x
, 0), 0), true);
13328 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
13332 if (flag_pic
&& SYMBOLIC_CONST (x
))
13333 return legitimize_pic_address (x
, 0);
13336 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
13337 return machopic_indirect_data_reference (x
, 0);
13340 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
13341 if (GET_CODE (x
) == ASHIFT
13342 && CONST_INT_P (XEXP (x
, 1))
13343 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
13346 log
= INTVAL (XEXP (x
, 1));
13347 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
13348 GEN_INT (1 << log
));
13351 if (GET_CODE (x
) == PLUS
)
13353 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13355 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
13356 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13357 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
13360 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
13361 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
13362 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
13363 GEN_INT (1 << log
));
13366 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
13367 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
13368 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
13371 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
13372 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
13373 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
13374 GEN_INT (1 << log
));
13377 /* Put multiply first if it isn't already. */
13378 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13380 rtx tmp
= XEXP (x
, 0);
13381 XEXP (x
, 0) = XEXP (x
, 1);
13386 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13387 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13388 created by virtual register instantiation, register elimination, and
13389 similar optimizations. */
13390 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
13393 x
= gen_rtx_PLUS (Pmode
,
13394 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
13395 XEXP (XEXP (x
, 1), 0)),
13396 XEXP (XEXP (x
, 1), 1));
13400 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13401 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13402 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
13403 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
13404 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
13405 && CONSTANT_P (XEXP (x
, 1)))
13408 rtx other
= NULL_RTX
;
13410 if (CONST_INT_P (XEXP (x
, 1)))
13412 constant
= XEXP (x
, 1);
13413 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13415 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
13417 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
13418 other
= XEXP (x
, 1);
13426 x
= gen_rtx_PLUS (Pmode
,
13427 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
13428 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
13429 plus_constant (Pmode
, other
,
13430 INTVAL (constant
)));
13434 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13437 if (GET_CODE (XEXP (x
, 0)) == MULT
)
13440 XEXP (x
, 0) = force_operand (XEXP (x
, 0), 0);
13443 if (GET_CODE (XEXP (x
, 1)) == MULT
)
13446 XEXP (x
, 1) = force_operand (XEXP (x
, 1), 0);
13450 && REG_P (XEXP (x
, 1))
13451 && REG_P (XEXP (x
, 0)))
13454 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
13457 x
= legitimize_pic_address (x
, 0);
13460 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
13463 if (REG_P (XEXP (x
, 0)))
13465 rtx temp
= gen_reg_rtx (Pmode
);
13466 rtx val
= force_operand (XEXP (x
, 1), temp
);
13469 if (GET_MODE (val
) != Pmode
)
13470 val
= convert_to_mode (Pmode
, val
, 1);
13471 emit_move_insn (temp
, val
);
13474 XEXP (x
, 1) = temp
;
13478 else if (REG_P (XEXP (x
, 1)))
13480 rtx temp
= gen_reg_rtx (Pmode
);
13481 rtx val
= force_operand (XEXP (x
, 0), temp
);
13484 if (GET_MODE (val
) != Pmode
)
13485 val
= convert_to_mode (Pmode
, val
, 1);
13486 emit_move_insn (temp
, val
);
13489 XEXP (x
, 0) = temp
;
13497 /* Print an integer constant expression in assembler syntax. Addition
13498 and subtraction are the only arithmetic that may appear in these
13499 expressions. FILE is the stdio stream to write to, X is the rtx, and
13500 CODE is the operand print code from the output string. */
13503 output_pic_addr_const (FILE *file
, rtx x
, int code
)
13507 switch (GET_CODE (x
))
13510 gcc_assert (flag_pic
);
13515 if (TARGET_64BIT
|| ! TARGET_MACHO_BRANCH_ISLANDS
)
13516 output_addr_const (file
, x
);
13519 const char *name
= XSTR (x
, 0);
13521 /* Mark the decl as referenced so that cgraph will
13522 output the function. */
13523 if (SYMBOL_REF_DECL (x
))
13524 mark_decl_referenced (SYMBOL_REF_DECL (x
));
13527 if (MACHOPIC_INDIRECT
13528 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
13529 name
= machopic_indirection_name (x
, /*stub_p=*/true);
13531 assemble_name (file
, name
);
13533 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& DEFAULT_ABI
== MS_ABI
)
13534 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
13535 fputs ("@PLT", file
);
13542 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
13543 assemble_name (asm_out_file
, buf
);
13547 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
13551 /* This used to output parentheses around the expression,
13552 but that does not work on the 386 (either ATT or BSD assembler). */
13553 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13557 if (GET_MODE (x
) == VOIDmode
)
13559 /* We can use %d if the number is <32 bits and positive. */
13560 if (CONST_DOUBLE_HIGH (x
) || CONST_DOUBLE_LOW (x
) < 0)
13561 fprintf (file
, "0x%lx%08lx",
13562 (unsigned long) CONST_DOUBLE_HIGH (x
),
13563 (unsigned long) CONST_DOUBLE_LOW (x
));
13565 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, CONST_DOUBLE_LOW (x
));
13568 /* We can't handle floating point constants;
13569 TARGET_PRINT_OPERAND must handle them. */
13570 output_operand_lossage ("floating constant misused");
13574 /* Some assemblers need integer constants to appear first. */
13575 if (CONST_INT_P (XEXP (x
, 0)))
13577 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13579 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13583 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
13584 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13586 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13592 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
13593 output_pic_addr_const (file
, XEXP (x
, 0), code
);
13595 output_pic_addr_const (file
, XEXP (x
, 1), code
);
13597 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
13601 if (XINT (x
, 1) == UNSPEC_STACK_CHECK
)
13603 bool f
= i386_asm_output_addr_const_extra (file
, x
);
13608 gcc_assert (XVECLEN (x
, 0) == 1);
13609 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
13610 switch (XINT (x
, 1))
13613 fputs ("@GOT", file
);
13615 case UNSPEC_GOTOFF
:
13616 fputs ("@GOTOFF", file
);
13618 case UNSPEC_PLTOFF
:
13619 fputs ("@PLTOFF", file
);
13622 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13623 "(%rip)" : "[rip]", file
);
13625 case UNSPEC_GOTPCREL
:
13626 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13627 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
13629 case UNSPEC_GOTTPOFF
:
13630 /* FIXME: This might be @TPOFF in Sun ld too. */
13631 fputs ("@gottpoff", file
);
13634 fputs ("@tpoff", file
);
13636 case UNSPEC_NTPOFF
:
13638 fputs ("@tpoff", file
);
13640 fputs ("@ntpoff", file
);
13642 case UNSPEC_DTPOFF
:
13643 fputs ("@dtpoff", file
);
13645 case UNSPEC_GOTNTPOFF
:
13647 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13648 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
13650 fputs ("@gotntpoff", file
);
13652 case UNSPEC_INDNTPOFF
:
13653 fputs ("@indntpoff", file
);
13656 case UNSPEC_MACHOPIC_OFFSET
:
13658 machopic_output_function_base_name (file
);
13662 output_operand_lossage ("invalid UNSPEC as operand");
13668 output_operand_lossage ("invalid expression as operand");
13672 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13673 We need to emit DTP-relative relocations. */
13675 static void ATTRIBUTE_UNUSED
13676 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
13678 fputs (ASM_LONG
, file
);
13679 output_addr_const (file
, x
);
13680 fputs ("@dtpoff", file
);
13686 fputs (", 0", file
);
13689 gcc_unreachable ();
13693 /* Return true if X is a representation of the PIC register. This copes
13694 with calls from ix86_find_base_term, where the register might have
13695 been replaced by a cselib value. */
13698 ix86_pic_register_p (rtx x
)
13700 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
13701 return (pic_offset_table_rtx
13702 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
13704 return REG_P (x
) && REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
13707 /* Helper function for ix86_delegitimize_address.
13708 Attempt to delegitimize TLS local-exec accesses. */
13711 ix86_delegitimize_tls_address (rtx orig_x
)
13713 rtx x
= orig_x
, unspec
;
13714 struct ix86_address addr
;
13716 if (!TARGET_TLS_DIRECT_SEG_REFS
)
13720 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
13722 if (ix86_decompose_address (x
, &addr
) == 0
13723 || addr
.seg
!= (TARGET_64BIT
? SEG_FS
: SEG_GS
)
13724 || addr
.disp
== NULL_RTX
13725 || GET_CODE (addr
.disp
) != CONST
)
13727 unspec
= XEXP (addr
.disp
, 0);
13728 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
13729 unspec
= XEXP (unspec
, 0);
13730 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
13732 x
= XVECEXP (unspec
, 0, 0);
13733 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
13734 if (unspec
!= XEXP (addr
.disp
, 0))
13735 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
13738 rtx idx
= addr
.index
;
13739 if (addr
.scale
!= 1)
13740 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
13741 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
13744 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
13745 if (MEM_P (orig_x
))
13746 x
= replace_equiv_address_nv (orig_x
, x
);
13750 /* In the name of slightly smaller debug output, and to cater to
13751 general assembler lossage, recognize PIC+GOTOFF and turn it back
13752 into a direct symbol reference.
13754 On Darwin, this is necessary to avoid a crash, because Darwin
13755 has a different PIC label for each routine but the DWARF debugging
13756 information is not associated with any particular routine, so it's
13757 necessary to remove references to the PIC label from RTL stored by
13758 the DWARF output code. */
13761 ix86_delegitimize_address (rtx x
)
13763 rtx orig_x
= delegitimize_mem_from_attrs (x
);
13764 /* addend is NULL or some rtx if x is something+GOTOFF where
13765 something doesn't include the PIC register. */
13766 rtx addend
= NULL_RTX
;
13767 /* reg_addend is NULL or a multiple of some register. */
13768 rtx reg_addend
= NULL_RTX
;
13769 /* const_addend is NULL or a const_int. */
13770 rtx const_addend
= NULL_RTX
;
13771 /* This is the result, or NULL. */
13772 rtx result
= NULL_RTX
;
13781 if (GET_CODE (x
) == CONST
13782 && GET_CODE (XEXP (x
, 0)) == PLUS
13783 && GET_MODE (XEXP (x
, 0)) == Pmode
13784 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
13785 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
13786 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
13788 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
13789 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
13790 if (MEM_P (orig_x
))
13791 x
= replace_equiv_address_nv (orig_x
, x
);
13794 if (GET_CODE (x
) != CONST
13795 || GET_CODE (XEXP (x
, 0)) != UNSPEC
13796 || (XINT (XEXP (x
, 0), 1) != UNSPEC_GOTPCREL
13797 && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
)
13798 || (!MEM_P (orig_x
) && XINT (XEXP (x
, 0), 1) != UNSPEC_PCREL
))
13799 return ix86_delegitimize_tls_address (orig_x
);
13800 x
= XVECEXP (XEXP (x
, 0), 0, 0);
13801 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
13803 x
= simplify_gen_subreg (GET_MODE (orig_x
), x
,
13811 if (GET_CODE (x
) != PLUS
13812 || GET_CODE (XEXP (x
, 1)) != CONST
)
13813 return ix86_delegitimize_tls_address (orig_x
);
13815 if (ix86_pic_register_p (XEXP (x
, 0)))
13816 /* %ebx + GOT/GOTOFF */
13818 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
13820 /* %ebx + %reg * scale + GOT/GOTOFF */
13821 reg_addend
= XEXP (x
, 0);
13822 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
13823 reg_addend
= XEXP (reg_addend
, 1);
13824 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
13825 reg_addend
= XEXP (reg_addend
, 0);
13828 reg_addend
= NULL_RTX
;
13829 addend
= XEXP (x
, 0);
13833 addend
= XEXP (x
, 0);
13835 x
= XEXP (XEXP (x
, 1), 0);
13836 if (GET_CODE (x
) == PLUS
13837 && CONST_INT_P (XEXP (x
, 1)))
13839 const_addend
= XEXP (x
, 1);
13843 if (GET_CODE (x
) == UNSPEC
13844 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
13845 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))))
13846 result
= XVECEXP (x
, 0, 0);
13848 if (TARGET_MACHO
&& darwin_local_data_pic (x
)
13849 && !MEM_P (orig_x
))
13850 result
= XVECEXP (x
, 0, 0);
13853 return ix86_delegitimize_tls_address (orig_x
);
13856 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
13858 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
13861 /* If the rest of original X doesn't involve the PIC register, add
13862 addend and subtract pic_offset_table_rtx. This can happen e.g.
13864 leal (%ebx, %ecx, 4), %ecx
13866 movl foo@GOTOFF(%ecx), %edx
13867 in which case we return (%ecx - %ebx) + foo. */
13868 if (pic_offset_table_rtx
)
13869 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
13870 pic_offset_table_rtx
),
13875 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
13877 result
= simplify_gen_subreg (GET_MODE (orig_x
), result
, Pmode
, 0);
13878 if (result
== NULL_RTX
)
13884 /* If X is a machine specific address (i.e. a symbol or label being
13885 referenced as a displacement from the GOT implemented using an
13886 UNSPEC), then return the base term. Otherwise return X. */
13889 ix86_find_base_term (rtx x
)
13895 if (GET_CODE (x
) != CONST
)
13897 term
= XEXP (x
, 0);
13898 if (GET_CODE (term
) == PLUS
13899 && (CONST_INT_P (XEXP (term
, 1))
13900 || GET_CODE (XEXP (term
, 1)) == CONST_DOUBLE
))
13901 term
= XEXP (term
, 0);
13902 if (GET_CODE (term
) != UNSPEC
13903 || (XINT (term
, 1) != UNSPEC_GOTPCREL
13904 && XINT (term
, 1) != UNSPEC_PCREL
))
13907 return XVECEXP (term
, 0, 0);
13910 return ix86_delegitimize_address (x
);
13914 put_condition_code (enum rtx_code code
, enum machine_mode mode
, bool reverse
,
13915 bool fp
, FILE *file
)
13917 const char *suffix
;
13919 if (mode
== CCFPmode
|| mode
== CCFPUmode
)
13921 code
= ix86_fp_compare_code_to_integer (code
);
13925 code
= reverse_condition (code
);
13976 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
13980 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13981 Those same assemblers have the same but opposite lossage on cmov. */
13982 if (mode
== CCmode
)
13983 suffix
= fp
? "nbe" : "a";
13984 else if (mode
== CCCmode
)
13987 gcc_unreachable ();
14003 gcc_unreachable ();
14007 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14024 gcc_unreachable ();
14028 /* ??? As above. */
14029 gcc_assert (mode
== CCmode
|| mode
== CCCmode
);
14030 suffix
= fp
? "nb" : "ae";
14033 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
14037 /* ??? As above. */
14038 if (mode
== CCmode
)
14040 else if (mode
== CCCmode
)
14041 suffix
= fp
? "nb" : "ae";
14043 gcc_unreachable ();
14046 suffix
= fp
? "u" : "p";
14049 suffix
= fp
? "nu" : "np";
14052 gcc_unreachable ();
14054 fputs (suffix
, file
);
14057 /* Print the name of register X to FILE based on its machine mode and number.
14058 If CODE is 'w', pretend the mode is HImode.
14059 If CODE is 'b', pretend the mode is QImode.
14060 If CODE is 'k', pretend the mode is SImode.
14061 If CODE is 'q', pretend the mode is DImode.
14062 If CODE is 'x', pretend the mode is V4SFmode.
14063 If CODE is 't', pretend the mode is V8SFmode.
14064 If CODE is 'h', pretend the reg is the 'high' byte register.
14065 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14066 If CODE is 'd', duplicate the operand for AVX instruction.
14070 print_reg (rtx x
, int code
, FILE *file
)
14073 bool duplicated
= code
== 'd' && TARGET_AVX
;
14075 gcc_assert (x
== pc_rtx
14076 || (REGNO (x
) != ARG_POINTER_REGNUM
14077 && REGNO (x
) != FRAME_POINTER_REGNUM
14078 && REGNO (x
) != FLAGS_REG
14079 && REGNO (x
) != FPSR_REG
14080 && REGNO (x
) != FPCR_REG
));
14082 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14087 gcc_assert (TARGET_64BIT
);
14088 fputs ("rip", file
);
14092 if (code
== 'w' || MMX_REG_P (x
))
14094 else if (code
== 'b')
14096 else if (code
== 'k')
14098 else if (code
== 'q')
14100 else if (code
== 'y')
14102 else if (code
== 'h')
14104 else if (code
== 'x')
14106 else if (code
== 't')
14109 code
= GET_MODE_SIZE (GET_MODE (x
));
14111 /* Irritatingly, AMD extended registers use different naming convention
14112 from the normal registers: "r%d[bwd]" */
14113 if (REX_INT_REG_P (x
))
14115 gcc_assert (TARGET_64BIT
);
14117 fprint_ul (file
, REGNO (x
) - FIRST_REX_INT_REG
+ 8);
14121 error ("extended registers have no high halves");
14136 error ("unsupported operand size for extended register");
14146 if (STACK_TOP_P (x
))
14155 if (! ANY_FP_REG_P (x
))
14156 putc (code
== 8 && TARGET_64BIT
? 'r' : 'e', file
);
14161 reg
= hi_reg_name
[REGNO (x
)];
14164 if (REGNO (x
) >= ARRAY_SIZE (qi_reg_name
))
14166 reg
= qi_reg_name
[REGNO (x
)];
14169 if (REGNO (x
) >= ARRAY_SIZE (qi_high_reg_name
))
14171 reg
= qi_high_reg_name
[REGNO (x
)];
14176 gcc_assert (!duplicated
);
14178 fputs (hi_reg_name
[REGNO (x
)] + 1, file
);
14183 gcc_unreachable ();
14189 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14190 fprintf (file
, ", %%%s", reg
);
14192 fprintf (file
, ", %s", reg
);
14196 /* Locate some local-dynamic symbol still in use by this function
14197 so that we can print its name in some tls_local_dynamic_base
14201 get_some_local_dynamic_name_1 (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
14205 if (GET_CODE (x
) == SYMBOL_REF
14206 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
)
14208 cfun
->machine
->some_ld_name
= XSTR (x
, 0);
14215 static const char *
14216 get_some_local_dynamic_name (void)
14220 if (cfun
->machine
->some_ld_name
)
14221 return cfun
->machine
->some_ld_name
;
14223 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14224 if (NONDEBUG_INSN_P (insn
)
14225 && for_each_rtx (&PATTERN (insn
), get_some_local_dynamic_name_1
, 0))
14226 return cfun
->machine
->some_ld_name
;
14231 /* Meaning of CODE:
14232 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14233 C -- print opcode suffix for set/cmov insn.
14234 c -- like C, but print reversed condition
14235 F,f -- likewise, but for floating-point.
14236 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14238 R -- print the prefix for register names.
14239 z -- print the opcode suffix for the size of the current operand.
14240 Z -- likewise, with special suffixes for x87 instructions.
14241 * -- print a star (in certain assembler syntax)
14242 A -- print an absolute memory reference.
14243 E -- print address with DImode register names if TARGET_64BIT.
14244 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14245 s -- print a shift double count, followed by the assemblers argument
14247 b -- print the QImode name of the register for the indicated operand.
14248 %b0 would print %al if operands[0] is reg 0.
14249 w -- likewise, print the HImode name of the register.
14250 k -- likewise, print the SImode name of the register.
14251 q -- likewise, print the DImode name of the register.
14252 x -- likewise, print the V4SFmode name of the register.
14253 t -- likewise, print the V8SFmode name of the register.
14254 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14255 y -- print "st(0)" instead of "st" as a register.
14256 d -- print duplicated register operand for AVX instruction.
14257 D -- print condition for SSE cmp instruction.
14258 P -- if PIC, print an @PLT suffix.
14259 p -- print raw symbol name.
14260 X -- don't print any sort of PIC '@' suffix for a symbol.
14261 & -- print some in-use local-dynamic symbol name.
14262 H -- print a memory address offset by 8; used for sse high-parts
14263 Y -- print condition for XOP pcom* instruction.
14264 + -- print a branch hint as 'cs' or 'ds' prefix
14265 ; -- print a semicolon (after prefixes due to bug in older gas).
14266 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14267 @ -- print a segment register of thread base pointer load
14268 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14272 ix86_print_operand (FILE *file
, rtx x
, int code
)
14279 switch (ASSEMBLER_DIALECT
)
14286 /* Intel syntax. For absolute addresses, registers should not
14287 be surrounded by braces. */
14291 ix86_print_operand (file
, x
, 0);
14298 gcc_unreachable ();
14301 ix86_print_operand (file
, x
, 0);
14305 /* Wrap address in an UNSPEC to declare special handling. */
14307 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14309 output_address (x
);
14313 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14318 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14323 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14328 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14333 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14338 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14343 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14344 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14347 switch (GET_MODE_SIZE (GET_MODE (x
)))
14362 output_operand_lossage
14363 ("invalid operand size for operand code 'O'");
14372 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14374 /* Opcodes don't get size suffixes if using Intel opcodes. */
14375 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14378 switch (GET_MODE_SIZE (GET_MODE (x
)))
14397 output_operand_lossage
14398 ("invalid operand size for operand code 'z'");
14403 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14405 (0, "non-integer operand used with operand code 'z'");
14409 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14410 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14413 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14415 switch (GET_MODE_SIZE (GET_MODE (x
)))
14418 #ifdef HAVE_AS_IX86_FILDS
14428 #ifdef HAVE_AS_IX86_FILDQ
14431 fputs ("ll", file
);
14439 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14441 /* 387 opcodes don't get size suffixes
14442 if the operands are registers. */
14443 if (STACK_REG_P (x
))
14446 switch (GET_MODE_SIZE (GET_MODE (x
)))
14467 output_operand_lossage
14468 ("invalid operand type used with operand code 'Z'");
14472 output_operand_lossage
14473 ("invalid operand size for operand code 'Z'");
14491 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14493 ix86_print_operand (file
, x
, 0);
14494 fputs (", ", file
);
14499 switch (GET_CODE (x
))
14502 fputs ("neq", file
);
14505 fputs ("eq", file
);
14509 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14513 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14517 fputs ("le", file
);
14521 fputs ("lt", file
);
14524 fputs ("unord", file
);
14527 fputs ("ord", file
);
14530 fputs ("ueq", file
);
14533 fputs ("nlt", file
);
14536 fputs ("nle", file
);
14539 fputs ("ule", file
);
14542 fputs ("ult", file
);
14545 fputs ("une", file
);
14548 output_operand_lossage ("operand is not a condition code, "
14549 "invalid operand code 'Y'");
14555 /* Little bit of braindamage here. The SSE compare instructions
14556 does use completely different names for the comparisons that the
14557 fp conditional moves. */
14558 switch (GET_CODE (x
))
14563 fputs ("eq_us", file
);
14567 fputs ("eq", file
);
14572 fputs ("nge", file
);
14576 fputs ("lt", file
);
14581 fputs ("ngt", file
);
14585 fputs ("le", file
);
14588 fputs ("unord", file
);
14593 fputs ("neq_oq", file
);
14597 fputs ("neq", file
);
14602 fputs ("ge", file
);
14606 fputs ("nlt", file
);
14611 fputs ("gt", file
);
14615 fputs ("nle", file
);
14618 fputs ("ord", file
);
14621 output_operand_lossage ("operand is not a condition code, "
14622 "invalid operand code 'D'");
14629 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14630 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14636 if (!COMPARISON_P (x
))
14638 output_operand_lossage ("operand is not a condition code, "
14639 "invalid operand code '%c'", code
);
14642 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
14643 code
== 'c' || code
== 'f',
14644 code
== 'F' || code
== 'f',
14649 if (!offsettable_memref_p (x
))
14651 output_operand_lossage ("operand is not an offsettable memory "
14652 "reference, invalid operand code 'H'");
14655 /* It doesn't actually matter what mode we use here, as we're
14656 only going to use this for printing. */
14657 x
= adjust_address_nv (x
, DImode
, 8);
14661 gcc_assert (CONST_INT_P (x
));
14663 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
14664 #ifdef HAVE_AS_IX86_HLE
14665 fputs ("xacquire ", file
);
14667 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
14669 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
14670 #ifdef HAVE_AS_IX86_HLE
14671 fputs ("xrelease ", file
);
14673 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
14675 /* We do not want to print value of the operand. */
14679 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14685 const char *name
= get_some_local_dynamic_name ();
14687 output_operand_lossage ("'%%&' used without any "
14688 "local dynamic TLS references");
14690 assemble_name (file
, name
);
14699 || optimize_function_for_size_p (cfun
)
14700 || !TARGET_BRANCH_PREDICTION_HINTS
)
14703 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
14706 int pred_val
= INTVAL (XEXP (x
, 0));
14708 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
14709 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
14711 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
14713 = final_forward_branch_p (current_output_insn
) == 0;
14715 /* Emit hints only in the case default branch prediction
14716 heuristics would fail. */
14717 if (taken
!= cputaken
)
14719 /* We use 3e (DS) prefix for taken branches and
14720 2e (CS) prefix for not taken branches. */
14722 fputs ("ds ; ", file
);
14724 fputs ("cs ; ", file
);
14732 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14738 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14741 /* The kernel uses a different segment register for performance
14742 reasons; a system call would not have to trash the userspace
14743 segment register, which would be expensive. */
14744 if (TARGET_64BIT
&& ix86_cmodel
!= CM_KERNEL
)
14745 fputs ("fs", file
);
14747 fputs ("gs", file
);
14751 putc (TARGET_AVX2
? 'i' : 'f', file
);
14755 if (TARGET_64BIT
&& Pmode
!= word_mode
)
14756 fputs ("addr32 ", file
);
14760 output_operand_lossage ("invalid operand code '%c'", code
);
14765 print_reg (x
, code
, file
);
14767 else if (MEM_P (x
))
14769 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14770 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P'
14771 && GET_MODE (x
) != BLKmode
)
14774 switch (GET_MODE_SIZE (GET_MODE (x
)))
14776 case 1: size
= "BYTE"; break;
14777 case 2: size
= "WORD"; break;
14778 case 4: size
= "DWORD"; break;
14779 case 8: size
= "QWORD"; break;
14780 case 12: size
= "TBYTE"; break;
14782 if (GET_MODE (x
) == XFmode
)
14787 case 32: size
= "YMMWORD"; break;
14789 gcc_unreachable ();
14792 /* Check for explicit size override (codes 'b', 'w', 'k',
14796 else if (code
== 'w')
14798 else if (code
== 'k')
14800 else if (code
== 'q')
14802 else if (code
== 'x')
14805 fputs (size
, file
);
14806 fputs (" PTR ", file
);
14810 /* Avoid (%rip) for call operands. */
14811 if (CONSTANT_ADDRESS_P (x
) && code
== 'P'
14812 && !CONST_INT_P (x
))
14813 output_addr_const (file
, x
);
14814 else if (this_is_asm_operands
&& ! address_operand (x
, VOIDmode
))
14815 output_operand_lossage ("invalid constraints for operand");
14817 output_address (x
);
14820 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == SFmode
)
14825 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14826 REAL_VALUE_TO_TARGET_SINGLE (r
, l
);
14828 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14830 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14832 fprintf (file
, "0x%08llx", (unsigned long long) (int) l
);
14834 fprintf (file
, "0x%08x", (unsigned int) l
);
14837 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == DFmode
)
14842 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
14843 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
14845 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14847 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
14850 /* These float cases don't actually occur as immediate operands. */
14851 else if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) == XFmode
)
14855 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
14856 fputs (dstr
, file
);
14861 /* We have patterns that allow zero sets of memory, for instance.
14862 In 64-bit mode, we should probably support all 8-byte vectors,
14863 since we can in fact encode that into an immediate. */
14864 if (GET_CODE (x
) == CONST_VECTOR
)
14866 gcc_assert (x
== CONST0_RTX (GET_MODE (x
)));
14870 if (code
!= 'P' && code
!= 'p')
14872 if (CONST_INT_P (x
) || GET_CODE (x
) == CONST_DOUBLE
)
14874 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14877 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
14878 || GET_CODE (x
) == LABEL_REF
)
14880 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14883 fputs ("OFFSET FLAT:", file
);
14886 if (CONST_INT_P (x
))
14887 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
14888 else if (flag_pic
|| MACHOPIC_INDIRECT
)
14889 output_pic_addr_const (file
, x
, code
);
14891 output_addr_const (file
, x
);
14896 ix86_print_operand_punct_valid_p (unsigned char code
)
14898 return (code
== '@' || code
== '*' || code
== '+' || code
== '&'
14899 || code
== ';' || code
== '~' || code
== '^');
14902 /* Print a memory operand whose address is ADDR. */
14905 ix86_print_operand_address (FILE *file
, rtx addr
)
14907 struct ix86_address parts
;
14908 rtx base
, index
, disp
;
14914 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
14916 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14917 gcc_assert (parts
.index
== NULL_RTX
);
14918 parts
.index
= XVECEXP (addr
, 0, 1);
14919 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
14920 addr
= XVECEXP (addr
, 0, 0);
14923 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
14925 gcc_assert (TARGET_64BIT
);
14926 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
14930 ok
= ix86_decompose_address (addr
, &parts
);
14934 if (parts
.base
&& GET_CODE (parts
.base
) == SUBREG
)
14936 rtx tmp
= SUBREG_REG (parts
.base
);
14937 parts
.base
= simplify_subreg (GET_MODE (parts
.base
),
14938 tmp
, GET_MODE (tmp
), 0);
14939 gcc_assert (parts
.base
!= NULL_RTX
);
14942 if (parts
.index
&& GET_CODE (parts
.index
) == SUBREG
)
14944 rtx tmp
= SUBREG_REG (parts
.index
);
14945 parts
.index
= simplify_subreg (GET_MODE (parts
.index
),
14946 tmp
, GET_MODE (tmp
), 0);
14947 gcc_assert (parts
.index
!= NULL_RTX
);
14951 index
= parts
.index
;
14953 scale
= parts
.scale
;
14961 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14963 fputs ((parts
.seg
== SEG_FS
? "fs:" : "gs:"), file
);
14966 gcc_unreachable ();
14969 /* Use one byte shorter RIP relative addressing for 64bit mode. */
14970 if (TARGET_64BIT
&& !base
&& !index
)
14974 if (GET_CODE (disp
) == CONST
14975 && GET_CODE (XEXP (disp
, 0)) == PLUS
14976 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
14977 symbol
= XEXP (XEXP (disp
, 0), 0);
14979 if (GET_CODE (symbol
) == LABEL_REF
14980 || (GET_CODE (symbol
) == SYMBOL_REF
14981 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
14984 if (!base
&& !index
)
14986 /* Displacement only requires special attention. */
14988 if (CONST_INT_P (disp
))
14990 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& parts
.seg
== SEG_DEFAULT
)
14991 fputs ("ds:", file
);
14992 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
14995 output_pic_addr_const (file
, disp
, 0);
14997 output_addr_const (file
, disp
);
15001 /* Print SImode register names to force addr32 prefix. */
15002 if (SImode_address_operand (addr
, VOIDmode
))
15004 #ifdef ENABLE_CHECKING
15005 gcc_assert (TARGET_64BIT
);
15006 switch (GET_CODE (addr
))
15009 gcc_assert (GET_MODE (addr
) == SImode
);
15010 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
15014 gcc_assert (GET_MODE (addr
) == DImode
);
15017 gcc_unreachable ();
15020 gcc_assert (!code
);
15024 if (ASSEMBLER_DIALECT
== ASM_ATT
)
15029 output_pic_addr_const (file
, disp
, 0);
15030 else if (GET_CODE (disp
) == LABEL_REF
)
15031 output_asm_label (disp
);
15033 output_addr_const (file
, disp
);
15038 print_reg (base
, code
, file
);
15042 print_reg (index
, vsib
? 0 : code
, file
);
15043 if (scale
!= 1 || vsib
)
15044 fprintf (file
, ",%d", scale
);
15050 rtx offset
= NULL_RTX
;
15054 /* Pull out the offset of a symbol; print any symbol itself. */
15055 if (GET_CODE (disp
) == CONST
15056 && GET_CODE (XEXP (disp
, 0)) == PLUS
15057 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
15059 offset
= XEXP (XEXP (disp
, 0), 1);
15060 disp
= gen_rtx_CONST (VOIDmode
,
15061 XEXP (XEXP (disp
, 0), 0));
15065 output_pic_addr_const (file
, disp
, 0);
15066 else if (GET_CODE (disp
) == LABEL_REF
)
15067 output_asm_label (disp
);
15068 else if (CONST_INT_P (disp
))
15071 output_addr_const (file
, disp
);
15077 print_reg (base
, code
, file
);
15080 if (INTVAL (offset
) >= 0)
15082 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15086 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
15093 print_reg (index
, vsib
? 0 : code
, file
);
15094 if (scale
!= 1 || vsib
)
15095 fprintf (file
, "*%d", scale
);
15102 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15105 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
15109 if (GET_CODE (x
) != UNSPEC
)
15112 op
= XVECEXP (x
, 0, 0);
15113 switch (XINT (x
, 1))
15115 case UNSPEC_GOTTPOFF
:
15116 output_addr_const (file
, op
);
15117 /* FIXME: This might be @TPOFF in Sun ld. */
15118 fputs ("@gottpoff", file
);
15121 output_addr_const (file
, op
);
15122 fputs ("@tpoff", file
);
15124 case UNSPEC_NTPOFF
:
15125 output_addr_const (file
, op
);
15127 fputs ("@tpoff", file
);
15129 fputs ("@ntpoff", file
);
15131 case UNSPEC_DTPOFF
:
15132 output_addr_const (file
, op
);
15133 fputs ("@dtpoff", file
);
15135 case UNSPEC_GOTNTPOFF
:
15136 output_addr_const (file
, op
);
15138 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
15139 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
15141 fputs ("@gotntpoff", file
);
15143 case UNSPEC_INDNTPOFF
:
15144 output_addr_const (file
, op
);
15145 fputs ("@indntpoff", file
);
15148 case UNSPEC_MACHOPIC_OFFSET
:
15149 output_addr_const (file
, op
);
15151 machopic_output_function_base_name (file
);
15155 case UNSPEC_STACK_CHECK
:
15159 gcc_assert (flag_split_stack
);
15161 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15162 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
15164 gcc_unreachable ();
15167 fprintf (file
, "%s:%d", TARGET_64BIT
? "%fs" : "%gs", offset
);
15178 /* Split one or more double-mode RTL references into pairs of half-mode
15179 references. The RTL can be REG, offsettable MEM, integer constant, or
15180 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15181 split and "num" is its length. lo_half and hi_half are output arrays
15182 that parallel "operands". */
15185 split_double_mode (enum machine_mode mode
, rtx operands
[],
15186 int num
, rtx lo_half
[], rtx hi_half
[])
15188 enum machine_mode half_mode
;
15194 half_mode
= DImode
;
15197 half_mode
= SImode
;
15200 gcc_unreachable ();
15203 byte
= GET_MODE_SIZE (half_mode
);
15207 rtx op
= operands
[num
];
15209 /* simplify_subreg refuse to split volatile memory addresses,
15210 but we still have to handle it. */
15213 lo_half
[num
] = adjust_address (op
, half_mode
, 0);
15214 hi_half
[num
] = adjust_address (op
, half_mode
, byte
);
15218 lo_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15219 GET_MODE (op
) == VOIDmode
15220 ? mode
: GET_MODE (op
), 0);
15221 hi_half
[num
] = simplify_gen_subreg (half_mode
, op
,
15222 GET_MODE (op
) == VOIDmode
15223 ? mode
: GET_MODE (op
), byte
);
15228 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
/* NOTE(review): this extract drops many original source lines (the embedded
   line numbers jump, e.g. 15263 -> 15266 and 15308 -> 15315), so the function
   head, braces, else-arms and several case labels below are missing from
   view.  Consult upstream gcc/config/i386/i386.c before editing.
   Purpose (from the visible code): build, in the static BUF, the assembler
   template for an x87 or SSE binary floating-point operation (PLUS/MINUS/
   MULT/DIV on operands[3]), choosing between "ss"/"sd" SSE suffixes and the
   various x87 st(n)/pop template forms P depending on which operands are
   stack registers, memory, or dead after INSN.  */
15229 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15230 is the expression of the binary operation. The output may either be
15231 emitted here, or returned to the caller, like all output_* functions.
15233 There is no guarantee that the operands are the same mode, as they
15234 might be within FLOAT or FLOAT_EXTEND expressions. */
15236 #ifndef SYSV386_COMPAT
15237 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15238 wants to fix the assemblers because that causes incompatibility
15239 with gcc. No-one wants to fix gcc because that causes
15240 incompatibility with assemblers... You can use the option of
15241 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15242 #define SYSV386_COMPAT 1
15246 output_387_binary_op (rtx insn
, rtx
*operands
)
/* Static buffer: the returned template must outlive this call.  */
15248 static char buf
[40];
15251 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]);
15253 #ifdef ENABLE_CHECKING
15254 /* Even if we do not want to check the inputs, this documents input
15255 constraints. Which helps in understanding the following code. */
15256 if (STACK_REG_P (operands
[0])
15257 && ((REG_P (operands
[1])
15258 && REGNO (operands
[0]) == REGNO (operands
[1])
15259 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
15260 || (REG_P (operands
[2])
15261 && REGNO (operands
[0]) == REGNO (operands
[2])
15262 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
15263 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
15266 gcc_assert (is_sse
);
/* Select the base mnemonic from the rtx code of operands[3];
   the PLUS/MINUS/MULT/DIV case labels are lost in this extract.  */
15269 switch (GET_CODE (operands
[3]))
15272 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15273 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15281 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15282 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15290 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15291 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15299 if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
15300 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
15308 gcc_unreachable ();
/* SSE path: emit v{add,sub,mul,div}{ss,sd}; the AVX (3-operand) and
   non-AVX branches are distinguished here — presumably on TARGET_AVX,
   lost in the extract.  */
15315 strcpy (buf
, ssep
);
15316 if (GET_MODE (operands
[0]) == SFmode
)
15317 strcat (buf
, "ss\t{%2, %1, %0|%0, %1, %2}");
15319 strcat (buf
, "sd\t{%2, %1, %0|%0, %1, %2}");
15323 strcpy (buf
, ssep
+ 1);
15324 if (GET_MODE (operands
[0]) == SFmode
)
15325 strcat (buf
, "ss\t{%2, %0|%0, %2}");
15327 strcat (buf
, "sd\t{%2, %0|%0, %2}");
/* x87 path: pick the st(n)/pop suffix template P per operand shape.  */
15333 switch (GET_CODE (operands
[3]))
15337 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
/* Commutative op with dest matching operand 2: swap 1 and 2 so the
   matching operand is operand 1.  */
15339 rtx temp
= operands
[2];
15340 operands
[2] = operands
[1];
15341 operands
[1] = temp
;
15344 /* know operands[0] == operands[1]. */
15346 if (MEM_P (operands
[2]))
15352 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15354 if (STACK_TOP_P (operands
[0]))
15355 /* How is it that we are storing to a dead operand[2]?
15356 Well, presumably operands[1] is dead too. We can't
15357 store the result to st(0) as st(0) gets popped on this
15358 instruction. Instead store to operands[2] (which I
15359 think has to be st(1)). st(1) will be popped later.
15360 gcc <= 2.8.1 didn't have this check and generated
15361 assembly code that the Unixware assembler rejected. */
15362 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15364 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15368 if (STACK_TOP_P (operands
[0]))
15369 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15371 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): extra care for reversed forms.  */
15376 if (MEM_P (operands
[1]))
15382 if (MEM_P (operands
[2]))
15388 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
15391 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15392 derived assemblers, confusingly reverse the direction of
15393 the operation for fsub{r} and fdiv{r} when the
15394 destination register is not st(0). The Intel assembler
15395 doesn't have this brain damage. Read !SYSV386_COMPAT to
15396 figure out what the hardware really does. */
15397 if (STACK_TOP_P (operands
[0]))
15398 p
= "{p\t%0, %2|rp\t%2, %0}";
15400 p
= "{rp\t%2, %0|p\t%0, %2}";
15402 if (STACK_TOP_P (operands
[0]))
15403 /* As above for fmul/fadd, we can't store to st(0). */
15404 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15406 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15411 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
15414 if (STACK_TOP_P (operands
[0]))
15415 p
= "{rp\t%0, %1|p\t%1, %0}";
15417 p
= "{p\t%1, %0|rp\t%0, %1}";
15419 if (STACK_TOP_P (operands
[0]))
15420 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
15422 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
15427 if (STACK_TOP_P (operands
[0]))
15429 if (STACK_TOP_P (operands
[1]))
15430 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15432 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
15435 else if (STACK_TOP_P (operands
[1]))
15438 p
= "{\t%1, %0|r\t%0, %1}";
15440 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
15446 p
= "{r\t%2, %0|\t%0, %2}";
15448 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15454 gcc_unreachable ();
/* NOTE(review): extract is lossy (original line numbers jump, e.g.
   15477 -> 15479, 15487 -> 15492); the function head, call/asm test head,
   and the bodies of the mode-specific if-arms are missing here — confirm
   against upstream i386.c.
   Purpose (from the visible comment): return the i387 control-word mode
   required by INSN for the optimize_mode_switching pass.  */
15461 /* Return needed mode for entity in optimize_mode_switching pass. */
15464 ix86_mode_needed (int entity
, rtx insn
)
15466 enum attr_i387_cw mode
;
15468 /* The mode UNINITIALIZED is used to store control word after a
15469 function call or ASM pattern. The mode ANY specify that function
15470 has no requirements on the control word and make no changes in the
15471 bits we are interested in. */
15474 || (NONJUMP_INSN_P (insn
)
15475 && (asm_noperands (PATTERN (insn
)) >= 0
15476 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
15477 return I387_CW_UNINITIALIZED
;
/* Unrecognizable insn: no constraint on the control word.  */
15479 if (recog_memoized (insn
) < 0)
15480 return I387_CW_ANY
;
15482 mode
= get_attr_i387_cw (insn
);
/* Each specific rounding mode is checked against ENTITY; the returns
   inside these arms are lost in this extract.  */
15487 if (mode
== I387_CW_TRUNC
)
15492 if (mode
== I387_CW_FLOOR
)
15497 if (mode
== I387_CW_CEIL
)
15502 if (mode
== I387_CW_MASK_PM
)
15507 gcc_unreachable ();
15510 return I387_CW_ANY
;
/* NOTE(review): lossy extract — switch headers, `break`s, and the else of
   the TARGET_64BIT test are missing (line numbers jump, e.g. 15562 -> 15569).
   Confirm against upstream i386.c before editing.
   Purpose (from the visible comment and code): emit RTL that saves the
   current x87 control word with fnstcw, computes a modified copy in REG
   selecting the rounding/precision bits for MODE (trunc/floor/ceil/mask-PM),
   and stores it into a dedicated stack slot for later fldcw.  */
15513 /* Output code to initialize control word copies used by trunc?f?i and
15514 rounding patterns. CURRENT_MODE is set to current control word,
15515 while NEW_MODE is set to new control word. */
15518 emit_i387_cw_initialization (int mode
)
15520 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
15523 enum ix86_stack_slot slot
;
15525 rtx reg
= gen_reg_rtx (HImode
);
/* Save the live control word, then work on a copy in REG.  */
15527 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
15528 emit_move_insn (reg
, copy_rtx (stored_mode
));
/* Two strategies: full 16-bit and/or sequences (this arm), or a single
   movsi_insv_1 bit-field insert (second switch below) when partial
   register stalls make the short form preferable.  */
15530 if (TARGET_64BIT
|| TARGET_PARTIAL_REG_STALL
15531 || optimize_function_for_size_p (cfun
))
15535 case I387_CW_TRUNC
:
15536 /* round toward zero (truncate) */
15537 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
15538 slot
= SLOT_CW_TRUNC
;
15541 case I387_CW_FLOOR
:
15542 /* round down toward -oo */
15543 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15544 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
15545 slot
= SLOT_CW_FLOOR
;
15549 /* round up toward +oo */
15550 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
15551 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
15552 slot
= SLOT_CW_CEIL
;
15555 case I387_CW_MASK_PM
:
15556 /* mask precision exception for nearbyint() */
15557 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15558 slot
= SLOT_CW_MASK_PM
;
15562 gcc_unreachable ();
/* Second strategy: insert the 2-bit rounding field directly.  */
15569 case I387_CW_TRUNC
:
15570 /* round toward zero (truncate) */
15571 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0xc)));
15572 slot
= SLOT_CW_TRUNC
;
15575 case I387_CW_FLOOR
:
15576 /* round down toward -oo */
15577 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x4)));
15578 slot
= SLOT_CW_FLOOR
;
15582 /* round up toward +oo */
15583 emit_insn (gen_movsi_insv_1 (reg
, GEN_INT (0x8)));
15584 slot
= SLOT_CW_CEIL
;
15587 case I387_CW_MASK_PM
:
15588 /* mask precision exception for nearbyint() */
15589 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0020)));
15590 slot
= SLOT_CW_MASK_PM
;
15594 gcc_unreachable ();
15598 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
/* Store the modified word into its per-mode slot for later fldcw.  */
15600 new_mode
= assign_386_stack_local (HImode
, slot
);
15601 emit_move_insn (new_mode
, reg
);
/* NOTE(review): lossy extract — the function's return type, braces and
   some else-arms are missing (line numbers jump, e.g. 15619 -> 15621).
   Confirm against upstream i386.c.
   Purpose (from the visible comment): emit the assembler for a float ->
   signed-int conversion, using fisttp when available, otherwise
   fist/fistp bracketed by fldcw control-word switches.  */
15604 /* Output code for INSN to convert a float to a signed int. OPERANDS
15605 are the insn operands. The output may be [HSD]Imode and the input
15606 operand may be [SDX]Fmode. */
15609 output_fix_trunc (rtx insn
, rtx
*operands
, bool fisttp
)
15611 int stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
15612 int dimode_p
= GET_MODE (operands
[0]) == DImode
;
15613 int round_mode
= get_attr_i387_cw (insn
);
15615 /* Jump through a hoop or two for DImode, since the hardware has no
15616 non-popping instruction. We used to do this a different way, but
15617 that was somewhat fragile and broke with post-reload splitters. */
15618 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
15619 output_asm_insn ("fld\t%y1", operands
);
15621 gcc_assert (STACK_TOP_P (operands
[1]));
15622 gcc_assert (MEM_P (operands
[0]));
15623 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
/* fisttp truncates regardless of the control word — no fldcw needed.  */
15626 output_asm_insn ("fisttp%Z0\t%0", operands
);
/* Otherwise switch the control word around the store if a specific
   rounding mode is required.  */
15629 if (round_mode
!= I387_CW_ANY
)
15630 output_asm_insn ("fldcw\t%3", operands
);
15631 if (stack_top_dies
|| dimode_p
)
15632 output_asm_insn ("fistp%Z0\t%0", operands
);
15634 output_asm_insn ("fist%Z0\t%0", operands
);
15635 if (round_mode
!= I387_CW_ANY
)
15636 output_asm_insn ("fldcw\t%2", operands
);
/* NOTE(review): lossy extract — the #else/#endif of the HAVE_AS_IX86_FFREEP
   conditional and surrounding braces are missing here; confirm against
   upstream i386.c.
   Purpose (from the visible comment): return the assembler text for an x87
   ffreep of OPERANDS[OPNO]; when the assembler lacks the mnemonic, emit its
   raw encoding (0xdf 0xc0+reg) via ASM_SHORT, else fall back to fstp.  */
15642 /* Output code for x87 ffreep insn. The OPNO argument, which may only
15643 have the values zero or one, indicates the ffreep insn's operand
15644 from the OPERANDS array. */
15646 static const char *
15647 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
15649 if (TARGET_USE_FFREEP
)
15650 #ifdef HAVE_AS_IX86_FFREEP
15651 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: hand-assemble the opcode bytes.  */
15654 static char retval
[32];
15655 int regno
= REGNO (operands
[opno
]);
15657 gcc_assert (STACK_REGNO_P (regno
));
15659 regno
-= FIRST_STACK_REG
;
15661 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
/* No ffreep available at all: pop with fstp instead.  */
15666 return opno
? "fstp\t%y1" : "fstp\t%y0";
/* NOTE(review): lossy extract — several branches (SSE vs x87 dispatch,
   else-arms, the final table lookup/return) are missing (line numbers jump,
   e.g. 15705 -> 15707, 15766 -> 15777).  Confirm against upstream i386.c.
   Purpose (from the visible comment): return the assembler template for a
   floating-point compare of OPERANDS; uses [v]u?comis[sd] for SSE, and the
   fcom/fucom/fcomi family (selected through the mask-indexed ALT table) for
   x87, handling popping forms when stack registers die.  */
15670 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
15671 should be used. UNORDERED_P is true when fucom should be used. */
15674 output_fp_compare (rtx insn
, rtx
*operands
, bool eflags_p
, bool unordered_p
)
15676 int stack_top_dies
;
15677 rtx cmp_op0
, cmp_op1
;
15678 int is_sse
= SSE_REG_P (operands
[0]) || SSE_REG_P (operands
[1]);
15682 cmp_op0
= operands
[0];
15683 cmp_op1
= operands
[1];
/* Alternate operand layout (condition lost in extract).  */
15687 cmp_op0
= operands
[1];
15688 cmp_op1
= operands
[2];
/* SSE compares: scalar [v]comis/[v]ucomis.  */
15693 if (GET_MODE (operands
[0]) == SFmode
)
15695 return "%vucomiss\t{%1, %0|%0, %1}";
15697 return "%vcomiss\t{%1, %0|%0, %1}";
15700 return "%vucomisd\t{%1, %0|%0, %1}";
15702 return "%vcomisd\t{%1, %0|%0, %1}";
/* x87 compares from here on: first operand must be st(0).  */
15705 gcc_assert (STACK_TOP_P (cmp_op0
));
15707 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
) != 0;
/* Compare against zero: ftst (popping via ffreep if st(0) dies).  */
15709 if (cmp_op1
== CONST0_RTX (GET_MODE (cmp_op1
)))
15711 if (stack_top_dies
)
15713 output_asm_insn ("ftst\n\tfnstsw\t%0", operands
);
15714 return output_387_ffreep (operands
, 1);
15717 return "ftst\n\tfnstsw\t%0";
15720 if (STACK_REG_P (cmp_op1
)
15722 && find_regno_note (insn
, REG_DEAD
, REGNO (cmp_op1
))
15723 && REGNO (cmp_op1
) != FIRST_STACK_REG
)
15725 /* If both the top of the 387 stack dies, and the other operand
15726 is also a stack register that dies, then this must be a
15727 `fcompp' float compare */
15731 /* There is no double popping fcomi variant. Fortunately,
15732 eflags is immune from the fstp's cc clobbering. */
15734 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands
);
15736 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands
);
15737 return output_387_ffreep (operands
, 0);
15742 return "fucompp\n\tfnstsw\t%0";
15744 return "fcompp\n\tfnstsw\t%0";
15749 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
15751 static const char * const alt
[16] =
15753 "fcom%Z2\t%y2\n\tfnstsw\t%0",
15754 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
15755 "fucom%Z2\t%y2\n\tfnstsw\t%0",
15756 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
15758 "ficom%Z2\t%y2\n\tfnstsw\t%0",
15759 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
15763 "fcomi\t{%y1, %0|%0, %y1}",
15764 "fcomip\t{%y1, %0|%0, %y1}",
15765 "fucomi\t{%y1, %0|%0, %y1}",
15766 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index per the encoding comment above.  */
15777 mask
= eflags_p
<< 3;
15778 mask
|= (GET_MODE_CLASS (GET_MODE (cmp_op1
)) == MODE_INT
) << 2;
15779 mask
|= unordered_p
<< 1;
15780 mask
|= stack_top_dies
;
15782 gcc_assert (mask
< 16);
/* NOTE(review): lossy extract — the function head and the condition that
   selects ASM_QUAD (presumably TARGET_64BIT; verify) are missing.
   Purpose (from the visible code): print one absolute jump-table entry
   ("<directive><LPREFIX><value>") to FILE.  */
15791 ix86_output_addr_vec_elt (FILE *file
, int value
)
15793 const char *directive
= ASM_LONG
;
15797 directive
= ASM_QUAD
;
15799 gcc_assert (!TARGET_64BIT
);
15802 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
/* NOTE(review): lossy extract — function head, some braces and the final
   else-arm structure are missing; confirm against upstream i386.c.
   Purpose (from the visible code): print one relative (difference)
   jump-table entry to FILE, choosing between a label difference, a
   @GOTOFF reference, a Mach-O pic-base difference, or a GOT-relative
   expression depending on the target configuration.  */
15806 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
15808 const char *directive
= ASM_LONG
;
15811 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
15812 directive
= ASM_QUAD
;
15814 gcc_assert (!TARGET_64BIT
);
15816 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15817 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
15818 fprintf (file
, "%s%s%d-%s%d\n",
15819 directive
, LPREFIX
, value
, LPREFIX
, rel
);
15820 else if (HAVE_AS_GOTOFF_IN_DATA
)
15821 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
15823 else if (TARGET_MACHO
)
15825 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
15826 machopic_output_function_base_name (file
);
/* Fallback: GOT-symbol-relative expression.  */
15831 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
15832 GOT_SYMBOL_NAME
, LPREFIX
, value
);
/* NOTE(review): lossy extract — the function head, the tail comment of the
   header, and the final emit of TMP are missing; confirm against upstream.
   Purpose (from the visible comment): zero DEST post-reload, preferring
   xor-with-self (which clobbers flags, hence the CLOBBER parallel) over
   mov $0 unless TARGET_USE_MOV0 asks otherwise.  */
15835 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
15839 ix86_expand_clear (rtx dest
)
15843 /* We play register width games, which are only valid after reload. */
15844 gcc_assert (reload_completed
);
15846 /* Avoid HImode and its attendant prefix byte. */
15847 if (GET_MODE_SIZE (GET_MODE (dest
)) < 4)
15848 dest
= gen_rtx_REG (SImode
, REGNO (dest
));
15849 tmp
= gen_rtx_SET (VOIDmode
, dest
, const0_rtx
);
15851 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
15852 if (!TARGET_USE_MOV0
|| optimize_insn_for_speed_p ())
15854 rtx clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
15855 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, clob
));
/* NOTE(review): function head and trailing return NULL are missing in this
   extract; confirm against upstream i386.c.
   Purpose (from the visible comment): given an unchanging MEM X, return the
   constant-pool constant it references, or NULL.  */
15861 /* X is an unchanging MEM. If it is a constant pool reference, return
15862 the constant pool rtx, else NULL. */
15865 maybe_get_pool_constant (rtx x
)
15867 x
= ix86_delegitimize_address (XEXP (x
, 0));
15869 if (GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
15870 return get_pool_constant (x
);
/* NOTE(review): heavily lossy extract — the op0/op1 initialization, many
   if/else heads, braces and several whole branches are missing (line
   numbers jump throughout, e.g. 15879 -> 15884, 15958 -> 15963).  Confirm
   against upstream i386.c before editing.
   Purpose (from the visible code): expander for scalar moves in MODE.
   Legitimizes TLS symbol references, dllimport symbols, CONST(PLUS(sym,
   addend)) forms, and PIC/Mach-O symbolic operands; forces operands into
   registers where the hardware requires it; spills FP constants to the
   constant pool; finally emits the SET.  */
15876 ix86_expand_move (enum machine_mode mode
, rtx operands
[])
15879 enum tls_model model
;
/* Plain SYMBOL_REF source: handle TLS and dllimport forms.  */
15884 if (GET_CODE (op1
) == SYMBOL_REF
)
15886 model
= SYMBOL_REF_TLS_MODEL (op1
);
15889 op1
= legitimize_tls_address (op1
, model
, true);
15890 op1
= force_operand (op1
, op0
);
15893 if (GET_MODE (op1
) != mode
)
15894 op1
= convert_to_mode (mode
, op1
, 1);
15896 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15897 && SYMBOL_REF_DLLIMPORT_P (op1
))
15898 op1
= legitimize_dllimport_symbol (op1
, false);
/* CONST (PLUS (symbol, addend)): legitimize the symbol part, then
   re-add the addend.  */
15900 else if (GET_CODE (op1
) == CONST
15901 && GET_CODE (XEXP (op1
, 0)) == PLUS
15902 && GET_CODE (XEXP (XEXP (op1
, 0), 0)) == SYMBOL_REF
)
15904 rtx addend
= XEXP (XEXP (op1
, 0), 1);
15905 rtx symbol
= XEXP (XEXP (op1
, 0), 0);
15908 model
= SYMBOL_REF_TLS_MODEL (symbol
);
15910 tmp
= legitimize_tls_address (symbol
, model
, true);
15911 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
15912 && SYMBOL_REF_DLLIMPORT_P (symbol
))
15913 tmp
= legitimize_dllimport_symbol (symbol
, true);
15917 tmp
= force_operand (tmp
, NULL
);
15918 tmp
= expand_simple_binop (Pmode
, PLUS
, tmp
, addend
,
15919 op0
, 1, OPTAB_DIRECT
);
15922 if (GET_MODE (tmp
) != mode
)
15923 op1
= convert_to_mode (mode
, tmp
, 1);
/* Symbolic operands under PIC (including Mach-O dynamic-no-pic).  */
15927 if ((flag_pic
|| MACHOPIC_INDIRECT
)
15928 && symbolic_operand (op1
, mode
))
15930 if (TARGET_MACHO
&& !TARGET_64BIT
)
15933 /* dynamic-no-pic */
15934 if (MACHOPIC_INDIRECT
)
15936 rtx temp
= ((reload_in_progress
15937 || ((op0
&& REG_P (op0
))
15939 ? op0
: gen_reg_rtx (Pmode
));
15940 op1
= machopic_indirect_data_reference (op1
, temp
);
15942 op1
= machopic_legitimize_pic_address (op1
, mode
,
15943 temp
== op1
? 0 : temp
);
15945 if (op0
!= op1
&& GET_CODE (op0
) != MEM
)
15947 rtx insn
= gen_rtx_SET (VOIDmode
, op0
, op1
);
15951 if (GET_CODE (op0
) == MEM
)
15952 op1
= force_reg (Pmode
, op1
);
15956 if (GET_CODE (temp
) != REG
)
15957 temp
= gen_reg_rtx (Pmode
);
15958 temp
= legitimize_pic_address (op1
, temp
);
15963 /* dynamic-no-pic */
15969 op1
= force_reg (mode
, op1
);
15970 else if (!(TARGET_64BIT
&& x86_64_movabs_operand (op1
, DImode
)))
15972 rtx reg
= can_create_pseudo_p () ? NULL_RTX
: op0
;
15973 op1
= legitimize_pic_address (op1
, reg
);
15976 if (GET_MODE (op1
) != mode
)
15977 op1
= convert_to_mode (mode
, op1
, 1);
/* MEM destination (non-push, or odd-sized push): source must be reg.  */
15984 && (PUSH_ROUNDING (GET_MODE_SIZE (mode
)) != GET_MODE_SIZE (mode
)
15985 || !push_operand (op0
, mode
))
15987 op1
= force_reg (mode
, op1
);
15989 if (push_operand (op0
, mode
)
15990 && ! general_no_elim_operand (op1
, mode
))
15991 op1
= copy_to_mode_reg (mode
, op1
);
15993 /* Force large constants in 64bit compilation into register
15994 to get them CSEed. */
15995 if (can_create_pseudo_p ()
15996 && (mode
== DImode
) && TARGET_64BIT
15997 && immediate_operand (op1
, mode
)
15998 && !x86_64_zext_immediate_operand (op1
, VOIDmode
)
15999 && !register_operand (op0
, mode
)
16001 op1
= copy_to_mode_reg (mode
, op1
);
16003 if (can_create_pseudo_p ()
16004 && FLOAT_MODE_P (mode
)
16005 && GET_CODE (op1
) == CONST_DOUBLE
)
16007 /* If we are loading a floating point constant to a register,
16008 force the value to memory now, since we'll get better code
16009 out the back end. */
16011 op1
= validize_mem (force_const_mem (mode
, op1
));
16012 if (!register_operand (op0
, mode
))
16014 rtx temp
= gen_reg_rtx (mode
);
16015 emit_insn (gen_rtx_SET (VOIDmode
, temp
, op1
));
16016 emit_move_insn (op0
, temp
);
/* Common exit: emit the final move.  */
16022 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
/* NOTE(review): lossy extract — braces, a `return`, and the declaration of
   the local TMP array are missing (line numbers jump, e.g. 16041 -> 16043).
   Confirm against upstream i386.c.
   Purpose (from the visible comments): expander for vector moves in MODE.
   Forces non-zero constants to the constant pool, routes under-aligned
   SSE-mode memory operands through ix86_expand_vector_move_misalign, and
   otherwise emits a plain SET (via a register if both sides are memory).  */
16026 ix86_expand_vector_move (enum machine_mode mode
, rtx operands
[])
16028 rtx op0
= operands
[0], op1
= operands
[1];
16029 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
16031 /* Force constants other than zero into memory. We do not know how
16032 the instructions used to build constants modify the upper 64 bits
16033 of the register, once we have that information we may be able
16034 to handle some of them more efficiently. */
16035 if (can_create_pseudo_p ()
16036 && register_operand (op0
, mode
)
16037 && (CONSTANT_P (op1
)
16038 || (GET_CODE (op1
) == SUBREG
16039 && CONSTANT_P (SUBREG_REG (op1
))))
16040 && !standard_sse_constant_p (op1
))
16041 op1
= validize_mem (force_const_mem (mode
, op1
));
16043 /* We need to check memory alignment for SSE mode since attribute
16044 can make operands unaligned. */
16045 if (can_create_pseudo_p ()
16046 && SSE_REG_MODE_P (mode
)
16047 && ((MEM_P (op0
) && (MEM_ALIGN (op0
) < align
))
16048 || (MEM_P (op1
) && (MEM_ALIGN (op1
) < align
))))
16052 /* ix86_expand_vector_move_misalign() does not like constants ... */
16053 if (CONSTANT_P (op1
)
16054 || (GET_CODE (op1
) == SUBREG
16055 && CONSTANT_P (SUBREG_REG (op1
))))
16056 op1
= validize_mem (force_const_mem (mode
, op1
));
16058 /* ... nor both arguments in memory. */
16059 if (!register_operand (op0
, mode
)
16060 && !register_operand (op1
, mode
))
16061 op1
= force_reg (mode
, op1
);
16063 tmp
[0] = op0
; tmp
[1] = op1
;
16064 ix86_expand_vector_move_misalign (mode
, tmp
);
16068 /* Make operand1 a register if it isn't already. */
16069 if (can_create_pseudo_p ()
16070 && !register_operand (op0
, mode
)
16071 && !register_operand (op1
, mode
))
16073 emit_move_insn (op0
, force_reg (GET_MODE (op0
), op1
));
16077 emit_insn (gen_rtx_SET (VOIDmode
, op0
, op1
));
/* NOTE(review): lossy extract — the V32QI/V8SF/V4DF case labels, `break`s
   and an outer MEM_P (op1) test are missing (line numbers jump, e.g.
   16110 -> 16117).  Confirm against upstream i386.c.
   Purpose (from the visible comment): for a 32-byte AVX unaligned load or
   store, either emit the single unaligned insn or split it into two
   16-byte halves (vextractf128 / VEC_CONCAT) when the split tuning flags
   are set.  */
16080 /* Split 32-byte AVX unaligned load and store if needed. */
16083 ix86_avx256_split_vector_move_misalign (rtx op0
, rtx op1
)
16086 rtx (*extract
) (rtx
, rtx
, rtx
);
16087 rtx (*load_unaligned
) (rtx
, rtx
);
16088 rtx (*store_unaligned
) (rtx
, rtx
);
16089 enum machine_mode mode
;
/* Pick the 128-bit half mode and the matching gen functions.  */
16091 switch (GET_MODE (op0
))
16094 gcc_unreachable ();
16096 extract
= gen_avx_vextractf128v32qi
;
16097 load_unaligned
= gen_avx_loaddqu256
;
16098 store_unaligned
= gen_avx_storedqu256
;
16102 extract
= gen_avx_vextractf128v8sf
;
16103 load_unaligned
= gen_avx_loadups256
;
16104 store_unaligned
= gen_avx_storeups256
;
16108 extract
= gen_avx_vextractf128v4df
;
16109 load_unaligned
= gen_avx_loadupd256
;
16110 store_unaligned
= gen_avx_storeupd256
;
/* Load path: split into two 16-byte loads + VEC_CONCAT, or one
   unaligned 256-bit load.  */
16117 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
)
16119 rtx r
= gen_reg_rtx (mode
);
16120 m
= adjust_address (op1
, mode
, 0);
16121 emit_move_insn (r
, m
);
16122 m
= adjust_address (op1
, mode
, 16);
16123 r
= gen_rtx_VEC_CONCAT (GET_MODE (op0
), r
, m
);
16124 emit_move_insn (op0
, r
);
16127 emit_insn (load_unaligned (op0
, op1
));
16129 else if (MEM_P (op0
))
16131 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
)
/* Store path: two vextractf128 stores, or one unaligned store.  */
16133 m
= adjust_address (op0
, mode
, 0);
16134 emit_insn (extract (m
, op1
, const0_rtx
));
16135 m
= adjust_address (op0
, mode
, 16);
16136 emit_insn (extract (m
, op1
, const1_rtx
));
16139 emit_insn (store_unaligned (op0
, op1
));
16142 gcc_unreachable ();
/* NOTE(review): heavily lossy extract — the op0/op1/m/zero declarations,
   many if/else heads (e.g. the MEM_P (op1) load-path test) and braces are
   missing (line numbers jump throughout, e.g. 16221 -> 16229).  Confirm
   against upstream i386.c before editing.
   Purpose (from the visible comments): implement the SSE/AVX movmisalign
   patterns — choose among unaligned full-width moves (movdqu/movups/
   movupd), half-at-a-time loads/stores (loadlpd+loadhpd, loadlps+loadhps
   and the store counterparts), or the 32-byte AVX split helper, guided by
   the per-CPU tuning flags explained in the big comment below.  */
16145 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16146 straight to ix86_expand_vector_move. */
16147 /* Code generation for scalar reg-reg moves of single and double precision data:
16148 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16152 if (x86_sse_partial_reg_dependency == true)
16157 Code generation for scalar loads of double precision data:
16158 if (x86_sse_split_regs == true)
16159 movlpd mem, reg (gas syntax)
16163 Code generation for unaligned packed loads of single precision data
16164 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
16165 if (x86_sse_unaligned_move_optimal)
16168 if (x86_sse_partial_reg_dependency == true)
16180 Code generation for unaligned packed loads of double precision data
16181 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
16182 if (x86_sse_unaligned_move_optimal)
16185 if (x86_sse_split_regs == true)
16198 ix86_expand_vector_move_misalign (enum machine_mode mode
, rtx operands
[])
/* 32-byte AVX modes: delegate to the split helper.  */
16206 && GET_MODE_SIZE (mode
) == 32)
16208 switch (GET_MODE_CLASS (mode
))
16210 case MODE_VECTOR_INT
:
16212 op0
= gen_lowpart (V32QImode
, op0
);
16213 op1
= gen_lowpart (V32QImode
, op1
);
16216 case MODE_VECTOR_FLOAT
:
16217 ix86_avx256_split_vector_move_misalign (op0
, op1
);
16221 gcc_unreachable ();
/* 16-byte load path (op1 in memory — head lost in extract).  */
16229 /* ??? If we have typed data, then it would appear that using
16230 movdqu is the only way to get unaligned data loaded with
16232 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16234 op0
= gen_lowpart (V16QImode
, op0
);
16235 op1
= gen_lowpart (V16QImode
, op1
);
16236 /* We will eventually emit movups based on insn attributes. */
16237 emit_insn (gen_sse2_loaddqu (op0
, op1
));
16239 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16244 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16245 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16246 || optimize_function_for_size_p (cfun
))
16248 /* We will eventually emit movups based on insn attributes. */
16249 emit_insn (gen_sse2_loadupd (op0
, op1
));
16253 /* When SSE registers are split into halves, we can avoid
16254 writing to the top half twice. */
16255 if (TARGET_SSE_SPLIT_REGS
)
16257 emit_clobber (op0
);
16262 /* ??? Not sure about the best option for the Intel chips.
16263 The following would seem to satisfy; the register is
16264 entirely cleared, breaking the dependency chain. We
16265 then store to the upper half, with a dependency depth
16266 of one. A rumor has it that Intel recommends two movsd
16267 followed by an unpacklpd, but this is unconfirmed. And
16268 given that the dependency depth of the unpacklpd would
16269 still be one, I'm not sure why this would be better. */
16270 zero
= CONST0_RTX (V2DFmode
);
16273 m
= adjust_address (op1
, DFmode
, 0);
16274 emit_insn (gen_sse2_loadlpd (op0
, zero
, m
));
16275 m
= adjust_address (op1
, DFmode
, 8);
16276 emit_insn (gen_sse2_loadhpd (op0
, op0
, m
));
/* Single-precision / fallback load path.  */
16281 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
16282 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16283 || optimize_function_for_size_p (cfun
))
16285 op0
= gen_lowpart (V4SFmode
, op0
);
16286 op1
= gen_lowpart (V4SFmode
, op1
);
16287 emit_insn (gen_sse_loadups (op0
, op1
));
16291 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY
)
16292 emit_move_insn (op0
, CONST0_RTX (mode
));
16294 emit_clobber (op0
);
16296 if (mode
!= V4SFmode
)
16297 op0
= gen_lowpart (V4SFmode
, op0
);
16299 m
= adjust_address (op1
, V2SFmode
, 0);
16300 emit_insn (gen_sse_loadlps (op0
, op0
, m
));
16301 m
= adjust_address (op1
, V2SFmode
, 8);
16302 emit_insn (gen_sse_loadhps (op0
, op0
, m
));
/* 16-byte store path (op0 in memory).  */
16305 else if (MEM_P (op0
))
16307 if (TARGET_SSE2
&& GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16309 op0
= gen_lowpart (V16QImode
, op0
);
16310 op1
= gen_lowpart (V16QImode
, op1
);
16311 /* We will eventually emit movups based on insn attributes. */
16312 emit_insn (gen_sse2_storedqu (op0
, op1
));
16314 else if (TARGET_SSE2
&& mode
== V2DFmode
)
16317 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16318 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16319 || optimize_function_for_size_p (cfun
))
16320 /* We will eventually emit movups based on insn attributes. */
16321 emit_insn (gen_sse2_storeupd (op0
, op1
));
16324 m
= adjust_address (op0
, DFmode
, 0);
16325 emit_insn (gen_sse2_storelpd (m
, op1
));
16326 m
= adjust_address (op0
, DFmode
, 8);
16327 emit_insn (gen_sse2_storehpd (m
, op1
));
16332 if (mode
!= V4SFmode
)
16333 op1
= gen_lowpart (V4SFmode
, op1
);
16336 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
16337 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16338 || optimize_function_for_size_p (cfun
))
16340 op0
= gen_lowpart (V4SFmode
, op0
);
16341 emit_insn (gen_sse_storeups (op0
, op1
));
16345 m
= adjust_address (op0
, V2SFmode
, 0);
16346 emit_insn (gen_sse_storelps (m
, op1
));
16347 m
= adjust_address (op0
, V2SFmode
, 8);
16348 emit_insn (gen_sse_storehps (m
, op1
));
16353 gcc_unreachable ();
/* NOTE(review): lossy extract — the function's braces and the tail of the
   alignment comment are missing; confirm against upstream i386.c.
   Purpose (from the visible comment): expand a push of X in MODE by
   decrementing the stack pointer explicitly and storing X to the new top
   of stack, for modes with no native push.  */
16356 /* Expand a push in MODE. This is some mode for which we do not support
16357 proper push instructions, at least from the registers that we expect
16358 the value to live in. */
16361 ix86_expand_push (enum machine_mode mode
, rtx x
)
16365 tmp
= expand_simple_binop (Pmode
, PLUS
, stack_pointer_rtx
,
16366 GEN_INT (-GET_MODE_SIZE (mode
)),
16367 stack_pointer_rtx
, 1, OPTAB_DIRECT
);
16368 if (tmp
!= stack_pointer_rtx
)
16369 emit_move_insn (stack_pointer_rtx
, tmp
);
16371 tmp
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
16373 /* When we push an operand onto stack, it has to be aligned at least
16374 at the function argument boundary. However since we don't have
16375 the argument type, we can't determine the actual argument
16377 emit_move_insn (tmp
, x
);
/* NOTE(review): lossy extract — the return statements of each priority
   test and the final memory-reference test are missing (numbering jumps
   16407 -> 16417); confirm against upstream i386.c.
   Purpose (from the visible comment): decide whether the two source
   operands of a commutative operation should be swapped so that src1
   matches dst, constants come second, and memory comes second.  */
16380 /* Helper function of ix86_fixup_binary_operands to canonicalize
16381 operand order. Returns true if the operands should be swapped. */
16384 ix86_swap_binary_operands_p (enum rtx_code code
, enum machine_mode mode
,
16387 rtx dst
= operands
[0];
16388 rtx src1
= operands
[1];
16389 rtx src2
= operands
[2];
16391 /* If the operation is not commutative, we can't do anything. */
16392 if (GET_RTX_CLASS (code
) != RTX_COMM_ARITH
)
16395 /* Highest priority is that src1 should match dst. */
16396 if (rtx_equal_p (dst
, src1
))
16398 if (rtx_equal_p (dst
, src2
))
16401 /* Next highest priority is that immediate constants come second. */
16402 if (immediate_operand (src2
, mode
))
16404 if (immediate_operand (src1
, mode
))
16407 /* Lowest priority is that memory references should come second. */
/* NOTE(review): lossy extract — the swap statements, some else-arms, the
   address-combine condition head, and the final `return dst` are missing
   (numbering jumps, e.g. 16448 -> 16452, 16468 -> 16470).  Confirm
   against upstream i386.c.
   Purpose (from the visible comment): massage OPERANDS of a binary op so
   they satisfy ix86_binary_operator_ok — canonical order, at most one
   memory source, no constant src1 — returning the dst to use (a fresh
   register if operands[0] could not be used directly).  */
16417 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
16418 destination to use for the operation. If different from the true
16419 destination in operands[0], a copy operation will be required. */
16422 ix86_fixup_binary_operands (enum rtx_code code
, enum machine_mode mode
,
16425 rtx dst
= operands
[0];
16426 rtx src1
= operands
[1];
16427 rtx src2
= operands
[2];
16429 /* Canonicalize operand order. */
16430 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16434 /* It is invalid to swap operands of different modes. */
16435 gcc_assert (GET_MODE (src1
) == GET_MODE (src2
));
16442 /* Both source operands cannot be in memory. */
16443 if (MEM_P (src1
) && MEM_P (src2
))
16445 /* Optimization: Only read from memory once. */
16446 if (rtx_equal_p (src1
, src2
))
16448 src2
= force_reg (mode
, src2
);
16452 src2
= force_reg (mode
, src2
);
16455 /* If the destination is memory, and we do not have matching source
16456 operands, do things in registers. */
16457 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16458 dst
= gen_reg_rtx (mode
);
16460 /* Source 1 cannot be a constant. */
16461 if (CONSTANT_P (src1
))
16462 src1
= force_reg (mode
, src1
);
16464 /* Source 1 cannot be a non-matching memory. */
16465 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16466 src1
= force_reg (mode
, src1
);
16468 /* Improve address combine. */
16470 && GET_MODE_CLASS (mode
) == MODE_INT
16472 src2
= force_reg (mode
, src2
);
16474 operands
[1] = src1
;
16475 operands
[2] = src2
;
/* Thin wrapper: run ix86_fixup_binary_operands and assert that no copy
   of the destination was needed.  (Lossy extract: return type and braces
   missing — confirm against upstream i386.c.)  */
16479 /* Similarly, but assume that the destination has already been
16480 set up properly. */
16483 ix86_fixup_binary_operands_no_copy (enum rtx_code code
,
16484 enum machine_mode mode
, rtx operands
[])
16486 rtx dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16487 gcc_assert (dst
== operands
[0]);
/* NOTE(review): lossy extract — the plain emit_insn of OP, the LEA branch
   body, and some braces are missing (numbering jumps 16511 -> 16514,
   16518 -> 16523).  Confirm against upstream i386.c.
   Purpose (from the visible comment): expand a two-source binary operator,
   fixing up operands first, emitting either a bare SET (PLUS during
   reload / LEA case) or a SET parallel with a flags CLOBBER, then copying
   to the real destination if fixup chose a temporary.  */
16490 /* Attempt to expand a binary operator. Make the expansion closer to the
16491 actual machine, then just general_operand, which will allow 3 separate
16492 memory references (one output, two input) in a single insn. */
16495 ix86_expand_binary_operator (enum rtx_code code
, enum machine_mode mode
,
16498 rtx src1
, src2
, dst
, op
, clob
;
16500 dst
= ix86_fixup_binary_operands (code
, mode
, operands
);
16501 src1
= operands
[1];
16502 src2
= operands
[2];
16504 /* Emit the instruction. */
16506 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, src1
, src2
));
16507 if (reload_in_progress
)
16509 /* Reload doesn't know about the flags register, and doesn't know that
16510 it doesn't want to clobber it. We can only do this with PLUS. */
16511 gcc_assert (code
== PLUS
);
16514 else if (reload_completed
16516 && !rtx_equal_p (dst
, src1
))
16518 /* This is going to be an LEA; avoid splitting it later. */
/* Default: x86 arithmetic clobbers the flags — say so explicitly.  */
16523 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16524 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16527 /* Fix up the destination if needed. */
16528 if (dst
!= operands
[0])
16529 emit_move_insn (operands
[0], dst
);
/* NOTE(review): lossy extract — the bodies of the initial SUBREG tests,
   the float-mode case labels of the inner switch, a `return`, and several
   braces are missing (numbering jumps, e.g. 16540 -> 16545, 16567 ->
   16573).  Confirm against upstream i386.c.
   Purpose (from the visible comments): expand AND/IOR/XOR on vectors;
   when the operands are integer-vector SUBREGs of float vectors (the
   C-level cast idiom), perform the logical op in the float-vector mode
   instead, otherwise fix up operands and emit the straightforward SET.  */
16532 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
16533 the given OPERANDS. */
16536 ix86_expand_vector_logical_operator (enum rtx_code code
, enum machine_mode mode
,
16539 rtx op1
= NULL_RTX
, op2
= NULL_RTX
;
16540 if (GET_CODE (operands
[1]) == SUBREG
)
16545 else if (GET_CODE (operands
[2]) == SUBREG
)
16550 /* Optimize (__m128i) d | (__m128i) e and similar code
16551 when d and e are float vectors into float vector logical
16552 insn. In C/C++ without using intrinsics there is no other way
16553 to express vector logical operation on float vectors than
16554 to cast them temporarily to integer vectors. */
16556 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
16557 && ((GET_CODE (op2
) == SUBREG
|| GET_CODE (op2
) == CONST_VECTOR
))
16558 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1
))) == MODE_VECTOR_FLOAT
16559 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1
))) == GET_MODE_SIZE (mode
)
16560 && SUBREG_BYTE (op1
) == 0
16561 && (GET_CODE (op2
) == CONST_VECTOR
16562 || (GET_MODE (SUBREG_REG (op1
)) == GET_MODE (SUBREG_REG (op2
))
16563 && SUBREG_BYTE (op2
) == 0))
16564 && can_create_pseudo_p ())
16567 switch (GET_MODE (SUBREG_REG (op1
)))
/* Do the operation in the float-vector mode of the SUBREG operand.  */
16573 dst
= gen_reg_rtx (GET_MODE (SUBREG_REG (op1
)));
16574 if (GET_CODE (op2
) == CONST_VECTOR
)
16576 op2
= gen_lowpart (GET_MODE (dst
), op2
);
16577 op2
= force_reg (GET_MODE (dst
), op2
);
16582 op2
= SUBREG_REG (operands
[2]);
16583 if (!nonimmediate_operand (op2
, GET_MODE (dst
)))
16584 op2
= force_reg (GET_MODE (dst
), op2
);
16586 op1
= SUBREG_REG (op1
);
16587 if (!nonimmediate_operand (op1
, GET_MODE (dst
)))
16588 op1
= force_reg (GET_MODE (dst
), op1
);
16589 emit_insn (gen_rtx_SET (VOIDmode
, dst
,
16590 gen_rtx_fmt_ee (code
, GET_MODE (dst
),
16592 emit_move_insn (operands
[0], gen_lowpart (mode
, dst
));
/* Generic path: legitimize operands and emit the SET directly.  */
16598 if (!nonimmediate_operand (operands
[1], mode
))
16599 operands
[1] = force_reg (mode
, operands
[1]);
16600 if (!nonimmediate_operand (operands
[2], mode
))
16601 operands
[2] = force_reg (mode
, operands
[2]);
16602 ix86_fixup_binary_operands_no_copy (code
, mode
, operands
);
16603 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
16604 gen_rtx_fmt_ee (code
, mode
, operands
[1],
/* NOTE(review): lossy extract — the `return false` / swap statements and
   final `return true` are missing (numbering jumps 16620 -> 16623,
   16646 -> 16651).  Confirm against upstream i386.c.
   Purpose (from the visible comment): predicate checking whether OPERANDS
   already satisfy the machine constraints for binary operator CODE —
   at most one memory source, matching memory destination, non-constant
   src1, with an exception for AND used as a zero-extending move.  */
16608 /* Return TRUE or FALSE depending on whether the binary operator meets the
16609 appropriate constraints. */
16612 ix86_binary_operator_ok (enum rtx_code code
, enum machine_mode mode
,
16615 rtx dst
= operands
[0];
16616 rtx src1
= operands
[1];
16617 rtx src2
= operands
[2];
16619 /* Both source operands cannot be in memory. */
16620 if (MEM_P (src1
) && MEM_P (src2
))
16623 /* Canonicalize operand order for commutative operators. */
16624 if (ix86_swap_binary_operands_p (code
, mode
, operands
))
16631 /* If the destination is memory, we must have a matching source operand. */
16632 if (MEM_P (dst
) && !rtx_equal_p (dst
, src1
))
16635 /* Source 1 cannot be a constant. */
16636 if (CONSTANT_P (src1
))
16639 /* Source 1 cannot be a non-matching memory. */
16640 if (MEM_P (src1
) && !rtx_equal_p (dst
, src1
))
16641 /* Support "andhi/andsi/anddi" as a zero-extending move. */
16642 return (code
== AND
16645 || (TARGET_64BIT
&& mode
== DImode
))
16646 && satisfies_constraint_L (src2
));
/* NOTE(review): lossy extract — src/dst initialization, the MEM_P (dst)
   test around the matching-memory check, and the non-clobber emit path
   are missing (numbering jumps 16660 -> 16665, 16687 -> 16692).  Confirm
   against upstream i386.c.
   Purpose (from the visible comment): expand a unary operator (NEG/NOT),
   forcing operands into registers where needed and emitting either a
   bare SET (NOT does not touch flags) or a SET plus flags CLOBBER.  */
16651 /* Attempt to expand a unary operator. Make the expansion closer to the
16652 actual machine, then just general_operand, which will allow 2 separate
16653 memory references (one output, one input) in a single insn. */
16656 ix86_expand_unary_operator (enum rtx_code code
, enum machine_mode mode
,
16659 int matching_memory
;
16660 rtx src
, dst
, op
, clob
;
16665 /* If the destination is memory, and we do not have matching source
16666 operands, do things in registers. */
16667 matching_memory
= 0;
16670 if (rtx_equal_p (dst
, src
))
16671 matching_memory
= 1;
16673 dst
= gen_reg_rtx (mode
);
16676 /* When source operand is memory, destination must match. */
16677 if (MEM_P (src
) && !matching_memory
)
16678 src
= force_reg (mode
, src
);
16680 /* Emit the instruction. */
16682 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_e (code
, mode
, src
));
16683 if (reload_in_progress
|| code
== NOT
)
16685 /* Reload doesn't know about the flags register, and doesn't know that
16686 it doesn't want to clobber it. */
16687 gcc_assert (code
== NOT
);
/* Other codes (NEG) clobber the flags — emit the parallel.  */
16692 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
16693 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
16696 /* Fix up the destination if needed. */
16697 if (dst
!= operands
[0])
16698 emit_move_insn (operands
[0], dst
);
16701 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
16702 divisor are within the range [0-255]. */
16705 ix86_split_idivmod (enum machine_mode mode
, rtx operands
[],
16708 rtx end_label
, qimode_label
;
16709 rtx insn
, div
, mod
;
16710 rtx scratch
, tmp0
, tmp1
, tmp2
;
16711 rtx (*gen_divmod4_1
) (rtx
, rtx
, rtx
, rtx
);
16712 rtx (*gen_zero_extend
) (rtx
, rtx
);
16713 rtx (*gen_test_ccno_1
) (rtx
, rtx
);
16718 gen_divmod4_1
= signed_p
? gen_divmodsi4_1
: gen_udivmodsi4_1
;
16719 gen_test_ccno_1
= gen_testsi_ccno_1
;
16720 gen_zero_extend
= gen_zero_extendqisi2
;
16723 gen_divmod4_1
= signed_p
? gen_divmoddi4_1
: gen_udivmoddi4_1
;
16724 gen_test_ccno_1
= gen_testdi_ccno_1
;
16725 gen_zero_extend
= gen_zero_extendqidi2
;
16728 gcc_unreachable ();
16731 end_label
= gen_label_rtx ();
16732 qimode_label
= gen_label_rtx ();
16734 scratch
= gen_reg_rtx (mode
);
16736 /* Use 8bit unsigned divimod if dividend and divisor are within
16737 the range [0-255]. */
16738 emit_move_insn (scratch
, operands
[2]);
16739 scratch
= expand_simple_binop (mode
, IOR
, scratch
, operands
[3],
16740 scratch
, 1, OPTAB_DIRECT
);
16741 emit_insn (gen_test_ccno_1 (scratch
, GEN_INT (-0x100)));
16742 tmp0
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
16743 tmp0
= gen_rtx_EQ (VOIDmode
, tmp0
, const0_rtx
);
16744 tmp0
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp0
,
16745 gen_rtx_LABEL_REF (VOIDmode
, qimode_label
),
16747 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp0
));
16748 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
16749 JUMP_LABEL (insn
) = qimode_label
;
16751 /* Generate original signed/unsigned divimod. */
16752 div
= gen_divmod4_1 (operands
[0], operands
[1],
16753 operands
[2], operands
[3]);
16756 /* Branch to the end. */
16757 emit_jump_insn (gen_jump (end_label
));
16760 /* Generate 8bit unsigned divide. */
16761 emit_label (qimode_label
);
16762 /* Don't use operands[0] for result of 8bit divide since not all
16763 registers support QImode ZERO_EXTRACT. */
16764 tmp0
= simplify_gen_subreg (HImode
, scratch
, mode
, 0);
16765 tmp1
= simplify_gen_subreg (HImode
, operands
[2], mode
, 0);
16766 tmp2
= simplify_gen_subreg (QImode
, operands
[3], mode
, 0);
16767 emit_insn (gen_udivmodhiqi3 (tmp0
, tmp1
, tmp2
));
16771 div
= gen_rtx_DIV (SImode
, operands
[2], operands
[3]);
16772 mod
= gen_rtx_MOD (SImode
, operands
[2], operands
[3]);
16776 div
= gen_rtx_UDIV (SImode
, operands
[2], operands
[3]);
16777 mod
= gen_rtx_UMOD (SImode
, operands
[2], operands
[3]);
16780 /* Extract remainder from AH. */
16781 tmp1
= gen_rtx_ZERO_EXTRACT (mode
, tmp0
, GEN_INT (8), GEN_INT (8));
16782 if (REG_P (operands
[1]))
16783 insn
= emit_move_insn (operands
[1], tmp1
);
16786 /* Need a new scratch register since the old one has result
16788 scratch
= gen_reg_rtx (mode
);
16789 emit_move_insn (scratch
, tmp1
);
16790 insn
= emit_move_insn (operands
[1], scratch
);
16792 set_unique_reg_note (insn
, REG_EQUAL
, mod
);
16794 /* Zero extend quotient from AL. */
16795 tmp1
= gen_lowpart (QImode
, tmp0
);
16796 insn
= emit_insn (gen_zero_extend (operands
[0], tmp1
));
16797 set_unique_reg_note (insn
, REG_EQUAL
, div
);
16799 emit_label (end_label
);
16802 #define LEA_MAX_STALL (3)
16803 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
16805 /* Increase given DISTANCE in half-cycles according to
16806 dependencies between PREV and NEXT instructions.
16807 Add 1 half-cycle if there is no dependency and
16808 go to next cycle if there is some dependecy. */
16810 static unsigned int
16811 increase_distance (rtx prev
, rtx next
, unsigned int distance
)
16816 if (!prev
|| !next
)
16817 return distance
+ (distance
& 1) + 2;
16819 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
16820 return distance
+ 1;
16822 for (use_rec
= DF_INSN_USES (next
); *use_rec
; use_rec
++)
16823 for (def_rec
= DF_INSN_DEFS (prev
); *def_rec
; def_rec
++)
16824 if (!DF_REF_IS_ARTIFICIAL (*def_rec
)
16825 && DF_REF_REGNO (*use_rec
) == DF_REF_REGNO (*def_rec
))
16826 return distance
+ (distance
& 1) + 2;
16828 return distance
+ 1;
16831 /* Function checks if instruction INSN defines register number
16832 REGNO1 or REGNO2. */
16835 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
16840 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
16841 if (DF_REF_REG_DEF_P (*def_rec
)
16842 && !DF_REF_IS_ARTIFICIAL (*def_rec
)
16843 && (regno1
== DF_REF_REGNO (*def_rec
)
16844 || regno2
== DF_REF_REGNO (*def_rec
)))
16852 /* Function checks if instruction INSN uses register number
16853 REGNO as a part of address expression. */
16856 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
16860 for (use_rec
= DF_INSN_USES (insn
); *use_rec
; use_rec
++)
16861 if (DF_REF_REG_MEM_P (*use_rec
) && regno
== DF_REF_REGNO (*use_rec
))
16867 /* Search backward for non-agu definition of register number REGNO1
16868 or register number REGNO2 in basic block starting from instruction
16869 START up to head of basic block or instruction INSN.
16871 Function puts true value into *FOUND var if definition was found
16872 and false otherwise.
16874 Distance in half-cycles between START and found instruction or head
16875 of BB is added to DISTANCE and returned. */
16878 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
16879 rtx insn
, int distance
,
16880 rtx start
, bool *found
)
16882 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
16890 && distance
< LEA_SEARCH_THRESHOLD
)
16892 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
16894 distance
= increase_distance (prev
, next
, distance
);
16895 if (insn_defines_reg (regno1
, regno2
, prev
))
16897 if (recog_memoized (prev
) < 0
16898 || get_attr_type (prev
) != TYPE_LEA
)
16907 if (prev
== BB_HEAD (bb
))
16910 prev
= PREV_INSN (prev
);
16916 /* Search backward for non-agu definition of register number REGNO1
16917 or register number REGNO2 in INSN's basic block until
16918 1. Pass LEA_SEARCH_THRESHOLD instructions, or
16919 2. Reach neighbour BBs boundary, or
16920 3. Reach agu definition.
16921 Returns the distance between the non-agu definition point and INSN.
16922 If no definition point, returns -1. */
16925 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
16928 basic_block bb
= BLOCK_FOR_INSN (insn
);
16930 bool found
= false;
16932 if (insn
!= BB_HEAD (bb
))
16933 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
16934 distance
, PREV_INSN (insn
),
16937 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
16941 bool simple_loop
= false;
16943 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16946 simple_loop
= true;
16951 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
16953 BB_END (bb
), &found
);
16956 int shortest_dist
= -1;
16957 bool found_in_bb
= false;
16959 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16962 = distance_non_agu_define_in_bb (regno1
, regno2
,
16968 if (shortest_dist
< 0)
16969 shortest_dist
= bb_dist
;
16970 else if (bb_dist
> 0)
16971 shortest_dist
= MIN (bb_dist
, shortest_dist
);
16977 distance
= shortest_dist
;
16981 /* get_attr_type may modify recog data. We want to make sure
16982 that recog data is valid for instruction INSN, on which
16983 distance_non_agu_define is called. INSN is unchanged here. */
16984 extract_insn_cached (insn
);
16989 return distance
>> 1;
16992 /* Return the distance in half-cycles between INSN and the next
16993 insn that uses register number REGNO in memory address added
16994 to DISTANCE. Return -1 if REGNO0 is set.
16996 Put true value into *FOUND if register usage was found and
16998 Put true value into *REDEFINED if register redefinition was
16999 found and false otherwise. */
17002 distance_agu_use_in_bb (unsigned int regno
,
17003 rtx insn
, int distance
, rtx start
,
17004 bool *found
, bool *redefined
)
17006 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
17011 *redefined
= false;
17015 && distance
< LEA_SEARCH_THRESHOLD
)
17017 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
17019 distance
= increase_distance(prev
, next
, distance
);
17020 if (insn_uses_reg_mem (regno
, next
))
17022 /* Return DISTANCE if OP0 is used in memory
17023 address in NEXT. */
17028 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
17030 /* Return -1 if OP0 is set in NEXT. */
17038 if (next
== BB_END (bb
))
17041 next
= NEXT_INSN (next
);
17047 /* Return the distance between INSN and the next insn that uses
17048 register number REGNO0 in memory address. Return -1 if no such
17049 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17052 distance_agu_use (unsigned int regno0
, rtx insn
)
17054 basic_block bb
= BLOCK_FOR_INSN (insn
);
17056 bool found
= false;
17057 bool redefined
= false;
17059 if (insn
!= BB_END (bb
))
17060 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
17062 &found
, &redefined
);
17064 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
17068 bool simple_loop
= false;
17070 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17073 simple_loop
= true;
17078 distance
= distance_agu_use_in_bb (regno0
, insn
,
17079 distance
, BB_HEAD (bb
),
17080 &found
, &redefined
);
17083 int shortest_dist
= -1;
17084 bool found_in_bb
= false;
17085 bool redefined_in_bb
= false;
17087 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
17090 = distance_agu_use_in_bb (regno0
, insn
,
17091 distance
, BB_HEAD (e
->dest
),
17092 &found_in_bb
, &redefined_in_bb
);
17095 if (shortest_dist
< 0)
17096 shortest_dist
= bb_dist
;
17097 else if (bb_dist
> 0)
17098 shortest_dist
= MIN (bb_dist
, shortest_dist
);
17104 distance
= shortest_dist
;
17108 if (!found
|| redefined
)
17111 return distance
>> 1;
17114 /* Define this macro to tune LEA priority vs ADD, it take effect when
17115 there is a dilemma of choicing LEA or ADD
17116 Negative value: ADD is more preferred than LEA
17118 Positive value: LEA is more preferred than ADD*/
17119 #define IX86_LEA_PRIORITY 0
17121 /* Return true if usage of lea INSN has performance advantage
17122 over a sequence of instructions. Instructions sequence has
17123 SPLIT_COST cycles higher latency than lea latency. */
17126 ix86_lea_outperforms (rtx insn
, unsigned int regno0
, unsigned int regno1
,
17127 unsigned int regno2
, int split_cost
)
17129 int dist_define
, dist_use
;
17131 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
17132 dist_use
= distance_agu_use (regno0
, insn
);
17134 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
17136 /* If there is no non AGU operand definition, no AGU
17137 operand usage and split cost is 0 then both lea
17138 and non lea variants have same priority. Currently
17139 we prefer lea for 64 bit code and non lea on 32 bit
17141 if (dist_use
< 0 && split_cost
== 0)
17142 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
17147 /* With longer definitions distance lea is more preferable.
17148 Here we change it to take into account splitting cost and
17150 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
17152 /* If there is no use in memory addess then we just check
17153 that split cost exceeds AGU stall. */
17155 return dist_define
> LEA_MAX_STALL
;
17157 /* If this insn has both backward non-agu dependence and forward
17158 agu dependence, the one with short distance takes effect. */
17159 return dist_define
>= dist_use
;
17162 /* Return true if it is legal to clobber flags by INSN and
17163 false otherwise. */
17166 ix86_ok_to_clobber_flags (rtx insn
)
17168 basic_block bb
= BLOCK_FOR_INSN (insn
);
17174 if (NONDEBUG_INSN_P (insn
))
17176 for (use
= DF_INSN_USES (insn
); *use
; use
++)
17177 if (DF_REF_REG_USE_P (*use
) && DF_REF_REGNO (*use
) == FLAGS_REG
)
17180 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
17184 if (insn
== BB_END (bb
))
17187 insn
= NEXT_INSN (insn
);
17190 live
= df_get_live_out(bb
);
17191 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
17194 /* Return true if we need to split op0 = op1 + op2 into a sequence of
17195 move and add to avoid AGU stalls. */
17198 ix86_avoid_lea_for_add (rtx insn
, rtx operands
[])
17200 unsigned int regno0
, regno1
, regno2
;
17202 /* Check if we need to optimize. */
17203 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17206 /* Check it is correct to split here. */
17207 if (!ix86_ok_to_clobber_flags(insn
))
17210 regno0
= true_regnum (operands
[0]);
17211 regno1
= true_regnum (operands
[1]);
17212 regno2
= true_regnum (operands
[2]);
17214 /* We need to split only adds with non destructive
17215 destination operand. */
17216 if (regno0
== regno1
|| regno0
== regno2
)
17219 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1);
17222 /* Return true if we should emit lea instruction instead of mov
17226 ix86_use_lea_for_mov (rtx insn
, rtx operands
[])
17228 unsigned int regno0
, regno1
;
17230 /* Check if we need to optimize. */
17231 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17234 /* Use lea for reg to reg moves only. */
17235 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
17238 regno0
= true_regnum (operands
[0]);
17239 regno1
= true_regnum (operands
[1]);
17241 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0);
17244 /* Return true if we need to split lea into a sequence of
17245 instructions to avoid AGU stalls. */
17248 ix86_avoid_lea_for_addr (rtx insn
, rtx operands
[])
17250 unsigned int regno0
, regno1
, regno2
;
17252 struct ix86_address parts
;
17255 /* Check we need to optimize. */
17256 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17259 /* Check it is correct to split here. */
17260 if (!ix86_ok_to_clobber_flags(insn
))
17263 ok
= ix86_decompose_address (operands
[1], &parts
);
17266 /* There should be at least two components in the address. */
17267 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
17268 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
17271 /* We should not split into add if non legitimate pic
17272 operand is used as displacement. */
17273 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
17276 regno0
= true_regnum (operands
[0]) ;
17277 regno1
= INVALID_REGNUM
;
17278 regno2
= INVALID_REGNUM
;
17281 regno1
= true_regnum (parts
.base
);
17283 regno2
= true_regnum (parts
.index
);
17287 /* Compute how many cycles we will add to execution time
17288 if split lea into a sequence of instructions. */
17289 if (parts
.base
|| parts
.index
)
17291 /* Have to use mov instruction if non desctructive
17292 destination form is used. */
17293 if (regno1
!= regno0
&& regno2
!= regno0
)
17296 /* Have to add index to base if both exist. */
17297 if (parts
.base
&& parts
.index
)
17300 /* Have to use shift and adds if scale is 2 or greater. */
17301 if (parts
.scale
> 1)
17303 if (regno0
!= regno1
)
17305 else if (regno2
== regno0
)
17308 split_cost
+= parts
.scale
;
17311 /* Have to use add instruction with immediate if
17312 disp is non zero. */
17313 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17316 /* Subtract the price of lea. */
17320 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
);
17323 /* Emit x86 binary operand CODE in mode MODE, where the first operand
17324 matches destination. RTX includes clobber of FLAGS_REG. */
17327 ix86_emit_binop (enum rtx_code code
, enum machine_mode mode
,
17332 op
= gen_rtx_SET (VOIDmode
, dst
, gen_rtx_fmt_ee (code
, mode
, dst
, src
));
17333 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
17335 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, op
, clob
)));
17338 /* Return true if regno1 def is nearest to the insn. */
17341 find_nearest_reg_def (rtx insn
, int regno1
, int regno2
)
17344 rtx start
= BB_HEAD (BLOCK_FOR_INSN (insn
));
17348 while (prev
&& prev
!= start
)
17350 if (!INSN_P (prev
) || !NONDEBUG_INSN_P (prev
))
17352 prev
= PREV_INSN (prev
);
17355 if (insn_defines_reg (regno1
, INVALID_REGNUM
, prev
))
17357 else if (insn_defines_reg (regno2
, INVALID_REGNUM
, prev
))
17359 prev
= PREV_INSN (prev
);
17362 /* None of the regs is defined in the bb. */
17366 /* Split lea instructions into a sequence of instructions
17367 which are executed on ALU to avoid AGU stalls.
17368 It is assumed that it is allowed to clobber flags register
17369 at lea position. */
17372 ix86_split_lea_for_addr (rtx insn
, rtx operands
[], enum machine_mode mode
)
17374 unsigned int regno0
, regno1
, regno2
;
17375 struct ix86_address parts
;
17379 ok
= ix86_decompose_address (operands
[1], &parts
);
17382 target
= gen_lowpart (mode
, operands
[0]);
17384 regno0
= true_regnum (target
);
17385 regno1
= INVALID_REGNUM
;
17386 regno2
= INVALID_REGNUM
;
17390 parts
.base
= gen_lowpart (mode
, parts
.base
);
17391 regno1
= true_regnum (parts
.base
);
17396 parts
.index
= gen_lowpart (mode
, parts
.index
);
17397 regno2
= true_regnum (parts
.index
);
17401 parts
.disp
= gen_lowpart (mode
, parts
.disp
);
17403 if (parts
.scale
> 1)
17405 /* Case r1 = r1 + ... */
17406 if (regno1
== regno0
)
17408 /* If we have a case r1 = r1 + C * r1 then we
17409 should use multiplication which is very
17410 expensive. Assume cost model is wrong if we
17411 have such case here. */
17412 gcc_assert (regno2
!= regno0
);
17414 for (adds
= parts
.scale
; adds
> 0; adds
--)
17415 ix86_emit_binop (PLUS
, mode
, target
, parts
.index
);
17419 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
17420 if (regno0
!= regno2
)
17421 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17423 /* Use shift for scaling. */
17424 ix86_emit_binop (ASHIFT
, mode
, target
,
17425 GEN_INT (exact_log2 (parts
.scale
)));
17428 ix86_emit_binop (PLUS
, mode
, target
, parts
.base
);
17430 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17431 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17434 else if (!parts
.base
&& !parts
.index
)
17436 gcc_assert(parts
.disp
);
17437 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.disp
));
17443 if (regno0
!= regno2
)
17444 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.index
));
17446 else if (!parts
.index
)
17448 if (regno0
!= regno1
)
17449 emit_insn (gen_rtx_SET (VOIDmode
, target
, parts
.base
));
17453 if (regno0
== regno1
)
17455 else if (regno0
== regno2
)
17461 /* Find better operand for SET instruction, depending
17462 on which definition is farther from the insn. */
17463 if (find_nearest_reg_def (insn
, regno1
, regno2
))
17464 tmp
= parts
.index
, tmp1
= parts
.base
;
17466 tmp
= parts
.base
, tmp1
= parts
.index
;
17468 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
17470 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17471 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17473 ix86_emit_binop (PLUS
, mode
, target
, tmp1
);
17477 ix86_emit_binop (PLUS
, mode
, target
, tmp
);
17480 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
17481 ix86_emit_binop (PLUS
, mode
, target
, parts
.disp
);
17485 /* Return true if it is ok to optimize an ADD operation to LEA
17486 operation to avoid flag register consumation. For most processors,
17487 ADD is faster than LEA. For the processors like ATOM, if the
17488 destination register of LEA holds an actual address which will be
17489 used soon, LEA is better and otherwise ADD is better. */
17492 ix86_lea_for_add_ok (rtx insn
, rtx operands
[])
17494 unsigned int regno0
= true_regnum (operands
[0]);
17495 unsigned int regno1
= true_regnum (operands
[1]);
17496 unsigned int regno2
= true_regnum (operands
[2]);
17498 /* If a = b + c, (a!=b && a!=c), must use lea form. */
17499 if (regno0
!= regno1
&& regno0
!= regno2
)
17502 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
17505 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0);
17508 /* Return true if destination reg of SET_BODY is shift count of
17512 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
17518 /* Retrieve destination of SET_BODY. */
17519 switch (GET_CODE (set_body
))
17522 set_dest
= SET_DEST (set_body
);
17523 if (!set_dest
|| !REG_P (set_dest
))
17527 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
17528 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
17536 /* Retrieve shift count of USE_BODY. */
17537 switch (GET_CODE (use_body
))
17540 shift_rtx
= XEXP (use_body
, 1);
17543 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
17544 if (ix86_dep_by_shift_count_body (set_body
,
17545 XVECEXP (use_body
, 0, i
)))
17553 && (GET_CODE (shift_rtx
) == ASHIFT
17554 || GET_CODE (shift_rtx
) == LSHIFTRT
17555 || GET_CODE (shift_rtx
) == ASHIFTRT
17556 || GET_CODE (shift_rtx
) == ROTATE
17557 || GET_CODE (shift_rtx
) == ROTATERT
))
17559 rtx shift_count
= XEXP (shift_rtx
, 1);
17561 /* Return true if shift count is dest of SET_BODY. */
17562 if (REG_P (shift_count
))
17564 /* Add check since it can be invoked before register
17565 allocation in pre-reload schedule. */
17566 if (reload_completed
17567 && true_regnum (set_dest
) == true_regnum (shift_count
))
17569 else if (REGNO(set_dest
) == REGNO(shift_count
))
17577 /* Return true if destination reg of SET_INSN is shift count of
17581 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
17583 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
17584 PATTERN (use_insn
));
17587 /* Return TRUE or FALSE depending on whether the unary operator meets the
17588 appropriate constraints. */
17591 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED
,
17592 enum machine_mode mode ATTRIBUTE_UNUSED
,
17593 rtx operands
[2] ATTRIBUTE_UNUSED
)
17595 /* If one of operands is memory, source and destination must match. */
17596 if ((MEM_P (operands
[0])
17597 || MEM_P (operands
[1]))
17598 && ! rtx_equal_p (operands
[0], operands
[1]))
17603 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
17604 are ok, keeping in mind the possible movddup alternative. */
17607 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
17609 if (MEM_P (operands
[0]))
17610 return rtx_equal_p (operands
[0], operands
[1 + high
]);
17611 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
17612 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
17616 /* Post-reload splitter for converting an SF or DFmode value in an
17617 SSE register into an unsigned SImode. */
17620 ix86_split_convert_uns_si_sse (rtx operands
[])
17622 enum machine_mode vecmode
;
17623 rtx value
, large
, zero_or_two31
, input
, two31
, x
;
17625 large
= operands
[1];
17626 zero_or_two31
= operands
[2];
17627 input
= operands
[3];
17628 two31
= operands
[4];
17629 vecmode
= GET_MODE (large
);
17630 value
= gen_rtx_REG (vecmode
, REGNO (operands
[0]));
17632 /* Load up the value into the low element. We must ensure that the other
17633 elements are valid floats -- zero is the easiest such value. */
17636 if (vecmode
== V4SFmode
)
17637 emit_insn (gen_vec_setv4sf_0 (value
, CONST0_RTX (V4SFmode
), input
));
17639 emit_insn (gen_sse2_loadlpd (value
, CONST0_RTX (V2DFmode
), input
));
17643 input
= gen_rtx_REG (vecmode
, REGNO (input
));
17644 emit_move_insn (value
, CONST0_RTX (vecmode
));
17645 if (vecmode
== V4SFmode
)
17646 emit_insn (gen_sse_movss (value
, value
, input
));
17648 emit_insn (gen_sse2_movsd (value
, value
, input
));
17651 emit_move_insn (large
, two31
);
17652 emit_move_insn (zero_or_two31
, MEM_P (two31
) ? large
: two31
);
17654 x
= gen_rtx_fmt_ee (LE
, vecmode
, large
, value
);
17655 emit_insn (gen_rtx_SET (VOIDmode
, large
, x
));
17657 x
= gen_rtx_AND (vecmode
, zero_or_two31
, large
);
17658 emit_insn (gen_rtx_SET (VOIDmode
, zero_or_two31
, x
));
17660 x
= gen_rtx_MINUS (vecmode
, value
, zero_or_two31
);
17661 emit_insn (gen_rtx_SET (VOIDmode
, value
, x
));
17663 large
= gen_rtx_REG (V4SImode
, REGNO (large
));
17664 emit_insn (gen_ashlv4si3 (large
, large
, GEN_INT (31)));
17666 x
= gen_rtx_REG (V4SImode
, REGNO (value
));
17667 if (vecmode
== V4SFmode
)
17668 emit_insn (gen_fix_truncv4sfv4si2 (x
, value
));
17670 emit_insn (gen_sse2_cvttpd2dq (x
, value
));
17673 emit_insn (gen_xorv4si3 (value
, value
, large
));
17676 /* Convert an unsigned DImode value into a DFmode, using only SSE.
17677 Expects the 64-bit DImode to be supplied in a pair of integral
17678 registers. Requires SSE2; will use SSE3 if available. For x86_32,
17679 -mfpmath=sse, !optimize_size only. */
17682 ix86_expand_convert_uns_didf_sse (rtx target
, rtx input
)
17684 REAL_VALUE_TYPE bias_lo_rvt
, bias_hi_rvt
;
17685 rtx int_xmm
, fp_xmm
;
17686 rtx biases
, exponents
;
17689 int_xmm
= gen_reg_rtx (V4SImode
);
17690 if (TARGET_INTER_UNIT_MOVES
)
17691 emit_insn (gen_movdi_to_sse (int_xmm
, input
));
17692 else if (TARGET_SSE_SPLIT_REGS
)
17694 emit_clobber (int_xmm
);
17695 emit_move_insn (gen_lowpart (DImode
, int_xmm
), input
);
17699 x
= gen_reg_rtx (V2DImode
);
17700 ix86_expand_vector_init_one_nonzero (false, V2DImode
, x
, input
, 0);
17701 emit_move_insn (int_xmm
, gen_lowpart (V4SImode
, x
));
17704 x
= gen_rtx_CONST_VECTOR (V4SImode
,
17705 gen_rtvec (4, GEN_INT (0x43300000UL
),
17706 GEN_INT (0x45300000UL
),
17707 const0_rtx
, const0_rtx
));
17708 exponents
= validize_mem (force_const_mem (V4SImode
, x
));
17710 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
17711 emit_insn (gen_vec_interleave_lowv4si (int_xmm
, int_xmm
, exponents
));
17713 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
17714 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
17715 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
17716 (0x1.0p84 + double(fp_value_hi_xmm)).
17717 Note these exponents differ by 32. */
17719 fp_xmm
= copy_to_mode_reg (V2DFmode
, gen_lowpart (V2DFmode
, int_xmm
));
17721 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
17722 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
17723 real_ldexp (&bias_lo_rvt
, &dconst1
, 52);
17724 real_ldexp (&bias_hi_rvt
, &dconst1
, 84);
17725 biases
= const_double_from_real_value (bias_lo_rvt
, DFmode
);
17726 x
= const_double_from_real_value (bias_hi_rvt
, DFmode
);
17727 biases
= gen_rtx_CONST_VECTOR (V2DFmode
, gen_rtvec (2, biases
, x
));
17728 biases
= validize_mem (force_const_mem (V2DFmode
, biases
));
17729 emit_insn (gen_subv2df3 (fp_xmm
, fp_xmm
, biases
));
17731 /* Add the upper and lower DFmode values together. */
17733 emit_insn (gen_sse3_haddv2df3 (fp_xmm
, fp_xmm
, fp_xmm
));
17736 x
= copy_to_mode_reg (V2DFmode
, fp_xmm
);
17737 emit_insn (gen_vec_interleave_highv2df (fp_xmm
, fp_xmm
, fp_xmm
));
17738 emit_insn (gen_addv2df3 (fp_xmm
, fp_xmm
, x
));
17741 ix86_expand_vector_extract (false, target
, fp_xmm
, 0);
17744 /* Not used, but eases macroization of patterns. */
17746 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED
,
17747 rtx input ATTRIBUTE_UNUSED
)
17749 gcc_unreachable ();
17752 /* Convert an unsigned SImode value into a DFmode. Only currently used
17753 for SSE, but applicable anywhere. */
17756 ix86_expand_convert_uns_sidf_sse (rtx target
, rtx input
)
17758 REAL_VALUE_TYPE TWO31r
;
17761 x
= expand_simple_binop (SImode
, PLUS
, input
, GEN_INT (-2147483647 - 1),
17762 NULL
, 1, OPTAB_DIRECT
);
17764 fp
= gen_reg_rtx (DFmode
);
17765 emit_insn (gen_floatsidf2 (fp
, x
));
17767 real_ldexp (&TWO31r
, &dconst1
, 31);
17768 x
= const_double_from_real_value (TWO31r
, DFmode
);
17770 x
= expand_simple_binop (DFmode
, PLUS
, fp
, x
, target
, 0, OPTAB_DIRECT
);
17772 emit_move_insn (target
, x
);
17775 /* Convert a signed DImode value into a DFmode. Only used for SSE in
17776 32-bit mode; otherwise we have a direct convert instruction. */
17779 ix86_expand_convert_sign_didf_sse (rtx target
, rtx input
)
17781 REAL_VALUE_TYPE TWO32r
;
17782 rtx fp_lo
, fp_hi
, x
;
17784 fp_lo
= gen_reg_rtx (DFmode
);
17785 fp_hi
= gen_reg_rtx (DFmode
);
17787 emit_insn (gen_floatsidf2 (fp_hi
, gen_highpart (SImode
, input
)));
17789 real_ldexp (&TWO32r
, &dconst1
, 32);
17790 x
= const_double_from_real_value (TWO32r
, DFmode
);
17791 fp_hi
= expand_simple_binop (DFmode
, MULT
, fp_hi
, x
, fp_hi
, 0, OPTAB_DIRECT
);
17793 ix86_expand_convert_uns_sidf_sse (fp_lo
, gen_lowpart (SImode
, input
));
17795 x
= expand_simple_binop (DFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17798 emit_move_insn (target
, x
);
17801 /* Convert an unsigned SImode value into a SFmode, using only SSE.
17802 For x86_32, -mfpmath=sse, !optimize_size only. */
17804 ix86_expand_convert_uns_sisf_sse (rtx target
, rtx input
)
17806 REAL_VALUE_TYPE ONE16r
;
17807 rtx fp_hi
, fp_lo
, int_hi
, int_lo
, x
;
17809 real_ldexp (&ONE16r
, &dconst1
, 16);
17810 x
= const_double_from_real_value (ONE16r
, SFmode
);
17811 int_lo
= expand_simple_binop (SImode
, AND
, input
, GEN_INT(0xffff),
17812 NULL
, 0, OPTAB_DIRECT
);
17813 int_hi
= expand_simple_binop (SImode
, LSHIFTRT
, input
, GEN_INT(16),
17814 NULL
, 0, OPTAB_DIRECT
);
17815 fp_hi
= gen_reg_rtx (SFmode
);
17816 fp_lo
= gen_reg_rtx (SFmode
);
17817 emit_insn (gen_floatsisf2 (fp_hi
, int_hi
));
17818 emit_insn (gen_floatsisf2 (fp_lo
, int_lo
));
17819 fp_hi
= expand_simple_binop (SFmode
, MULT
, fp_hi
, x
, fp_hi
,
17821 fp_hi
= expand_simple_binop (SFmode
, PLUS
, fp_hi
, fp_lo
, target
,
17823 if (!rtx_equal_p (target
, fp_hi
))
17824 emit_move_insn (target
, fp_hi
);
17827 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
17828 a vector of unsigned ints VAL to vector of floats TARGET. */
17831 ix86_expand_vector_convert_uns_vsivsf (rtx target
, rtx val
)
17834 REAL_VALUE_TYPE TWO16r
;
17835 enum machine_mode intmode
= GET_MODE (val
);
17836 enum machine_mode fltmode
= GET_MODE (target
);
17837 rtx (*cvt
) (rtx
, rtx
);
17839 if (intmode
== V4SImode
)
17840 cvt
= gen_floatv4siv4sf2
;
17842 cvt
= gen_floatv8siv8sf2
;
17843 tmp
[0] = ix86_build_const_vector (intmode
, 1, GEN_INT (0xffff));
17844 tmp
[0] = force_reg (intmode
, tmp
[0]);
17845 tmp
[1] = expand_simple_binop (intmode
, AND
, val
, tmp
[0], NULL_RTX
, 1,
17847 tmp
[2] = expand_simple_binop (intmode
, LSHIFTRT
, val
, GEN_INT (16),
17848 NULL_RTX
, 1, OPTAB_DIRECT
);
17849 tmp
[3] = gen_reg_rtx (fltmode
);
17850 emit_insn (cvt (tmp
[3], tmp
[1]));
17851 tmp
[4] = gen_reg_rtx (fltmode
);
17852 emit_insn (cvt (tmp
[4], tmp
[2]));
17853 real_ldexp (&TWO16r
, &dconst1
, 16);
17854 tmp
[5] = const_double_from_real_value (TWO16r
, SFmode
);
17855 tmp
[5] = force_reg (fltmode
, ix86_build_const_vector (fltmode
, 1, tmp
[5]));
17856 tmp
[6] = expand_simple_binop (fltmode
, MULT
, tmp
[4], tmp
[5], NULL_RTX
, 1,
17858 tmp
[7] = expand_simple_binop (fltmode
, PLUS
, tmp
[3], tmp
[6], target
, 1,
17860 if (tmp
[7] != target
)
17861 emit_move_insn (target
, tmp
[7]);
17864 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
17865 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
17866 This is done by doing just signed conversion if < 0x1p31, and otherwise by
17867 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
17870 ix86_expand_adjust_ufix_to_sfix_si (rtx val
, rtx
*xorp
)
17872 REAL_VALUE_TYPE TWO31r
;
17873 rtx two31r
, tmp
[4];
17874 enum machine_mode mode
= GET_MODE (val
);
17875 enum machine_mode scalarmode
= GET_MODE_INNER (mode
);
17876 enum machine_mode intmode
= GET_MODE_SIZE (mode
) == 32 ? V8SImode
: V4SImode
;
17877 rtx (*cmp
) (rtx
, rtx
, rtx
, rtx
);
17880 for (i
= 0; i
< 3; i
++)
17881 tmp
[i
] = gen_reg_rtx (mode
);
17882 real_ldexp (&TWO31r
, &dconst1
, 31);
17883 two31r
= const_double_from_real_value (TWO31r
, scalarmode
);
17884 two31r
= ix86_build_const_vector (mode
, 1, two31r
);
17885 two31r
= force_reg (mode
, two31r
);
17888 case V8SFmode
: cmp
= gen_avx_maskcmpv8sf3
; break;
17889 case V4SFmode
: cmp
= gen_sse_maskcmpv4sf3
; break;
17890 case V4DFmode
: cmp
= gen_avx_maskcmpv4df3
; break;
17891 case V2DFmode
: cmp
= gen_sse2_maskcmpv2df3
; break;
17892 default: gcc_unreachable ();
17894 tmp
[3] = gen_rtx_LE (mode
, two31r
, val
);
17895 emit_insn (cmp (tmp
[0], two31r
, val
, tmp
[3]));
17896 tmp
[1] = expand_simple_binop (mode
, AND
, tmp
[0], two31r
, tmp
[1],
17898 if (intmode
== V4SImode
|| TARGET_AVX2
)
17899 *xorp
= expand_simple_binop (intmode
, ASHIFT
,
17900 gen_lowpart (intmode
, tmp
[0]),
17901 GEN_INT (31), NULL_RTX
, 0,
17905 rtx two31
= GEN_INT ((unsigned HOST_WIDE_INT
) 1 << 31);
17906 two31
= ix86_build_const_vector (intmode
, 1, two31
);
17907 *xorp
= expand_simple_binop (intmode
, AND
,
17908 gen_lowpart (intmode
, tmp
[0]),
17909 two31
, NULL_RTX
, 0,
17912 return expand_simple_binop (mode
, MINUS
, val
, tmp
[1], tmp
[2],
17916 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
17917 then replicate the value for all elements of the vector
17921 ix86_build_const_vector (enum machine_mode mode
, bool vect
, rtx value
)
17925 enum machine_mode scalar_mode
;
17942 n_elt
= GET_MODE_NUNITS (mode
);
17943 v
= rtvec_alloc (n_elt
);
17944 scalar_mode
= GET_MODE_INNER (mode
);
17946 RTVEC_ELT (v
, 0) = value
;
17948 for (i
= 1; i
< n_elt
; ++i
)
17949 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
17951 return gen_rtx_CONST_VECTOR (mode
, v
);
17954 gcc_unreachable ();
17958 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
17959 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
17960 for an SSE register. If VECT is true, then replicate the mask for
17961 all elements of the vector register. If INVERT is true, then create
17962 a mask excluding the sign bit. */
17965 ix86_build_signbit_mask (enum machine_mode mode
, bool vect
, bool invert
)
17967 enum machine_mode vec_mode
, imode
;
17968 HOST_WIDE_INT hi
, lo
;
17973 /* Find the sign bit, sign extended to 2*HWI. */
17981 mode
= GET_MODE_INNER (mode
);
17983 lo
= 0x80000000, hi
= lo
< 0;
17991 mode
= GET_MODE_INNER (mode
);
17993 if (HOST_BITS_PER_WIDE_INT
>= 64)
17994 lo
= (HOST_WIDE_INT
)1 << shift
, hi
= -1;
17996 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18001 vec_mode
= VOIDmode
;
18002 if (HOST_BITS_PER_WIDE_INT
>= 64)
18005 lo
= 0, hi
= (HOST_WIDE_INT
)1 << shift
;
18012 lo
= 0, hi
= (HOST_WIDE_INT
)1 << (shift
- HOST_BITS_PER_WIDE_INT
);
18016 lo
= ~lo
, hi
= ~hi
;
18022 mask
= immed_double_const (lo
, hi
, imode
);
18024 vec
= gen_rtvec (2, v
, mask
);
18025 v
= gen_rtx_CONST_VECTOR (V2DImode
, vec
);
18026 v
= copy_to_mode_reg (mode
, gen_lowpart (mode
, v
));
18033 gcc_unreachable ();
18037 lo
= ~lo
, hi
= ~hi
;
18039 /* Force this value into the low part of a fp vector constant. */
18040 mask
= immed_double_const (lo
, hi
, imode
);
18041 mask
= gen_lowpart (mode
, mask
);
18043 if (vec_mode
== VOIDmode
)
18044 return force_reg (mode
, mask
);
18046 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
18047 return force_reg (vec_mode
, v
);
18050 /* Generate code for floating point ABS or NEG. */
18053 ix86_expand_fp_absneg_operator (enum rtx_code code
, enum machine_mode mode
,
18056 rtx mask
, set
, dst
, src
;
18057 bool use_sse
= false;
18058 bool vector_mode
= VECTOR_MODE_P (mode
);
18059 enum machine_mode vmode
= mode
;
18063 else if (mode
== TFmode
)
18065 else if (TARGET_SSE_MATH
)
18067 use_sse
= SSE_FLOAT_MODE_P (mode
);
18068 if (mode
== SFmode
)
18070 else if (mode
== DFmode
)
18074 /* NEG and ABS performed with SSE use bitwise mask operations.
18075 Create the appropriate mask now. */
18077 mask
= ix86_build_signbit_mask (vmode
, vector_mode
, code
== ABS
);
18084 set
= gen_rtx_fmt_e (code
, mode
, src
);
18085 set
= gen_rtx_SET (VOIDmode
, dst
, set
);
18092 use
= gen_rtx_USE (VOIDmode
, mask
);
18094 par
= gen_rtvec (2, set
, use
);
18097 clob
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (CCmode
, FLAGS_REG
));
18098 par
= gen_rtvec (3, set
, use
, clob
);
18100 emit_insn (gen_rtx_PARALLEL (VOIDmode
, par
));
18106 /* Expand a copysign operation. Special case operand 0 being a constant. */
18109 ix86_expand_copysign (rtx operands
[])
18111 enum machine_mode mode
, vmode
;
18112 rtx dest
, op0
, op1
, mask
, nmask
;
18114 dest
= operands
[0];
18118 mode
= GET_MODE (dest
);
18120 if (mode
== SFmode
)
18122 else if (mode
== DFmode
)
18127 if (GET_CODE (op0
) == CONST_DOUBLE
)
18129 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
);
18131 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0
)))
18132 op0
= simplify_unary_operation (ABS
, mode
, op0
, mode
);
18134 if (mode
== SFmode
|| mode
== DFmode
)
18136 if (op0
== CONST0_RTX (mode
))
18137 op0
= CONST0_RTX (vmode
);
18140 rtx v
= ix86_build_const_vector (vmode
, false, op0
);
18142 op0
= force_reg (vmode
, v
);
18145 else if (op0
!= CONST0_RTX (mode
))
18146 op0
= force_reg (mode
, op0
);
18148 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18150 if (mode
== SFmode
)
18151 copysign_insn
= gen_copysignsf3_const
;
18152 else if (mode
== DFmode
)
18153 copysign_insn
= gen_copysigndf3_const
;
18155 copysign_insn
= gen_copysigntf3_const
;
18157 emit_insn (copysign_insn (dest
, op0
, op1
, mask
));
18161 rtx (*copysign_insn
)(rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
18163 nmask
= ix86_build_signbit_mask (vmode
, 0, 1);
18164 mask
= ix86_build_signbit_mask (vmode
, 0, 0);
18166 if (mode
== SFmode
)
18167 copysign_insn
= gen_copysignsf3_var
;
18168 else if (mode
== DFmode
)
18169 copysign_insn
= gen_copysigndf3_var
;
18171 copysign_insn
= gen_copysigntf3_var
;
18173 emit_insn (copysign_insn (dest
, NULL_RTX
, op0
, op1
, nmask
, mask
));
18177 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
18178 be a constant, and so has already been expanded into a vector constant. */
18181 ix86_split_copysign_const (rtx operands
[])
18183 enum machine_mode mode
, vmode
;
18184 rtx dest
, op0
, mask
, x
;
18186 dest
= operands
[0];
18188 mask
= operands
[3];
18190 mode
= GET_MODE (dest
);
18191 vmode
= GET_MODE (mask
);
18193 dest
= simplify_gen_subreg (vmode
, dest
, mode
, 0);
18194 x
= gen_rtx_AND (vmode
, dest
, mask
);
18195 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18197 if (op0
!= CONST0_RTX (vmode
))
18199 x
= gen_rtx_IOR (vmode
, dest
, op0
);
18200 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18204 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
18205 so we have to do two masks. */
18208 ix86_split_copysign_var (rtx operands
[])
18210 enum machine_mode mode
, vmode
;
18211 rtx dest
, scratch
, op0
, op1
, mask
, nmask
, x
;
18213 dest
= operands
[0];
18214 scratch
= operands
[1];
18217 nmask
= operands
[4];
18218 mask
= operands
[5];
18220 mode
= GET_MODE (dest
);
18221 vmode
= GET_MODE (mask
);
18223 if (rtx_equal_p (op0
, op1
))
18225 /* Shouldn't happen often (it's useless, obviously), but when it does
18226 we'd generate incorrect code if we continue below. */
18227 emit_move_insn (dest
, op0
);
18231 if (REG_P (mask
) && REGNO (dest
) == REGNO (mask
)) /* alternative 0 */
18233 gcc_assert (REGNO (op1
) == REGNO (scratch
));
18235 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18236 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18239 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18240 x
= gen_rtx_NOT (vmode
, dest
);
18241 x
= gen_rtx_AND (vmode
, x
, op0
);
18242 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18246 if (REGNO (op1
) == REGNO (scratch
)) /* alternative 1,3 */
18248 x
= gen_rtx_AND (vmode
, scratch
, mask
);
18250 else /* alternative 2,4 */
18252 gcc_assert (REGNO (mask
) == REGNO (scratch
));
18253 op1
= simplify_gen_subreg (vmode
, op1
, mode
, 0);
18254 x
= gen_rtx_AND (vmode
, scratch
, op1
);
18256 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, x
));
18258 if (REGNO (op0
) == REGNO (dest
)) /* alternative 1,2 */
18260 dest
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18261 x
= gen_rtx_AND (vmode
, dest
, nmask
);
18263 else /* alternative 3,4 */
18265 gcc_assert (REGNO (nmask
) == REGNO (dest
));
18267 op0
= simplify_gen_subreg (vmode
, op0
, mode
, 0);
18268 x
= gen_rtx_AND (vmode
, dest
, op0
);
18270 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18273 x
= gen_rtx_IOR (vmode
, dest
, scratch
);
18274 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
18277 /* Return TRUE or FALSE depending on whether the first SET in INSN
18278 has source and destination with matching CC modes, and that the
18279 CC mode is at least as constrained as REQ_MODE. */
18282 ix86_match_ccmode (rtx insn
, enum machine_mode req_mode
)
18285 enum machine_mode set_mode
;
18287 set
= PATTERN (insn
);
18288 if (GET_CODE (set
) == PARALLEL
)
18289 set
= XVECEXP (set
, 0, 0);
18290 gcc_assert (GET_CODE (set
) == SET
);
18291 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
18293 set_mode
= GET_MODE (SET_DEST (set
));
18297 if (req_mode
!= CCNOmode
18298 && (req_mode
!= CCmode
18299 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
18303 if (req_mode
== CCGCmode
)
18307 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
18311 if (req_mode
== CCZmode
)
18321 if (set_mode
!= req_mode
)
18326 gcc_unreachable ();
18329 return GET_MODE (SET_SRC (set
)) == set_mode
;
18332 /* Generate insn patterns to do an integer compare of OPERANDS. */
18335 ix86_expand_int_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18337 enum machine_mode cmpmode
;
18340 cmpmode
= SELECT_CC_MODE (code
, op0
, op1
);
18341 flags
= gen_rtx_REG (cmpmode
, FLAGS_REG
);
18343 /* This is very simple, but making the interface the same as in the
18344 FP case makes the rest of the code easier. */
18345 tmp
= gen_rtx_COMPARE (cmpmode
, op0
, op1
);
18346 emit_insn (gen_rtx_SET (VOIDmode
, flags
, tmp
));
18348 /* Return the test that should be put into the flags user, i.e.
18349 the bcc, scc, or cmov instruction. */
18350 return gen_rtx_fmt_ee (code
, VOIDmode
, flags
, const0_rtx
);
18353 /* Figure out whether to use ordered or unordered fp comparisons.
18354 Return the appropriate mode to use. */
18357 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED
)
18359 /* ??? In order to make all comparisons reversible, we do all comparisons
18360 non-trapping when compiling for IEEE. Once gcc is able to distinguish
18361 all forms trapping and nontrapping comparisons, we can make inequality
18362 comparisons trapping again, since it results in better code when using
18363 FCOM based compares. */
18364 return TARGET_IEEE_FP
? CCFPUmode
: CCFPmode
;
18368 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
18370 enum machine_mode mode
= GET_MODE (op0
);
18372 if (SCALAR_FLOAT_MODE_P (mode
))
18374 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
18375 return ix86_fp_compare_mode (code
);
18380 /* Only zero flag is needed. */
18381 case EQ
: /* ZF=0 */
18382 case NE
: /* ZF!=0 */
18384 /* Codes needing carry flag. */
18385 case GEU
: /* CF=0 */
18386 case LTU
: /* CF=1 */
18387 /* Detect overflow checks. They need just the carry flag. */
18388 if (GET_CODE (op0
) == PLUS
18389 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18393 case GTU
: /* CF=0 & ZF=0 */
18394 case LEU
: /* CF=1 | ZF=1 */
18395 /* Detect overflow checks. They need just the carry flag. */
18396 if (GET_CODE (op0
) == MINUS
18397 && rtx_equal_p (op1
, XEXP (op0
, 0)))
18401 /* Codes possibly doable only with sign flag when
18402 comparing against zero. */
18403 case GE
: /* SF=OF or SF=0 */
18404 case LT
: /* SF<>OF or SF=1 */
18405 if (op1
== const0_rtx
)
18408 /* For other cases Carry flag is not required. */
18410 /* Codes doable only with sign flag when comparing
18411 against zero, but we miss jump instruction for it
18412 so we need to use relational tests against overflow
18413 that thus needs to be zero. */
18414 case GT
: /* ZF=0 & SF=OF */
18415 case LE
: /* ZF=1 | SF<>OF */
18416 if (op1
== const0_rtx
)
18420 /* strcmp pattern do (use flags) and combine may ask us for proper
18425 gcc_unreachable ();
18429 /* Return the fixed registers used for condition codes. */
18432 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
18439 /* If two condition code modes are compatible, return a condition code
18440 mode which is compatible with both. Otherwise, return
18443 static enum machine_mode
18444 ix86_cc_modes_compatible (enum machine_mode m1
, enum machine_mode m2
)
18449 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
18452 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
18453 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
18456 if (m1
== CCZmode
&& (m2
== CCGCmode
|| m2
== CCGOCmode
))
18458 else if (m2
== CCZmode
&& (m1
== CCGCmode
|| m1
== CCGOCmode
))
18464 gcc_unreachable ();
18494 /* These are only compatible with themselves, which we already
18501 /* Return a comparison we can do and that it is equivalent to
18502 swap_condition (code) apart possibly from orderedness.
18503 But, never change orderedness if TARGET_IEEE_FP, returning
18504 UNKNOWN in that case if necessary. */
18506 static enum rtx_code
18507 ix86_fp_swap_condition (enum rtx_code code
)
18511 case GT
: /* GTU - CF=0 & ZF=0 */
18512 return TARGET_IEEE_FP
? UNKNOWN
: UNLT
;
18513 case GE
: /* GEU - CF=0 */
18514 return TARGET_IEEE_FP
? UNKNOWN
: UNLE
;
18515 case UNLT
: /* LTU - CF=1 */
18516 return TARGET_IEEE_FP
? UNKNOWN
: GT
;
18517 case UNLE
: /* LEU - CF=1 | ZF=1 */
18518 return TARGET_IEEE_FP
? UNKNOWN
: GE
;
18520 return swap_condition (code
);
18524 /* Return cost of comparison CODE using the best strategy for performance.
18525 All following functions do use number of instructions as a cost metrics.
18526 In future this should be tweaked to compute bytes for optimize_size and
18527 take into account performance of various instructions on various CPUs. */
18530 ix86_fp_comparison_cost (enum rtx_code code
)
18534 /* The cost of code using bit-twiddling on %ah. */
18551 arith_cost
= TARGET_IEEE_FP
? 5 : 4;
18555 arith_cost
= TARGET_IEEE_FP
? 6 : 4;
18558 gcc_unreachable ();
18561 switch (ix86_fp_comparison_strategy (code
))
18563 case IX86_FPCMP_COMI
:
18564 return arith_cost
> 4 ? 3 : 2;
18565 case IX86_FPCMP_SAHF
:
18566 return arith_cost
> 4 ? 4 : 3;
18572 /* Return strategy to use for floating-point. We assume that fcomi is always
18573 preferrable where available, since that is also true when looking at size
18574 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
18576 enum ix86_fpcmp_strategy
18577 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED
)
18579 /* Do fcomi/sahf based test when profitable. */
18582 return IX86_FPCMP_COMI
;
18584 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_function_for_size_p (cfun
)))
18585 return IX86_FPCMP_SAHF
;
18587 return IX86_FPCMP_ARITH
;
18590 /* Swap, force into registers, or otherwise massage the two operands
18591 to a fp comparison. The operands are updated in place; the new
18592 comparison code is returned. */
18594 static enum rtx_code
18595 ix86_prepare_fp_compare_args (enum rtx_code code
, rtx
*pop0
, rtx
*pop1
)
18597 enum machine_mode fpcmp_mode
= ix86_fp_compare_mode (code
);
18598 rtx op0
= *pop0
, op1
= *pop1
;
18599 enum machine_mode op_mode
= GET_MODE (op0
);
18600 int is_sse
= TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (op_mode
);
18602 /* All of the unordered compare instructions only work on registers.
18603 The same is true of the fcomi compare instructions. The XFmode
18604 compare instructions require registers except when comparing
18605 against zero or when converting operand 1 from fixed point to
18609 && (fpcmp_mode
== CCFPUmode
18610 || (op_mode
== XFmode
18611 && ! (standard_80387_constant_p (op0
) == 1
18612 || standard_80387_constant_p (op1
) == 1)
18613 && GET_CODE (op1
) != FLOAT
)
18614 || ix86_fp_comparison_strategy (code
) == IX86_FPCMP_COMI
))
18616 op0
= force_reg (op_mode
, op0
);
18617 op1
= force_reg (op_mode
, op1
);
18621 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
18622 things around if they appear profitable, otherwise force op0
18623 into a register. */
18625 if (standard_80387_constant_p (op0
) == 0
18627 && ! (standard_80387_constant_p (op1
) == 0
18630 enum rtx_code new_code
= ix86_fp_swap_condition (code
);
18631 if (new_code
!= UNKNOWN
)
18634 tmp
= op0
, op0
= op1
, op1
= tmp
;
18640 op0
= force_reg (op_mode
, op0
);
18642 if (CONSTANT_P (op1
))
18644 int tmp
= standard_80387_constant_p (op1
);
18646 op1
= validize_mem (force_const_mem (op_mode
, op1
));
18650 op1
= force_reg (op_mode
, op1
);
18653 op1
= force_reg (op_mode
, op1
);
18657 /* Try to rearrange the comparison to make it cheaper. */
18658 if (ix86_fp_comparison_cost (code
)
18659 > ix86_fp_comparison_cost (swap_condition (code
))
18660 && (REG_P (op1
) || can_create_pseudo_p ()))
18663 tmp
= op0
, op0
= op1
, op1
= tmp
;
18664 code
= swap_condition (code
);
18666 op0
= force_reg (op_mode
, op0
);
18674 /* Convert comparison codes we use to represent FP comparison to integer
18675 code that will result in proper branch. Return UNKNOWN if no such code
18679 ix86_fp_compare_code_to_integer (enum rtx_code code
)
18708 /* Generate insn patterns to do a floating point compare of OPERANDS. */
18711 ix86_expand_fp_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx scratch
)
18713 enum machine_mode fpcmp_mode
, intcmp_mode
;
18716 fpcmp_mode
= ix86_fp_compare_mode (code
);
18717 code
= ix86_prepare_fp_compare_args (code
, &op0
, &op1
);
18719 /* Do fcomi/sahf based test when profitable. */
18720 switch (ix86_fp_comparison_strategy (code
))
18722 case IX86_FPCMP_COMI
:
18723 intcmp_mode
= fpcmp_mode
;
18724 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18725 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18730 case IX86_FPCMP_SAHF
:
18731 intcmp_mode
= fpcmp_mode
;
18732 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18733 tmp
= gen_rtx_SET (VOIDmode
, gen_rtx_REG (fpcmp_mode
, FLAGS_REG
),
18737 scratch
= gen_reg_rtx (HImode
);
18738 tmp2
= gen_rtx_CLOBBER (VOIDmode
, scratch
);
18739 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, tmp
, tmp2
)));
18742 case IX86_FPCMP_ARITH
:
18743 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
18744 tmp
= gen_rtx_COMPARE (fpcmp_mode
, op0
, op1
);
18745 tmp2
= gen_rtx_UNSPEC (HImode
, gen_rtvec (1, tmp
), UNSPEC_FNSTSW
);
18747 scratch
= gen_reg_rtx (HImode
);
18748 emit_insn (gen_rtx_SET (VOIDmode
, scratch
, tmp2
));
18750 /* In the unordered case, we have to check C2 for NaN's, which
18751 doesn't happen to work out to anything nice combination-wise.
18752 So do some bit twiddling on the value we've got in AH to come
18753 up with an appropriate set of condition codes. */
18755 intcmp_mode
= CCNOmode
;
18760 if (code
== GT
|| !TARGET_IEEE_FP
)
18762 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18767 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18768 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18769 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x44)));
18770 intcmp_mode
= CCmode
;
18776 if (code
== LT
&& TARGET_IEEE_FP
)
18778 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18779 emit_insn (gen_cmpqi_ext_3 (scratch
, const1_rtx
));
18780 intcmp_mode
= CCmode
;
18785 emit_insn (gen_testqi_ext_ccno_0 (scratch
, const1_rtx
));
18791 if (code
== GE
|| !TARGET_IEEE_FP
)
18793 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x05)));
18798 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18799 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
, const1_rtx
));
18805 if (code
== LE
&& TARGET_IEEE_FP
)
18807 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18808 emit_insn (gen_addqi_ext_1 (scratch
, scratch
, constm1_rtx
));
18809 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18810 intcmp_mode
= CCmode
;
18815 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x45)));
18821 if (code
== EQ
&& TARGET_IEEE_FP
)
18823 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18824 emit_insn (gen_cmpqi_ext_3 (scratch
, GEN_INT (0x40)));
18825 intcmp_mode
= CCmode
;
18830 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18836 if (code
== NE
&& TARGET_IEEE_FP
)
18838 emit_insn (gen_andqi_ext_0 (scratch
, scratch
, GEN_INT (0x45)));
18839 emit_insn (gen_xorqi_cc_ext_1 (scratch
, scratch
,
18845 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x40)));
18851 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18855 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x04)));
18860 gcc_unreachable ();
18868 /* Return the test that should be put into the flags user, i.e.
18869 the bcc, scc, or cmov instruction. */
18870 return gen_rtx_fmt_ee (code
, VOIDmode
,
18871 gen_rtx_REG (intcmp_mode
, FLAGS_REG
),
18876 ix86_expand_compare (enum rtx_code code
, rtx op0
, rtx op1
)
18880 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
)
18881 ret
= gen_rtx_fmt_ee (code
, VOIDmode
, op0
, op1
);
18883 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0
)))
18885 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0
)));
18886 ret
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
18889 ret
= ix86_expand_int_compare (code
, op0
, op1
);
18895 ix86_expand_branch (enum rtx_code code
, rtx op0
, rtx op1
, rtx label
)
18897 enum machine_mode mode
= GET_MODE (op0
);
18909 tmp
= ix86_expand_compare (code
, op0
, op1
);
18910 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
18911 gen_rtx_LABEL_REF (VOIDmode
, label
),
18913 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
18920 /* Expand DImode branch into multiple compare+branch. */
18922 rtx lo
[2], hi
[2], label2
;
18923 enum rtx_code code1
, code2
, code3
;
18924 enum machine_mode submode
;
18926 if (CONSTANT_P (op0
) && !CONSTANT_P (op1
))
18928 tmp
= op0
, op0
= op1
, op1
= tmp
;
18929 code
= swap_condition (code
);
18932 split_double_mode (mode
, &op0
, 1, lo
+0, hi
+0);
18933 split_double_mode (mode
, &op1
, 1, lo
+1, hi
+1);
18935 submode
= mode
== DImode
? SImode
: DImode
;
18937 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
18938 avoid two branches. This costs one extra insn, so disable when
18939 optimizing for size. */
18941 if ((code
== EQ
|| code
== NE
)
18942 && (!optimize_insn_for_size_p ()
18943 || hi
[1] == const0_rtx
|| lo
[1] == const0_rtx
))
18948 if (hi
[1] != const0_rtx
)
18949 xor1
= expand_binop (submode
, xor_optab
, xor1
, hi
[1],
18950 NULL_RTX
, 0, OPTAB_WIDEN
);
18953 if (lo
[1] != const0_rtx
)
18954 xor0
= expand_binop (submode
, xor_optab
, xor0
, lo
[1],
18955 NULL_RTX
, 0, OPTAB_WIDEN
);
18957 tmp
= expand_binop (submode
, ior_optab
, xor1
, xor0
,
18958 NULL_RTX
, 0, OPTAB_WIDEN
);
18960 ix86_expand_branch (code
, tmp
, const0_rtx
, label
);
18964 /* Otherwise, if we are doing less-than or greater-or-equal-than,
18965 op1 is a constant and the low word is zero, then we can just
18966 examine the high word. Similarly for low word -1 and
18967 less-or-equal-than or greater-than. */
18969 if (CONST_INT_P (hi
[1]))
18972 case LT
: case LTU
: case GE
: case GEU
:
18973 if (lo
[1] == const0_rtx
)
18975 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18979 case LE
: case LEU
: case GT
: case GTU
:
18980 if (lo
[1] == constm1_rtx
)
18982 ix86_expand_branch (code
, hi
[0], hi
[1], label
);
18990 /* Otherwise, we need two or three jumps. */
18992 label2
= gen_label_rtx ();
18995 code2
= swap_condition (code
);
18996 code3
= unsigned_condition (code
);
19000 case LT
: case GT
: case LTU
: case GTU
:
19003 case LE
: code1
= LT
; code2
= GT
; break;
19004 case GE
: code1
= GT
; code2
= LT
; break;
19005 case LEU
: code1
= LTU
; code2
= GTU
; break;
19006 case GEU
: code1
= GTU
; code2
= LTU
; break;
19008 case EQ
: code1
= UNKNOWN
; code2
= NE
; break;
19009 case NE
: code2
= UNKNOWN
; break;
19012 gcc_unreachable ();
19017 * if (hi(a) < hi(b)) goto true;
19018 * if (hi(a) > hi(b)) goto false;
19019 * if (lo(a) < lo(b)) goto true;
19023 if (code1
!= UNKNOWN
)
19024 ix86_expand_branch (code1
, hi
[0], hi
[1], label
);
19025 if (code2
!= UNKNOWN
)
19026 ix86_expand_branch (code2
, hi
[0], hi
[1], label2
);
19028 ix86_expand_branch (code3
, lo
[0], lo
[1], label
);
19030 if (code2
!= UNKNOWN
)
19031 emit_label (label2
);
19036 gcc_assert (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_CC
);
19041 /* Split branch based on floating point condition. */
19043 ix86_split_fp_branch (enum rtx_code code
, rtx op1
, rtx op2
,
19044 rtx target1
, rtx target2
, rtx tmp
, rtx pushed
)
19049 if (target2
!= pc_rtx
)
19052 code
= reverse_condition_maybe_unordered (code
);
19057 condition
= ix86_expand_fp_compare (code
, op1
, op2
,
19060 /* Remove pushed operand from stack. */
19062 ix86_free_from_memory (GET_MODE (pushed
));
19064 i
= emit_jump_insn (gen_rtx_SET
19066 gen_rtx_IF_THEN_ELSE (VOIDmode
,
19067 condition
, target1
, target2
)));
19068 if (split_branch_probability
>= 0)
19069 add_reg_note (i
, REG_BR_PROB
, GEN_INT (split_branch_probability
));
19073 ix86_expand_setcc (rtx dest
, enum rtx_code code
, rtx op0
, rtx op1
)
19077 gcc_assert (GET_MODE (dest
) == QImode
);
19079 ret
= ix86_expand_compare (code
, op0
, op1
);
19080 PUT_MODE (ret
, QImode
);
19081 emit_insn (gen_rtx_SET (VOIDmode
, dest
, ret
));
19084 /* Expand comparison setting or clearing carry flag. Return true when
19085 successful and set pop for the operation. */
19087 ix86_expand_carry_flag_compare (enum rtx_code code
, rtx op0
, rtx op1
, rtx
*pop
)
19089 enum machine_mode mode
=
19090 GET_MODE (op0
) != VOIDmode
? GET_MODE (op0
) : GET_MODE (op1
);
19092 /* Do not handle double-mode compares that go through special path. */
19093 if (mode
== (TARGET_64BIT
? TImode
: DImode
))
19096 if (SCALAR_FLOAT_MODE_P (mode
))
19098 rtx compare_op
, compare_seq
;
19100 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
19102 /* Shortcut: following common codes never translate
19103 into carry flag compares. */
19104 if (code
== EQ
|| code
== NE
|| code
== UNEQ
|| code
== LTGT
19105 || code
== ORDERED
|| code
== UNORDERED
)
19108 /* These comparisons require zero flag; swap operands so they won't. */
19109 if ((code
== GT
|| code
== UNLE
|| code
== LE
|| code
== UNGT
)
19110 && !TARGET_IEEE_FP
)
19115 code
= swap_condition (code
);
19118 /* Try to expand the comparison and verify that we end up with
19119 carry flag based comparison. This fails to be true only when
19120 we decide to expand comparison using arithmetic that is not
19121 too common scenario. */
19123 compare_op
= ix86_expand_fp_compare (code
, op0
, op1
, NULL_RTX
);
19124 compare_seq
= get_insns ();
19127 if (GET_MODE (XEXP (compare_op
, 0)) == CCFPmode
19128 || GET_MODE (XEXP (compare_op
, 0)) == CCFPUmode
)
19129 code
= ix86_fp_compare_code_to_integer (GET_CODE (compare_op
));
19131 code
= GET_CODE (compare_op
);
19133 if (code
!= LTU
&& code
!= GEU
)
19136 emit_insn (compare_seq
);
19141 if (!INTEGRAL_MODE_P (mode
))
19150 /* Convert a==0 into (unsigned)a<1. */
19153 if (op1
!= const0_rtx
)
19156 code
= (code
== EQ
? LTU
: GEU
);
19159 /* Convert a>b into b<a or a>=b-1. */
19162 if (CONST_INT_P (op1
))
19164 op1
= gen_int_mode (INTVAL (op1
) + 1, GET_MODE (op0
));
19165 /* Bail out on overflow. We still can swap operands but that
19166 would force loading of the constant into register. */
19167 if (op1
== const0_rtx
19168 || !x86_64_immediate_operand (op1
, GET_MODE (op1
)))
19170 code
= (code
== GTU
? GEU
: LTU
);
19177 code
= (code
== GTU
? LTU
: GEU
);
19181 /* Convert a>=0 into (unsigned)a<0x80000000. */
19184 if (mode
== DImode
|| op1
!= const0_rtx
)
19186 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19187 code
= (code
== LT
? GEU
: LTU
);
19191 if (mode
== DImode
|| op1
!= constm1_rtx
)
19193 op1
= gen_int_mode (1 << (GET_MODE_BITSIZE (mode
) - 1), mode
);
19194 code
= (code
== LE
? GEU
: LTU
);
19200 /* Swapping operands may cause constant to appear as first operand. */
19201 if (!nonimmediate_operand (op0
, VOIDmode
))
19203 if (!can_create_pseudo_p ())
19205 op0
= force_reg (mode
, op0
);
19207 *pop
= ix86_expand_compare (code
, op0
, op1
);
19208 gcc_assert (GET_CODE (*pop
) == LTU
|| GET_CODE (*pop
) == GEU
);
19213 ix86_expand_int_movcc (rtx operands
[])
19215 enum rtx_code code
= GET_CODE (operands
[1]), compare_code
;
19216 rtx compare_seq
, compare_op
;
19217 enum machine_mode mode
= GET_MODE (operands
[0]);
19218 bool sign_bit_compare_p
= false;
19219 rtx op0
= XEXP (operands
[1], 0);
19220 rtx op1
= XEXP (operands
[1], 1);
19222 if (GET_MODE (op0
) == TImode
19223 || (GET_MODE (op0
) == DImode
19228 compare_op
= ix86_expand_compare (code
, op0
, op1
);
19229 compare_seq
= get_insns ();
19232 compare_code
= GET_CODE (compare_op
);
19234 if ((op1
== const0_rtx
&& (code
== GE
|| code
== LT
))
19235 || (op1
== constm1_rtx
&& (code
== GT
|| code
== LE
)))
19236 sign_bit_compare_p
= true;
19238 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
19239 HImode insns, we'd be swallowed in word prefix ops. */
19241 if ((mode
!= HImode
|| TARGET_FAST_PREFIX
)
19242 && (mode
!= (TARGET_64BIT
? TImode
: DImode
))
19243 && CONST_INT_P (operands
[2])
19244 && CONST_INT_P (operands
[3]))
19246 rtx out
= operands
[0];
19247 HOST_WIDE_INT ct
= INTVAL (operands
[2]);
19248 HOST_WIDE_INT cf
= INTVAL (operands
[3]);
19249 HOST_WIDE_INT diff
;
19252 /* Sign bit compares are better done using shifts than we do by using
19254 if (sign_bit_compare_p
19255 || ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
19257 /* Detect overlap between destination and compare sources. */
19260 if (!sign_bit_compare_p
)
19263 bool fpcmp
= false;
19265 compare_code
= GET_CODE (compare_op
);
19267 flags
= XEXP (compare_op
, 0);
19269 if (GET_MODE (flags
) == CCFPmode
19270 || GET_MODE (flags
) == CCFPUmode
)
19274 = ix86_fp_compare_code_to_integer (compare_code
);
19277 /* To simplify rest of code, restrict to the GEU case. */
19278 if (compare_code
== LTU
)
19280 HOST_WIDE_INT tmp
= ct
;
19283 compare_code
= reverse_condition (compare_code
);
19284 code
= reverse_condition (code
);
19289 PUT_CODE (compare_op
,
19290 reverse_condition_maybe_unordered
19291 (GET_CODE (compare_op
)));
19293 PUT_CODE (compare_op
,
19294 reverse_condition (GET_CODE (compare_op
)));
19298 if (reg_overlap_mentioned_p (out
, op0
)
19299 || reg_overlap_mentioned_p (out
, op1
))
19300 tmp
= gen_reg_rtx (mode
);
19302 if (mode
== DImode
)
19303 emit_insn (gen_x86_movdicc_0_m1 (tmp
, flags
, compare_op
));
19305 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode
, tmp
),
19306 flags
, compare_op
));
19310 if (code
== GT
|| code
== GE
)
19311 code
= reverse_condition (code
);
19314 HOST_WIDE_INT tmp
= ct
;
19319 tmp
= emit_store_flag (tmp
, code
, op0
, op1
, VOIDmode
, 0, -1);
19332 tmp
= expand_simple_binop (mode
, PLUS
,
19334 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19345 tmp
= expand_simple_binop (mode
, IOR
,
19347 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19349 else if (diff
== -1 && ct
)
19359 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19361 tmp
= expand_simple_binop (mode
, PLUS
,
19362 copy_rtx (tmp
), GEN_INT (cf
),
19363 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19371 * andl cf - ct, dest
19381 tmp
= expand_simple_unop (mode
, NOT
, tmp
, copy_rtx (tmp
), 1);
19384 tmp
= expand_simple_binop (mode
, AND
,
19386 gen_int_mode (cf
- ct
, mode
),
19387 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19389 tmp
= expand_simple_binop (mode
, PLUS
,
19390 copy_rtx (tmp
), GEN_INT (ct
),
19391 copy_rtx (tmp
), 1, OPTAB_DIRECT
);
19394 if (!rtx_equal_p (tmp
, out
))
19395 emit_move_insn (copy_rtx (out
), copy_rtx (tmp
));
19402 enum machine_mode cmp_mode
= GET_MODE (op0
);
19405 tmp
= ct
, ct
= cf
, cf
= tmp
;
19408 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19410 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19412 /* We may be reversing unordered compare to normal compare, that
19413 is not valid in general (we may convert non-trapping condition
19414 to trapping one), however on i386 we currently emit all
19415 comparisons unordered. */
19416 compare_code
= reverse_condition_maybe_unordered (compare_code
);
19417 code
= reverse_condition_maybe_unordered (code
);
19421 compare_code
= reverse_condition (compare_code
);
19422 code
= reverse_condition (code
);
19426 compare_code
= UNKNOWN
;
19427 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
19428 && CONST_INT_P (op1
))
19430 if (op1
== const0_rtx
19431 && (code
== LT
|| code
== GE
))
19432 compare_code
= code
;
19433 else if (op1
== constm1_rtx
)
19437 else if (code
== GT
)
19442 /* Optimize dest = (op0 < 0) ? -1 : cf. */
19443 if (compare_code
!= UNKNOWN
19444 && GET_MODE (op0
) == GET_MODE (out
)
19445 && (cf
== -1 || ct
== -1))
19447 /* If lea code below could be used, only optimize
19448 if it results in a 2 insn sequence. */
19450 if (! (diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19451 || diff
== 3 || diff
== 5 || diff
== 9)
19452 || (compare_code
== LT
&& ct
== -1)
19453 || (compare_code
== GE
&& cf
== -1))
19456 * notl op1 (if necessary)
19464 code
= reverse_condition (code
);
19467 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19469 out
= expand_simple_binop (mode
, IOR
,
19471 out
, 1, OPTAB_DIRECT
);
19472 if (out
!= operands
[0])
19473 emit_move_insn (operands
[0], out
);
19480 if ((diff
== 1 || diff
== 2 || diff
== 4 || diff
== 8
19481 || diff
== 3 || diff
== 5 || diff
== 9)
19482 && ((mode
!= QImode
&& mode
!= HImode
) || !TARGET_PARTIAL_REG_STALL
)
19484 || x86_64_immediate_operand (GEN_INT (cf
), VOIDmode
)))
19490 * lea cf(dest*(ct-cf)),dest
19494 * This also catches the degenerate setcc-only case.
19500 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19503 /* On x86_64 the lea instruction operates on Pmode, so we need
19504 to get arithmetics done in proper mode to match. */
19506 tmp
= copy_rtx (out
);
19510 out1
= copy_rtx (out
);
19511 tmp
= gen_rtx_MULT (mode
, out1
, GEN_INT (diff
& ~1));
19515 tmp
= gen_rtx_PLUS (mode
, tmp
, out1
);
19521 tmp
= gen_rtx_PLUS (mode
, tmp
, GEN_INT (cf
));
19524 if (!rtx_equal_p (tmp
, out
))
19527 out
= force_operand (tmp
, copy_rtx (out
));
19529 emit_insn (gen_rtx_SET (VOIDmode
, copy_rtx (out
), copy_rtx (tmp
)));
19531 if (!rtx_equal_p (out
, operands
[0]))
19532 emit_move_insn (operands
[0], copy_rtx (out
));
19538 * General case: Jumpful:
19539 * xorl dest,dest cmpl op1, op2
19540 * cmpl op1, op2 movl ct, dest
19541 * setcc dest jcc 1f
19542 * decl dest movl cf, dest
19543 * andl (cf-ct),dest 1:
19546 * Size 20. Size 14.
19548 * This is reasonably steep, but branch mispredict costs are
19549 * high on modern cpus, so consider failing only if optimizing
19553 if ((!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19554 && BRANCH_COST (optimize_insn_for_speed_p (),
19559 enum machine_mode cmp_mode
= GET_MODE (op0
);
19564 if (SCALAR_FLOAT_MODE_P (cmp_mode
))
19566 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode
));
19568 /* We may be reversing unordered compare to normal compare,
19569 that is not valid in general (we may convert non-trapping
19570 condition to trapping one), however on i386 we currently
19571 emit all comparisons unordered. */
19572 code
= reverse_condition_maybe_unordered (code
);
19576 code
= reverse_condition (code
);
19577 if (compare_code
!= UNKNOWN
)
19578 compare_code
= reverse_condition (compare_code
);
19582 if (compare_code
!= UNKNOWN
)
19584 /* notl op1 (if needed)
19589 For x < 0 (resp. x <= -1) there will be no notl,
19590 so if possible swap the constants to get rid of the
19592 True/false will be -1/0 while code below (store flag
19593 followed by decrement) is 0/-1, so the constants need
19594 to be exchanged once more. */
19596 if (compare_code
== GE
|| !cf
)
19598 code
= reverse_condition (code
);
19603 HOST_WIDE_INT tmp
= cf
;
19608 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, -1);
19612 out
= emit_store_flag (out
, code
, op0
, op1
, VOIDmode
, 0, 1);
19614 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
),
19616 copy_rtx (out
), 1, OPTAB_DIRECT
);
19619 out
= expand_simple_binop (mode
, AND
, copy_rtx (out
),
19620 gen_int_mode (cf
- ct
, mode
),
19621 copy_rtx (out
), 1, OPTAB_DIRECT
);
19623 out
= expand_simple_binop (mode
, PLUS
, copy_rtx (out
), GEN_INT (ct
),
19624 copy_rtx (out
), 1, OPTAB_DIRECT
);
19625 if (!rtx_equal_p (out
, operands
[0]))
19626 emit_move_insn (operands
[0], copy_rtx (out
));
19632 if (!TARGET_CMOVE
|| (mode
== QImode
&& TARGET_PARTIAL_REG_STALL
))
19634 /* Try a few things more with specific constants and a variable. */
19637 rtx var
, orig_out
, out
, tmp
;
19639 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
19642 /* If one of the two operands is an interesting constant, load a
19643 constant with the above and mask it in with a logical operation. */
19645 if (CONST_INT_P (operands
[2]))
19648 if (INTVAL (operands
[2]) == 0 && operands
[3] != constm1_rtx
)
19649 operands
[3] = constm1_rtx
, op
= and_optab
;
19650 else if (INTVAL (operands
[2]) == -1 && operands
[3] != const0_rtx
)
19651 operands
[3] = const0_rtx
, op
= ior_optab
;
19655 else if (CONST_INT_P (operands
[3]))
19658 if (INTVAL (operands
[3]) == 0 && operands
[2] != constm1_rtx
)
19659 operands
[2] = constm1_rtx
, op
= and_optab
;
19660 else if (INTVAL (operands
[3]) == -1 && operands
[3] != const0_rtx
)
19661 operands
[2] = const0_rtx
, op
= ior_optab
;
19668 orig_out
= operands
[0];
19669 tmp
= gen_reg_rtx (mode
);
19672 /* Recurse to get the constant loaded. */
19673 if (ix86_expand_int_movcc (operands
) == 0)
19676 /* Mask in the interesting variable. */
19677 out
= expand_binop (mode
, op
, var
, tmp
, orig_out
, 0,
19679 if (!rtx_equal_p (out
, orig_out
))
19680 emit_move_insn (copy_rtx (orig_out
), copy_rtx (out
));
19686 * For comparison with above,
19696 if (! nonimmediate_operand (operands
[2], mode
))
19697 operands
[2] = force_reg (mode
, operands
[2]);
19698 if (! nonimmediate_operand (operands
[3], mode
))
19699 operands
[3] = force_reg (mode
, operands
[3]);
19701 if (! register_operand (operands
[2], VOIDmode
)
19703 || ! register_operand (operands
[3], VOIDmode
)))
19704 operands
[2] = force_reg (mode
, operands
[2]);
19707 && ! register_operand (operands
[3], VOIDmode
))
19708 operands
[3] = force_reg (mode
, operands
[3]);
19710 emit_insn (compare_seq
);
19711 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
19712 gen_rtx_IF_THEN_ELSE (mode
,
19713 compare_op
, operands
[2],
19718 /* Swap, force into registers, or otherwise massage the two operands
19719 to an sse comparison with a mask result. Thus we differ a bit from
19720 ix86_prepare_fp_compare_args which expects to produce a flags result.
19722 The DEST operand exists to help determine whether to commute commutative
19723 operators. The POP0/POP1 operands are updated in place. The new
19724 comparison code is returned, or UNKNOWN if not implementable. */
19726 static enum rtx_code
19727 ix86_prepare_sse_fp_compare_args (rtx dest
, enum rtx_code code
,
19728 rtx
*pop0
, rtx
*pop1
)
19736 /* AVX supports all the needed comparisons. */
19739 /* We have no LTGT as an operator. We could implement it with
19740 NE & ORDERED, but this requires an extra temporary. It's
19741 not clear that it's worth it. */
19748 /* These are supported directly. */
19755 /* AVX has 3 operand comparisons, no need to swap anything. */
19758 /* For commutative operators, try to canonicalize the destination
19759 operand to be first in the comparison - this helps reload to
19760 avoid extra moves. */
19761 if (!dest
|| !rtx_equal_p (dest
, *pop1
))
19769 /* These are not supported directly before AVX, and furthermore
19770 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
19771 comparison operands to transform into something that is
19776 code
= swap_condition (code
);
19780 gcc_unreachable ();
19786 /* Detect conditional moves that exactly match min/max operational
19787 semantics. Note that this is IEEE safe, as long as we don't
19788 interchange the operands.
19790 Returns FALSE if this conditional move doesn't match a MIN/MAX,
19791 and TRUE if the operation is successful and instructions are emitted. */
19794 ix86_expand_sse_fp_minmax (rtx dest
, enum rtx_code code
, rtx cmp_op0
,
19795 rtx cmp_op1
, rtx if_true
, rtx if_false
)
19797 enum machine_mode mode
;
19803 else if (code
== UNGE
)
19806 if_true
= if_false
;
19812 if (rtx_equal_p (cmp_op0
, if_true
) && rtx_equal_p (cmp_op1
, if_false
))
19814 else if (rtx_equal_p (cmp_op1
, if_true
) && rtx_equal_p (cmp_op0
, if_false
))
19819 mode
= GET_MODE (dest
);
19821 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
19822 but MODE may be a vector mode and thus not appropriate. */
19823 if (!flag_finite_math_only
|| !flag_unsafe_math_optimizations
)
19825 int u
= is_min
? UNSPEC_IEEE_MIN
: UNSPEC_IEEE_MAX
;
19828 if_true
= force_reg (mode
, if_true
);
19829 v
= gen_rtvec (2, if_true
, if_false
);
19830 tmp
= gen_rtx_UNSPEC (mode
, v
, u
);
19834 code
= is_min
? SMIN
: SMAX
;
19835 tmp
= gen_rtx_fmt_ee (code
, mode
, if_true
, if_false
);
19838 emit_insn (gen_rtx_SET (VOIDmode
, dest
, tmp
));
19842 /* Expand an sse vector comparison. Return the register with the result. */
19845 ix86_expand_sse_cmp (rtx dest
, enum rtx_code code
, rtx cmp_op0
, rtx cmp_op1
,
19846 rtx op_true
, rtx op_false
)
19848 enum machine_mode mode
= GET_MODE (dest
);
19849 enum machine_mode cmp_mode
= GET_MODE (cmp_op0
);
19852 cmp_op0
= force_reg (cmp_mode
, cmp_op0
);
19853 if (!nonimmediate_operand (cmp_op1
, cmp_mode
))
19854 cmp_op1
= force_reg (cmp_mode
, cmp_op1
);
19857 || reg_overlap_mentioned_p (dest
, op_true
)
19858 || reg_overlap_mentioned_p (dest
, op_false
))
19859 dest
= gen_reg_rtx (mode
);
19861 x
= gen_rtx_fmt_ee (code
, cmp_mode
, cmp_op0
, cmp_op1
);
19862 if (cmp_mode
!= mode
)
19864 x
= force_reg (cmp_mode
, x
);
19865 convert_move (dest
, x
, false);
19868 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19873 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
19874 operations. This is used for both scalar and vector conditional moves. */
19877 ix86_expand_sse_movcc (rtx dest
, rtx cmp
, rtx op_true
, rtx op_false
)
19879 enum machine_mode mode
= GET_MODE (dest
);
19882 if (vector_all_ones_operand (op_true
, mode
)
19883 && rtx_equal_p (op_false
, CONST0_RTX (mode
)))
19885 emit_insn (gen_rtx_SET (VOIDmode
, dest
, cmp
));
19887 else if (op_false
== CONST0_RTX (mode
))
19889 op_true
= force_reg (mode
, op_true
);
19890 x
= gen_rtx_AND (mode
, cmp
, op_true
);
19891 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19893 else if (op_true
== CONST0_RTX (mode
))
19895 op_false
= force_reg (mode
, op_false
);
19896 x
= gen_rtx_NOT (mode
, cmp
);
19897 x
= gen_rtx_AND (mode
, x
, op_false
);
19898 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19900 else if (INTEGRAL_MODE_P (mode
) && op_true
== CONSTM1_RTX (mode
))
19902 op_false
= force_reg (mode
, op_false
);
19903 x
= gen_rtx_IOR (mode
, cmp
, op_false
);
19904 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
19906 else if (TARGET_XOP
)
19908 op_true
= force_reg (mode
, op_true
);
19910 if (!nonimmediate_operand (op_false
, mode
))
19911 op_false
= force_reg (mode
, op_false
);
19913 emit_insn (gen_rtx_SET (mode
, dest
,
19914 gen_rtx_IF_THEN_ELSE (mode
, cmp
,
19920 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
19922 if (!nonimmediate_operand (op_true
, mode
))
19923 op_true
= force_reg (mode
, op_true
);
19925 op_false
= force_reg (mode
, op_false
);
19931 gen
= gen_sse4_1_blendvps
;
19935 gen
= gen_sse4_1_blendvpd
;
19943 gen
= gen_sse4_1_pblendvb
;
19944 dest
= gen_lowpart (V16QImode
, dest
);
19945 op_false
= gen_lowpart (V16QImode
, op_false
);
19946 op_true
= gen_lowpart (V16QImode
, op_true
);
19947 cmp
= gen_lowpart (V16QImode
, cmp
);
19952 gen
= gen_avx_blendvps256
;
19956 gen
= gen_avx_blendvpd256
;
19964 gen
= gen_avx2_pblendvb
;
19965 dest
= gen_lowpart (V32QImode
, dest
);
19966 op_false
= gen_lowpart (V32QImode
, op_false
);
19967 op_true
= gen_lowpart (V32QImode
, op_true
);
19968 cmp
= gen_lowpart (V32QImode
, cmp
);
19976 emit_insn (gen (dest
, op_false
, op_true
, cmp
));
19979 op_true
= force_reg (mode
, op_true
);
19981 t2
= gen_reg_rtx (mode
);
19983 t3
= gen_reg_rtx (mode
);
19987 x
= gen_rtx_AND (mode
, op_true
, cmp
);
19988 emit_insn (gen_rtx_SET (VOIDmode
, t2
, x
));
19990 x
= gen_rtx_NOT (mode
, cmp
);
19991 x
= gen_rtx_AND (mode
, x
, op_false
);
19992 emit_insn (gen_rtx_SET (VOIDmode
, t3
, x
));
19994 x
= gen_rtx_IOR (mode
, t3
, t2
);
19995 emit_insn (gen_rtx_SET (VOIDmode
, dest
, x
));
20000 /* Expand a floating-point conditional move. Return true if successful. */
20003 ix86_expand_fp_movcc (rtx operands
[])
20005 enum machine_mode mode
= GET_MODE (operands
[0]);
20006 enum rtx_code code
= GET_CODE (operands
[1]);
20007 rtx tmp
, compare_op
;
20008 rtx op0
= XEXP (operands
[1], 0);
20009 rtx op1
= XEXP (operands
[1], 1);
20011 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
20013 enum machine_mode cmode
;
20015 /* Since we've no cmove for sse registers, don't force bad register
20016 allocation just to gain access to it. Deny movcc when the
20017 comparison mode doesn't match the move mode. */
20018 cmode
= GET_MODE (op0
);
20019 if (cmode
== VOIDmode
)
20020 cmode
= GET_MODE (op1
);
20024 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
, &op0
, &op1
);
20025 if (code
== UNKNOWN
)
20028 if (ix86_expand_sse_fp_minmax (operands
[0], code
, op0
, op1
,
20029 operands
[2], operands
[3]))
20032 tmp
= ix86_expand_sse_cmp (operands
[0], code
, op0
, op1
,
20033 operands
[2], operands
[3]);
20034 ix86_expand_sse_movcc (operands
[0], tmp
, operands
[2], operands
[3]);
20038 /* The floating point conditional move instructions don't directly
20039 support conditions resulting from a signed integer comparison. */
20041 compare_op
= ix86_expand_compare (code
, op0
, op1
);
20042 if (!fcmov_comparison_operator (compare_op
, VOIDmode
))
20044 tmp
= gen_reg_rtx (QImode
);
20045 ix86_expand_setcc (tmp
, code
, op0
, op1
);
20047 compare_op
= ix86_expand_compare (NE
, tmp
, const0_rtx
);
20050 emit_insn (gen_rtx_SET (VOIDmode
, operands
[0],
20051 gen_rtx_IF_THEN_ELSE (mode
, compare_op
,
20052 operands
[2], operands
[3])));
20057 /* Expand a floating-point vector conditional move; a vcond operation
20058 rather than a movcc operation. */
20061 ix86_expand_fp_vcond (rtx operands
[])
20063 enum rtx_code code
= GET_CODE (operands
[3]);
20066 code
= ix86_prepare_sse_fp_compare_args (operands
[0], code
,
20067 &operands
[4], &operands
[5]);
20068 if (code
== UNKNOWN
)
20071 switch (GET_CODE (operands
[3]))
20074 temp
= ix86_expand_sse_cmp (operands
[0], ORDERED
, operands
[4],
20075 operands
[5], operands
[0], operands
[0]);
20076 cmp
= ix86_expand_sse_cmp (operands
[0], NE
, operands
[4],
20077 operands
[5], operands
[1], operands
[2]);
20081 temp
= ix86_expand_sse_cmp (operands
[0], UNORDERED
, operands
[4],
20082 operands
[5], operands
[0], operands
[0]);
20083 cmp
= ix86_expand_sse_cmp (operands
[0], EQ
, operands
[4],
20084 operands
[5], operands
[1], operands
[2]);
20088 gcc_unreachable ();
20090 cmp
= expand_simple_binop (GET_MODE (cmp
), code
, temp
, cmp
, cmp
, 1,
20092 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20096 if (ix86_expand_sse_fp_minmax (operands
[0], code
, operands
[4],
20097 operands
[5], operands
[1], operands
[2]))
20100 cmp
= ix86_expand_sse_cmp (operands
[0], code
, operands
[4], operands
[5],
20101 operands
[1], operands
[2]);
20102 ix86_expand_sse_movcc (operands
[0], cmp
, operands
[1], operands
[2]);
20106 /* Expand a signed/unsigned integral vector conditional move. */
20109 ix86_expand_int_vcond (rtx operands
[])
20111 enum machine_mode data_mode
= GET_MODE (operands
[0]);
20112 enum machine_mode mode
= GET_MODE (operands
[4]);
20113 enum rtx_code code
= GET_CODE (operands
[3]);
20114 bool negate
= false;
20117 cop0
= operands
[4];
20118 cop1
= operands
[5];
20120 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
20121 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
20122 if ((code
== LT
|| code
== GE
)
20123 && data_mode
== mode
20124 && cop1
== CONST0_RTX (mode
)
20125 && operands
[1 + (code
== LT
)] == CONST0_RTX (data_mode
)
20126 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) > 1
20127 && GET_MODE_SIZE (GET_MODE_INNER (data_mode
)) <= 8
20128 && (GET_MODE_SIZE (data_mode
) == 16
20129 || (TARGET_AVX2
&& GET_MODE_SIZE (data_mode
) == 32)))
20131 rtx negop
= operands
[2 - (code
== LT
)];
20132 int shift
= GET_MODE_BITSIZE (GET_MODE_INNER (data_mode
)) - 1;
20133 if (negop
== CONST1_RTX (data_mode
))
20135 rtx res
= expand_simple_binop (mode
, LSHIFTRT
, cop0
, GEN_INT (shift
),
20136 operands
[0], 1, OPTAB_DIRECT
);
20137 if (res
!= operands
[0])
20138 emit_move_insn (operands
[0], res
);
20141 else if (GET_MODE_INNER (data_mode
) != DImode
20142 && vector_all_ones_operand (negop
, data_mode
))
20144 rtx res
= expand_simple_binop (mode
, ASHIFTRT
, cop0
, GEN_INT (shift
),
20145 operands
[0], 0, OPTAB_DIRECT
);
20146 if (res
!= operands
[0])
20147 emit_move_insn (operands
[0], res
);
20152 if (!nonimmediate_operand (cop1
, mode
))
20153 cop1
= force_reg (mode
, cop1
);
20154 if (!general_operand (operands
[1], data_mode
))
20155 operands
[1] = force_reg (data_mode
, operands
[1]);
20156 if (!general_operand (operands
[2], data_mode
))
20157 operands
[2] = force_reg (data_mode
, operands
[2]);
20159 /* XOP supports all of the comparisons on all 128-bit vector int types. */
20161 && (mode
== V16QImode
|| mode
== V8HImode
20162 || mode
== V4SImode
|| mode
== V2DImode
))
20166 /* Canonicalize the comparison to EQ, GT, GTU. */
20177 code
= reverse_condition (code
);
20183 code
= reverse_condition (code
);
20189 code
= swap_condition (code
);
20190 x
= cop0
, cop0
= cop1
, cop1
= x
;
20194 gcc_unreachable ();
20197 /* Only SSE4.1/SSE4.2 supports V2DImode. */
20198 if (mode
== V2DImode
)
20203 /* SSE4.1 supports EQ. */
20204 if (!TARGET_SSE4_1
)
20210 /* SSE4.2 supports GT/GTU. */
20211 if (!TARGET_SSE4_2
)
20216 gcc_unreachable ();
20220 /* Unsigned parallel compare is not supported by the hardware.
20221 Play some tricks to turn this into a signed comparison
20225 cop0
= force_reg (mode
, cop0
);
20235 rtx (*gen_sub3
) (rtx
, rtx
, rtx
);
20239 case V8SImode
: gen_sub3
= gen_subv8si3
; break;
20240 case V4DImode
: gen_sub3
= gen_subv4di3
; break;
20241 case V4SImode
: gen_sub3
= gen_subv4si3
; break;
20242 case V2DImode
: gen_sub3
= gen_subv2di3
; break;
20244 gcc_unreachable ();
20246 /* Subtract (-(INT MAX) - 1) from both operands to make
20248 mask
= ix86_build_signbit_mask (mode
, true, false);
20249 t1
= gen_reg_rtx (mode
);
20250 emit_insn (gen_sub3 (t1
, cop0
, mask
));
20252 t2
= gen_reg_rtx (mode
);
20253 emit_insn (gen_sub3 (t2
, cop1
, mask
));
20265 /* Perform a parallel unsigned saturating subtraction. */
20266 x
= gen_reg_rtx (mode
);
20267 emit_insn (gen_rtx_SET (VOIDmode
, x
,
20268 gen_rtx_US_MINUS (mode
, cop0
, cop1
)));
20271 cop1
= CONST0_RTX (mode
);
20277 gcc_unreachable ();
20282 /* Allow the comparison to be done in one mode, but the movcc to
20283 happen in another mode. */
20284 if (data_mode
== mode
)
20286 x
= ix86_expand_sse_cmp (operands
[0], code
, cop0
, cop1
,
20287 operands
[1+negate
], operands
[2-negate
]);
20291 gcc_assert (GET_MODE_SIZE (data_mode
) == GET_MODE_SIZE (mode
));
20292 x
= ix86_expand_sse_cmp (gen_lowpart (mode
, operands
[0]),
20294 operands
[1+negate
], operands
[2-negate
]);
20295 x
= gen_lowpart (data_mode
, x
);
20298 ix86_expand_sse_movcc (operands
[0], x
, operands
[1+negate
],
20299 operands
[2-negate
]);
20303 /* Expand a variable vector permutation. */
20306 ix86_expand_vec_perm (rtx operands
[])
20308 rtx target
= operands
[0];
20309 rtx op0
= operands
[1];
20310 rtx op1
= operands
[2];
20311 rtx mask
= operands
[3];
20312 rtx t1
, t2
, t3
, t4
, vt
, vt2
, vec
[32];
20313 enum machine_mode mode
= GET_MODE (op0
);
20314 enum machine_mode maskmode
= GET_MODE (mask
);
20316 bool one_operand_shuffle
= rtx_equal_p (op0
, op1
);
20318 /* Number of elements in the vector. */
20319 w
= GET_MODE_NUNITS (mode
);
20320 e
= GET_MODE_UNIT_SIZE (mode
);
20321 gcc_assert (w
<= 32);
20325 if (mode
== V4DImode
|| mode
== V4DFmode
|| mode
== V16HImode
)
20327 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
20328 an constant shuffle operand. With a tiny bit of effort we can
20329 use VPERMD instead. A re-interpretation stall for V4DFmode is
20330 unfortunate but there's no avoiding it.
20331 Similarly for V16HImode we don't have instructions for variable
20332 shuffling, while for V32QImode we can use after preparing suitable
20333 masks vpshufb; vpshufb; vpermq; vpor. */
20335 if (mode
== V16HImode
)
20337 maskmode
= mode
= V32QImode
;
20343 maskmode
= mode
= V8SImode
;
20347 t1
= gen_reg_rtx (maskmode
);
20349 /* Replicate the low bits of the V4DImode mask into V8SImode:
20351 t1 = { A A B B C C D D }. */
20352 for (i
= 0; i
< w
/ 2; ++i
)
20353 vec
[i
*2 + 1] = vec
[i
*2] = GEN_INT (i
* 2);
20354 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20355 vt
= force_reg (maskmode
, vt
);
20356 mask
= gen_lowpart (maskmode
, mask
);
20357 if (maskmode
== V8SImode
)
20358 emit_insn (gen_avx2_permvarv8si (t1
, mask
, vt
));
20360 emit_insn (gen_avx2_pshufbv32qi3 (t1
, mask
, vt
));
20362 /* Multiply the shuffle indicies by two. */
20363 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, t1
, t1
, 1,
20366 /* Add one to the odd shuffle indicies:
20367 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
20368 for (i
= 0; i
< w
/ 2; ++i
)
20370 vec
[i
* 2] = const0_rtx
;
20371 vec
[i
* 2 + 1] = const1_rtx
;
20373 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20374 vt
= force_const_mem (maskmode
, vt
);
20375 t1
= expand_simple_binop (maskmode
, PLUS
, t1
, vt
, t1
, 1,
20378 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
20379 operands
[3] = mask
= t1
;
20380 target
= gen_lowpart (mode
, target
);
20381 op0
= gen_lowpart (mode
, op0
);
20382 op1
= gen_lowpart (mode
, op1
);
20388 /* The VPERMD and VPERMPS instructions already properly ignore
20389 the high bits of the shuffle elements. No need for us to
20390 perform an AND ourselves. */
20391 if (one_operand_shuffle
)
20392 emit_insn (gen_avx2_permvarv8si (target
, op0
, mask
));
20395 t1
= gen_reg_rtx (V8SImode
);
20396 t2
= gen_reg_rtx (V8SImode
);
20397 emit_insn (gen_avx2_permvarv8si (t1
, op0
, mask
));
20398 emit_insn (gen_avx2_permvarv8si (t2
, op1
, mask
));
20404 mask
= gen_lowpart (V8SFmode
, mask
);
20405 if (one_operand_shuffle
)
20406 emit_insn (gen_avx2_permvarv8sf (target
, op0
, mask
));
20409 t1
= gen_reg_rtx (V8SFmode
);
20410 t2
= gen_reg_rtx (V8SFmode
);
20411 emit_insn (gen_avx2_permvarv8sf (t1
, op0
, mask
));
20412 emit_insn (gen_avx2_permvarv8sf (t2
, op1
, mask
));
20418 /* By combining the two 128-bit input vectors into one 256-bit
20419 input vector, we can use VPERMD and VPERMPS for the full
20420 two-operand shuffle. */
20421 t1
= gen_reg_rtx (V8SImode
);
20422 t2
= gen_reg_rtx (V8SImode
);
20423 emit_insn (gen_avx_vec_concatv8si (t1
, op0
, op1
));
20424 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20425 emit_insn (gen_avx2_permvarv8si (t1
, t1
, t2
));
20426 emit_insn (gen_avx_vextractf128v8si (target
, t1
, const0_rtx
));
20430 t1
= gen_reg_rtx (V8SFmode
);
20431 t2
= gen_reg_rtx (V8SImode
);
20432 mask
= gen_lowpart (V4SImode
, mask
);
20433 emit_insn (gen_avx_vec_concatv8sf (t1
, op0
, op1
));
20434 emit_insn (gen_avx_vec_concatv8si (t2
, mask
, mask
));
20435 emit_insn (gen_avx2_permvarv8sf (t1
, t1
, t2
));
20436 emit_insn (gen_avx_vextractf128v8sf (target
, t1
, const0_rtx
));
20440 t1
= gen_reg_rtx (V32QImode
);
20441 t2
= gen_reg_rtx (V32QImode
);
20442 t3
= gen_reg_rtx (V32QImode
);
20443 vt2
= GEN_INT (128);
20444 for (i
= 0; i
< 32; i
++)
20446 vt
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20447 vt
= force_reg (V32QImode
, vt
);
20448 for (i
= 0; i
< 32; i
++)
20449 vec
[i
] = i
< 16 ? vt2
: const0_rtx
;
20450 vt2
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, vec
));
20451 vt2
= force_reg (V32QImode
, vt2
);
20452 /* From mask create two adjusted masks, which contain the same
20453 bits as mask in the low 7 bits of each vector element.
20454 The first mask will have the most significant bit clear
20455 if it requests element from the same 128-bit lane
20456 and MSB set if it requests element from the other 128-bit lane.
20457 The second mask will have the opposite values of the MSB,
20458 and additionally will have its 128-bit lanes swapped.
20459 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
20460 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
20461 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
20462 stands for other 12 bytes. */
20463 /* The bit whether element is from the same lane or the other
20464 lane is bit 4, so shift it up by 3 to the MSB position. */
20465 emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode
, t1
),
20466 gen_lowpart (V4DImode
, mask
),
20468 /* Clear MSB bits from the mask just in case it had them set. */
20469 emit_insn (gen_avx2_andnotv32qi3 (t2
, vt
, mask
));
20470 /* After this t1 will have MSB set for elements from other lane. */
20471 emit_insn (gen_xorv32qi3 (t1
, t1
, vt2
));
20472 /* Clear bits other than MSB. */
20473 emit_insn (gen_andv32qi3 (t1
, t1
, vt
));
20474 /* Or in the lower bits from mask into t3. */
20475 emit_insn (gen_iorv32qi3 (t3
, t1
, t2
));
20476 /* And invert MSB bits in t1, so MSB is set for elements from the same
20478 emit_insn (gen_xorv32qi3 (t1
, t1
, vt
));
20479 /* Swap 128-bit lanes in t3. */
20480 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20481 gen_lowpart (V4DImode
, t3
),
20482 const2_rtx
, GEN_INT (3),
20483 const0_rtx
, const1_rtx
));
20484 /* And or in the lower bits from mask into t1. */
20485 emit_insn (gen_iorv32qi3 (t1
, t1
, t2
));
20486 if (one_operand_shuffle
)
20488 /* Each of these shuffles will put 0s in places where
20489 element from the other 128-bit lane is needed, otherwise
20490 will shuffle in the requested value. */
20491 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op0
, t3
));
20492 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op0
, t1
));
20493 /* For t3 the 128-bit lanes are swapped again. */
20494 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20495 gen_lowpart (V4DImode
, t3
),
20496 const2_rtx
, GEN_INT (3),
20497 const0_rtx
, const1_rtx
));
20498 /* And oring both together leads to the result. */
20499 emit_insn (gen_iorv32qi3 (target
, t1
, t3
));
20503 t4
= gen_reg_rtx (V32QImode
);
20504 /* Similarly to the above one_operand_shuffle code,
20505 just for repeated twice for each operand. merge_two:
20506 code will merge the two results together. */
20507 emit_insn (gen_avx2_pshufbv32qi3 (t4
, op0
, t3
));
20508 emit_insn (gen_avx2_pshufbv32qi3 (t3
, op1
, t3
));
20509 emit_insn (gen_avx2_pshufbv32qi3 (t2
, op0
, t1
));
20510 emit_insn (gen_avx2_pshufbv32qi3 (t1
, op1
, t1
));
20511 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t4
),
20512 gen_lowpart (V4DImode
, t4
),
20513 const2_rtx
, GEN_INT (3),
20514 const0_rtx
, const1_rtx
));
20515 emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode
, t3
),
20516 gen_lowpart (V4DImode
, t3
),
20517 const2_rtx
, GEN_INT (3),
20518 const0_rtx
, const1_rtx
));
20519 emit_insn (gen_iorv32qi3 (t4
, t2
, t4
));
20520 emit_insn (gen_iorv32qi3 (t3
, t1
, t3
));
20526 gcc_assert (GET_MODE_SIZE (mode
) <= 16);
20533 /* The XOP VPPERM insn supports three inputs. By ignoring the
20534 one_operand_shuffle special case, we avoid creating another
20535 set of constant vectors in memory. */
20536 one_operand_shuffle
= false;
20538 /* mask = mask & {2*w-1, ...} */
20539 vt
= GEN_INT (2*w
- 1);
20543 /* mask = mask & {w-1, ...} */
20544 vt
= GEN_INT (w
- 1);
20547 for (i
= 0; i
< w
; i
++)
20549 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20550 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20551 NULL_RTX
, 0, OPTAB_DIRECT
);
20553 /* For non-QImode operations, convert the word permutation control
20554 into a byte permutation control. */
20555 if (mode
!= V16QImode
)
20557 mask
= expand_simple_binop (maskmode
, ASHIFT
, mask
,
20558 GEN_INT (exact_log2 (e
)),
20559 NULL_RTX
, 0, OPTAB_DIRECT
);
20561 /* Convert mask to vector of chars. */
20562 mask
= force_reg (V16QImode
, gen_lowpart (V16QImode
, mask
));
20564 /* Replicate each of the input bytes into byte positions:
20565 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
20566 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
20567 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
20568 for (i
= 0; i
< 16; ++i
)
20569 vec
[i
] = GEN_INT (i
/e
* e
);
20570 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20571 vt
= force_const_mem (V16QImode
, vt
);
20573 emit_insn (gen_xop_pperm (mask
, mask
, mask
, vt
));
20575 emit_insn (gen_ssse3_pshufbv16qi3 (mask
, mask
, vt
));
20577 /* Convert it into the byte positions by doing
20578 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
20579 for (i
= 0; i
< 16; ++i
)
20580 vec
[i
] = GEN_INT (i
% e
);
20581 vt
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, vec
));
20582 vt
= force_const_mem (V16QImode
, vt
);
20583 emit_insn (gen_addv16qi3 (mask
, mask
, vt
));
20586 /* The actual shuffle operations all operate on V16QImode. */
20587 op0
= gen_lowpart (V16QImode
, op0
);
20588 op1
= gen_lowpart (V16QImode
, op1
);
20589 target
= gen_lowpart (V16QImode
, target
);
20593 emit_insn (gen_xop_pperm (target
, op0
, op1
, mask
));
20595 else if (one_operand_shuffle
)
20597 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, mask
));
20604 /* Shuffle the two input vectors independently. */
20605 t1
= gen_reg_rtx (V16QImode
);
20606 t2
= gen_reg_rtx (V16QImode
);
20607 emit_insn (gen_ssse3_pshufbv16qi3 (t1
, op0
, mask
));
20608 emit_insn (gen_ssse3_pshufbv16qi3 (t2
, op1
, mask
));
20611 /* Then merge them together. The key is whether any given control
20612 element contained a bit set that indicates the second word. */
20613 mask
= operands
[3];
20615 if (maskmode
== V2DImode
&& !TARGET_SSE4_1
)
20617 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
20618 more shuffle to convert the V2DI input mask into a V4SI
20619 input mask. At which point the masking that expand_int_vcond
20620 will work as desired. */
20621 rtx t3
= gen_reg_rtx (V4SImode
);
20622 emit_insn (gen_sse2_pshufd_1 (t3
, gen_lowpart (V4SImode
, mask
),
20623 const0_rtx
, const0_rtx
,
20624 const2_rtx
, const2_rtx
));
20626 maskmode
= V4SImode
;
20630 for (i
= 0; i
< w
; i
++)
20632 vt
= gen_rtx_CONST_VECTOR (maskmode
, gen_rtvec_v (w
, vec
));
20633 vt
= force_reg (maskmode
, vt
);
20634 mask
= expand_simple_binop (maskmode
, AND
, mask
, vt
,
20635 NULL_RTX
, 0, OPTAB_DIRECT
);
20637 xops
[0] = gen_lowpart (mode
, operands
[0]);
20638 xops
[1] = gen_lowpart (mode
, t2
);
20639 xops
[2] = gen_lowpart (mode
, t1
);
20640 xops
[3] = gen_rtx_EQ (maskmode
, mask
, vt
);
20643 ok
= ix86_expand_int_vcond (xops
);
20648 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
20649 true if we should do zero extension, else sign extension. HIGH_P is
20650 true if we want the N/2 high elements, else the low elements. */
/* Widen the elements of SRC into DEST: zero extension when UNSIGNED_P,
   sign extension otherwise; HIGH_P selects the upper half of the source
   elements (see the comment preceding this function).
   NOTE(review): this region is a lossy, line-mangled extraction --
   statements are split across lines and the switch / TARGET_* scaffolding
   between the visible fragments is absent from this view.  Code is kept
   byte-identical; only comments were added.  */
20653 ix86_expand_sse_unpack (rtx dest
, rtx src
, bool unsigned_p
, bool high_p
)
20655 enum machine_mode imode
= GET_MODE (src
);
/* Unary unpack generator plus, for 256-bit AVX2 inputs, an extractor
   that pulls the requested 128-bit half out of SRC first.  */
20660 rtx (*unpack
)(rtx
, rtx
);
20661 rtx (*extract
)(rtx
, rtx
) = NULL
;
20662 enum machine_mode halfmode
= BLKmode
;
/* AVX2 paths: pick a vpmov{zx,sx} generator and the matching
   high/low 128-bit extractor per element width.  */
20668 unpack
= gen_avx2_zero_extendv16qiv16hi2
;
20670 unpack
= gen_avx2_sign_extendv16qiv16hi2
;
20671 halfmode
= V16QImode
;
20673 = high_p
? gen_vec_extract_hi_v32qi
: gen_vec_extract_lo_v32qi
;
20677 unpack
= gen_avx2_zero_extendv8hiv8si2
;
20679 unpack
= gen_avx2_sign_extendv8hiv8si2
;
20680 halfmode
= V8HImode
;
20682 = high_p
? gen_vec_extract_hi_v16hi
: gen_vec_extract_lo_v16hi
;
20686 unpack
= gen_avx2_zero_extendv4siv4di2
;
20688 unpack
= gen_avx2_sign_extendv4siv4di2
;
20689 halfmode
= V4SImode
;
20691 = high_p
? gen_vec_extract_hi_v8si
: gen_vec_extract_lo_v8si
;
/* SSE4.1 paths: pmov{zx,sx} on 128-bit vectors.  */
20695 unpack
= gen_sse4_1_zero_extendv8qiv8hi2
;
20697 unpack
= gen_sse4_1_sign_extendv8qiv8hi2
;
20701 unpack
= gen_sse4_1_zero_extendv4hiv4si2
;
20703 unpack
= gen_sse4_1_sign_extendv4hiv4si2
;
20707 unpack
= gen_sse4_1_zero_extendv2siv2di2
;
20709 unpack
= gen_sse4_1_sign_extendv2siv2di2
;
20712 gcc_unreachable ();
/* 256-bit source: extract the requested 128-bit half into TMP, then
   unpack it.  Otherwise, for the HIGH_P case, move the high 8 bytes
   into position with a V1TImode logical right shift first.  */
20715 if (GET_MODE_SIZE (imode
) == 32)
20717 tmp
= gen_reg_rtx (halfmode
);
20718 emit_insn (extract (tmp
, src
));
20722 /* Shift higher 8 bytes to lower 8 bytes. */
20723 tmp
= gen_reg_rtx (imode
);
20724 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, tmp
),
20725 gen_lowpart (V1TImode
, src
),
20731 emit_insn (unpack (dest
, tmp
));
/* Fallback (no pmovzx/pmovsx): emit a punpck interleave.  The second
   operand is all-zeros for zero extension, or a GT-against-zero mask
   (broadcast sign bits) for sign extension.  */
20735 rtx (*unpack
)(rtx
, rtx
, rtx
);
20741 unpack
= gen_vec_interleave_highv16qi
;
20743 unpack
= gen_vec_interleave_lowv16qi
;
20747 unpack
= gen_vec_interleave_highv8hi
;
20749 unpack
= gen_vec_interleave_lowv8hi
;
20753 unpack
= gen_vec_interleave_highv4si
;
20755 unpack
= gen_vec_interleave_lowv4si
;
20758 gcc_unreachable ();
20762 tmp
= force_reg (imode
, CONST0_RTX (imode
));
20764 tmp
= ix86_expand_sse_cmp (gen_reg_rtx (imode
), GT
, CONST0_RTX (imode
),
20765 src
, pc_rtx
, pc_rtx
);
20767 emit_insn (unpack (gen_lowpart (imode
, dest
), src
, tmp
));
20771 /* Expand conditional increment or decrement using adb/sbb instructions.
20772 The default case using setcc followed by the conditional move can be
20773 done by generic code. */
/* Expand a conditional increment/decrement (operands[3] is +1 or -1)
   using the carry flag and adc/sbb, per the comment above.  Returns
   false when the generic setcc+cmov path should be used instead.
   NOTE(review): lossy extraction -- braces, case labels and some
   conditions are missing from this view; code kept byte-identical.  */
20775 ix86_expand_int_addcc (rtx operands
[])
20777 enum rtx_code code
= GET_CODE (operands
[1]);
20779 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
, rtx
);
20781 rtx val
= const0_rtx
;
20782 bool fpcmp
= false;
20783 enum machine_mode mode
;
20784 rtx op0
= XEXP (operands
[1], 0);
20785 rtx op1
= XEXP (operands
[1], 1);
/* Only +1/-1 adjustments can be folded into adc/sbb.  */
20787 if (operands
[3] != const1_rtx
20788 && operands
[3] != constm1_rtx
)
/* Turn the comparison into one that sets the carry flag.  */
20790 if (!ix86_expand_carry_flag_compare (code
, op0
, op1
, &compare_op
))
20792 code
= GET_CODE (compare_op
);
20794 flags
= XEXP (compare_op
, 0);
/* FP compares need the condition remapped to integer flag codes and,
   when reversed, the unordered-aware reversal.  */
20796 if (GET_MODE (flags
) == CCFPmode
20797 || GET_MODE (flags
) == CCFPUmode
)
20800 code
= ix86_fp_compare_code_to_integer (code
);
20807 PUT_CODE (compare_op
,
20808 reverse_condition_maybe_unordered
20809 (GET_CODE (compare_op
)));
20811 PUT_CODE (compare_op
, reverse_condition (GET_CODE (compare_op
)));
20814 mode
= GET_MODE (operands
[0]);
20816 /* Construct either adc or sbb insn. */
20817 if ((code
== LTU
) == (operands
[3] == constm1_rtx
))
/* sbb variants, selected by MODE (QI/HI/SI/DI).  */
20822 insn
= gen_subqi3_carry
;
20825 insn
= gen_subhi3_carry
;
20828 insn
= gen_subsi3_carry
;
20831 insn
= gen_subdi3_carry
;
20834 gcc_unreachable ();
/* adc variants, selected by MODE (QI/HI/SI/DI).  */
20842 insn
= gen_addqi3_carry
;
20845 insn
= gen_addhi3_carry
;
20848 insn
= gen_addsi3_carry
;
20851 insn
= gen_adddi3_carry
;
20854 gcc_unreachable ();
20857 emit_insn (insn (operands
[0], operands
[2], val
, flags
, compare_op
));
20863 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
20864 but works for floating pointer parameters and nonoffsetable memories.
20865 For pushes, it returns just stack offsets; the values will be saved
20866 in the right order. Maximally three parts are generated. */
/* Split OPERAND of mode MODE into 2..4 word-sized PARTS (see the comment
   above: SImode words on 32-bit, DImode words on 64-bit; XFmode is 3
   parts).  Handles registers, offsettable memory, pushes and constants.
   NOTE(review): lossy extraction -- braces, some conditions and the
   TARGET_64BIT split are missing from this view; code kept byte-identical.  */
20869 ix86_split_to_parts (rtx operand
, rtx
*parts
, enum machine_mode mode
)
/* Number of parts: 32-bit target counts 4-byte words (XFmode forced
   to 3); 64-bit target counts 8-byte words rounded up.  */
20874 size
= mode
==XFmode
? 3 : GET_MODE_SIZE (mode
) / 4;
20876 size
= (GET_MODE_SIZE (mode
) + 4) / 8;
20878 gcc_assert (!REG_P (operand
) || !MMX_REGNO_P (REGNO (operand
)));
20879 gcc_assert (size
>= 2 && size
<= 4);
20881 /* Optimize constant pool reference to immediates. This is used by fp
20882 moves, that force all constants to memory to allow combining. */
20883 if (MEM_P (operand
) && MEM_READONLY_P (operand
))
20885 rtx tmp
= maybe_get_pool_constant (operand
);
20890 if (MEM_P (operand
) && !offsettable_memref_p (operand
))
20892 /* The only non-offsetable memories we handle are pushes. */
20893 int ok
= push_operand (operand
, VOIDmode
);
/* For a push, every part aliases the same word_mode auto-dec memory;
   the emitted pushes account for the stack adjustment.  */
20897 operand
= copy_rtx (operand
);
20898 PUT_MODE (operand
, word_mode
);
20899 parts
[0] = parts
[1] = parts
[2] = parts
[3] = operand
;
20903 if (GET_CODE (operand
) == CONST_VECTOR
)
20905 enum machine_mode imode
= int_mode_for_mode (mode
);
20906 /* Caution: if we looked through a constant pool memory above,
20907 the operand may actually have a different mode now. That's
20908 ok, since we want to pun this all the way back to an integer. */
20909 operand
= simplify_subreg (imode
, operand
, GET_MODE (operand
), 0);
20910 gcc_assert (operand
!= NULL
);
/* 32-bit target: DImode splits directly; registers become consecutive
   SImode hard regs; offsettable memory becomes SImode slices at 4-byte
   offsets; CONST_DOUBLE is converted via real_to_target.  */
20916 if (mode
== DImode
)
20917 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20922 if (REG_P (operand
))
20924 gcc_assert (reload_completed
);
20925 for (i
= 0; i
< size
; i
++)
20926 parts
[i
] = gen_rtx_REG (SImode
, REGNO (operand
) + i
);
20928 else if (offsettable_memref_p (operand
))
20930 operand
= adjust_address (operand
, SImode
, 0);
20931 parts
[0] = operand
;
20932 for (i
= 1; i
< size
; i
++)
20933 parts
[i
] = adjust_address (operand
, SImode
, 4 * i
);
20935 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20940 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20944 real_to_target (l
, &r
, mode
);
20945 parts
[3] = gen_int_mode (l
[3], SImode
);
20946 parts
[2] = gen_int_mode (l
[2], SImode
);
20949 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
20950 long double may not be 80-bit. */
20951 real_to_target (l
, &r
, mode
);
20952 parts
[2] = gen_int_mode (l
[2], SImode
);
20955 REAL_VALUE_TO_TARGET_DOUBLE (r
, l
);
20958 gcc_unreachable ();
20960 parts
[1] = gen_int_mode (l
[1], SImode
);
20961 parts
[0] = gen_int_mode (l
[0], SImode
);
20964 gcc_unreachable ();
/* 64-bit target: TImode splits directly; XFmode/TFmode split into a
   DImode low part and an SImode (XF) or DImode (TF) upper part.  */
20969 if (mode
== TImode
)
20970 split_double_mode (mode
, &operand
, 1, &parts
[0], &parts
[1]);
20971 if (mode
== XFmode
|| mode
== TFmode
)
20973 enum machine_mode upper_mode
= mode
==XFmode
? SImode
: DImode
;
20974 if (REG_P (operand
))
20976 gcc_assert (reload_completed
);
20977 parts
[0] = gen_rtx_REG (DImode
, REGNO (operand
) + 0);
20978 parts
[1] = gen_rtx_REG (upper_mode
, REGNO (operand
) + 1);
20980 else if (offsettable_memref_p (operand
))
20982 operand
= adjust_address (operand
, DImode
, 0);
20983 parts
[0] = operand
;
20984 parts
[1] = adjust_address (operand
, upper_mode
, 8);
20986 else if (GET_CODE (operand
) == CONST_DOUBLE
)
20991 REAL_VALUE_FROM_CONST_DOUBLE (r
, operand
);
20992 real_to_target (l
, &r
, mode
);
20994 /* Do not use shift by 32 to avoid warning on 32bit systems. */
20995 if (HOST_BITS_PER_WIDE_INT
>= 64)
20998 ((l
[0] & (((HOST_WIDE_INT
) 2 << 31) - 1))
20999 + ((((HOST_WIDE_INT
) l
[1]) << 31) << 1),
21002 parts
[0] = immed_double_const (l
[0], l
[1], DImode
);
21004 if (upper_mode
== SImode
)
21005 parts
[1] = gen_int_mode (l
[2], SImode
);
21006 else if (HOST_BITS_PER_WIDE_INT
>= 64)
21009 ((l
[2] & (((HOST_WIDE_INT
) 2 << 31) - 1))
21010 + ((((HOST_WIDE_INT
) l
[3]) << 31) << 1),
21013 parts
[1] = immed_double_const (l
[2], l
[3], DImode
);
21016 gcc_unreachable ();
21023 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
21024 Return false when normal moves are needed; true when all required
21025 insns have been emitted. Operands 2-4 contain the input values
21026 int the correct order; operands 5-7 contain the output values. */
/* Split a DI/DF/XF/TF move into word-sized moves (see the comment above:
   inputs in operands[2..4], outputs in operands[5..7]).
   NOTE(review): lossy extraction -- braces, case labels, loop bodies and
   some conditions are missing from this view; code kept byte-identical.  */
21029 ix86_split_long_move (rtx operands
[])
21034 int collisions
= 0;
21035 enum machine_mode mode
= GET_MODE (operands
[0]);
21036 bool collisionparts
[4];
21038 /* The DFmode expanders may ask us to move double.
21039 For 64bit target this is single move. By hiding the fact
21040 here we simplify i386.md splitters. */
21041 if (TARGET_64BIT
&& GET_MODE_SIZE (GET_MODE (operands
[0])) == 8)
21043 /* Optimize constant pool reference to immediates. This is used by
21044 fp moves, that force all constants to memory to allow combining. */
21046 if (MEM_P (operands
[1])
21047 && GET_CODE (XEXP (operands
[1], 0)) == SYMBOL_REF
21048 && CONSTANT_POOL_ADDRESS_P (XEXP (operands
[1], 0)))
21049 operands
[1] = get_pool_constant (XEXP (operands
[1], 0));
21050 if (push_operand (operands
[0], VOIDmode
))
21052 operands
[0] = copy_rtx (operands
[0]);
21053 PUT_MODE (operands
[0], word_mode
);
21056 operands
[0] = gen_lowpart (DImode
, operands
[0]);
21057 operands
[1] = gen_lowpart (DImode
, operands
[1]);
21058 emit_move_insn (operands
[0], operands
[1]);
21062 /* The only non-offsettable memory we handle is push. */
21063 if (push_operand (operands
[0], VOIDmode
))
21066 gcc_assert (!MEM_P (operands
[0])
21067 || offsettable_memref_p (operands
[0]));
/* Decompose source and destination into parts; part[1] is the source,
   part[0] the destination.  */
21069 nparts
= ix86_split_to_parts (operands
[1], part
[1], GET_MODE (operands
[0]));
21070 ix86_split_to_parts (operands
[0], part
[0], GET_MODE (operands
[0]));
21072 /* When emitting push, take care for source operands on the stack. */
21073 if (push
&& MEM_P (operands
[1])
21074 && reg_overlap_mentioned_p (stack_pointer_rtx
, operands
[1]))
21076 rtx src_base
= XEXP (part
[1][nparts
- 1], 0);
21078 /* Compensate for the stack decrement by 4. */
21079 if (!TARGET_64BIT
&& nparts
== 3
21080 && mode
== XFmode
&& TARGET_128BIT_LONG_DOUBLE
)
21081 src_base
= plus_constant (Pmode
, src_base
, 4);
21083 /* src_base refers to the stack pointer and is
21084 automatically decreased by emitted push. */
21085 for (i
= 0; i
< nparts
; i
++)
21086 part
[1][i
] = change_address (part
[1][i
],
21087 GET_MODE (part
[1][i
]), src_base
);
21090 /* We need to do copy in the right order in case an address register
21091 of the source overlaps the destination. */
21092 if (REG_P (part
[0][0]) && MEM_P (part
[1][0]))
21096 for (i
= 0; i
< nparts
; i
++)
21099 = reg_overlap_mentioned_p (part
[0][i
], XEXP (part
[1][0], 0));
21100 if (collisionparts
[i
])
21104 /* Collision in the middle part can be handled by reordering. */
21105 if (collisions
== 1 && nparts
== 3 && collisionparts
[1])
21107 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21108 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21110 else if (collisions
== 1
21112 && (collisionparts
[1] || collisionparts
[2]))
21114 if (collisionparts
[1])
21116 tmp
= part
[0][1]; part
[0][1] = part
[0][2]; part
[0][2] = tmp
;
21117 tmp
= part
[1][1]; part
[1][1] = part
[1][2]; part
[1][2] = tmp
;
21121 tmp
= part
[0][2]; part
[0][2] = part
[0][3]; part
[0][3] = tmp
;
21122 tmp
= part
[1][2]; part
[1][2] = part
[1][3]; part
[1][3] = tmp
;
21126 /* If there are more collisions, we can't handle it by reordering.
21127 Do an lea to the last part and use only one colliding move. */
21128 else if (collisions
> 1)
21134 base
= part
[0][nparts
- 1];
21136 /* Handle the case when the last part isn't valid for lea.
21137 Happens in 64-bit mode storing the 12-byte XFmode. */
21138 if (GET_MODE (base
) != Pmode
)
21139 base
= gen_rtx_REG (Pmode
, REGNO (base
));
21141 emit_insn (gen_rtx_SET (VOIDmode
, base
, XEXP (part
[1][0], 0)));
21142 part
[1][0] = replace_equiv_address (part
[1][0], base
);
21143 for (i
= 1; i
< nparts
; i
++)
21145 tmp
= plus_constant (Pmode
, base
, UNITS_PER_WORD
* i
);
21146 part
[1][i
] = replace_equiv_address (part
[1][i
], tmp
);
/* Push path: emit the high parts first (3- and 4-part cases), with the
   extra 4-byte stack adjustment for 12-byte XFmode slots.  */
21157 if (TARGET_128BIT_LONG_DOUBLE
&& mode
== XFmode
)
21158 emit_insn (ix86_gen_add3 (stack_pointer_rtx
,
21159 stack_pointer_rtx
, GEN_INT (-4)));
21160 emit_move_insn (part
[0][2], part
[1][2]);
21162 else if (nparts
== 4)
21164 emit_move_insn (part
[0][3], part
[1][3]);
21165 emit_move_insn (part
[0][2], part
[1][2]);
21170 /* In 64bit mode we don't have 32bit push available. In case this is
21171 register, it is OK - we will just use larger counterpart. We also
21172 retype memory - these comes from attempt to avoid REX prefix on
21173 moving of second half of TFmode value. */
21174 if (GET_MODE (part
[1][1]) == SImode
)
21176 switch (GET_CODE (part
[1][1]))
21179 part
[1][1] = adjust_address (part
[1][1], DImode
, 0);
21183 part
[1][1] = gen_rtx_REG (DImode
, REGNO (part
[1][1]));
21187 gcc_unreachable ();
21190 if (GET_MODE (part
[1][0]) == SImode
)
21191 part
[1][0] = part
[1][1];
21194 emit_move_insn (part
[0][1], part
[1][1]);
21195 emit_move_insn (part
[0][0], part
[1][0]);
21199 /* Choose correct order to not overwrite the source before it is copied. */
21200 if ((REG_P (part
[0][0])
21201 && REG_P (part
[1][1])
21202 && (REGNO (part
[0][0]) == REGNO (part
[1][1])
21204 && REGNO (part
[0][0]) == REGNO (part
[1][2]))
21206 && REGNO (part
[0][0]) == REGNO (part
[1][3]))))
21208 && reg_overlap_mentioned_p (part
[0][0], XEXP (part
[1][0], 0))))
/* Reversed order: copy high-to-low via operands[2..]/[6..].  */
21210 for (i
= 0, j
= nparts
- 1; i
< nparts
; i
++, j
--)
21212 operands
[2 + i
] = part
[0][j
];
21213 operands
[6 + i
] = part
[1][j
];
/* Natural order.  */
21218 for (i
= 0; i
< nparts
; i
++)
21220 operands
[2 + i
] = part
[0][i
];
21221 operands
[6 + i
] = part
[1][i
];
21225 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
21226 if (optimize_insn_for_size_p ())
21228 for (j
= 0; j
< nparts
- 1; j
++)
21229 if (CONST_INT_P (operands
[6 + j
])
21230 && operands
[6 + j
] != const0_rtx
21231 && REG_P (operands
[2 + j
]))
21232 for (i
= j
; i
< nparts
- 1; i
++)
21233 if (CONST_INT_P (operands
[7 + i
])
21234 && INTVAL (operands
[7 + i
]) == INTVAL (operands
[6 + j
]))
21235 operands
[7 + i
] = operands
[2 + j
];
21238 for (i
= 0; i
< nparts
; i
++)
21239 emit_move_insn (operands
[2 + i
], operands
[6 + i
]);
21244 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
21245 left shift by a constant, either using a single shift or
21246 a sequence of add instructions. */
21249 ix86_expand_ashl_const (rtx operand
, int count
, enum machine_mode mode
)
21251 rtx (*insn
)(rtx
, rtx
, rtx
);
21254 || (count
* ix86_cost
->add
<= ix86_cost
->shift_const
21255 && !optimize_insn_for_size_p ()))
21257 insn
= mode
== DImode
? gen_addsi3
: gen_adddi3
;
21258 while (count
-- > 0)
21259 emit_insn (insn (operand
, operand
, operand
));
21263 insn
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
21264 emit_insn (insn (operand
, operand
, GEN_INT (count
)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into half-word operations, optionally using SCRATCH for the cmove
   fixup of variable shift counts.
   NOTE(review): lossy extraction -- braces, some else branches and
   conditions are missing from this view; code kept byte-identical.  */
21269 ix86_split_ashl (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21271 rtx (*gen_ashl3
)(rtx
, rtx
, rtx
);
21272 rtx (*gen_shld
)(rtx
, rtx
, rtx
);
21273 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21275 rtx low
[2], high
[2];
/* Constant shift count: mask it to the mode width, then either move
   low into high and clear low (count >= half), or use shld + ashl.  */
21278 if (CONST_INT_P (operands
[2]))
21280 split_double_mode (mode
, operands
, 2, low
, high
);
21281 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
21283 if (count
>= half_width
)
21285 emit_move_insn (high
[0], low
[1]);
21286 emit_move_insn (low
[0], const0_rtx
);
21288 if (count
> half_width
)
21289 ix86_expand_ashl_const (high
[0], count
- half_width
, mode
);
21293 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21295 if (!rtx_equal_p (operands
[0], operands
[1]))
21296 emit_move_insn (operands
[0], operands
[1]);
21298 emit_insn (gen_shld (high
[0], low
[0], GEN_INT (count
)));
21299 ix86_expand_ashl_const (low
[0], count
, mode
);
/* Variable shift count.  */
21304 split_double_mode (mode
, operands
, 1, low
, high
);
21306 gen_ashl3
= mode
== DImode
? gen_ashlsi3
: gen_ashldi3
;
/* Special-case 1 << N.  */
21308 if (operands
[1] == const1_rtx
)
21310 /* Assuming we've chosen a QImode capable registers, then 1 << N
21311 can be done with two 32/64-bit shifts, no branches, no cmoves. */
21312 if (ANY_QI_REG_P (low
[0]) && ANY_QI_REG_P (high
[0]))
21314 rtx s
, d
, flags
= gen_rtx_REG (CCZmode
, FLAGS_REG
);
21316 ix86_expand_clear (low
[0]);
21317 ix86_expand_clear (high
[0]);
21318 emit_insn (gen_testqi_ccz_1 (operands
[2], GEN_INT (half_width
)));
/* setcc the half-select bit into the low bytes of both halves:
   low gets (count & half_width) == 0, high gets the negation.  */
21320 d
= gen_lowpart (QImode
, low
[0]);
21321 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21322 s
= gen_rtx_EQ (QImode
, flags
, const0_rtx
);
21323 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21325 d
= gen_lowpart (QImode
, high
[0]);
21326 d
= gen_rtx_STRICT_LOW_PART (VOIDmode
, d
);
21327 s
= gen_rtx_NE (QImode
, flags
, const0_rtx
);
21328 emit_insn (gen_rtx_SET (VOIDmode
, d
, s
));
21331 /* Otherwise, we can get the same results by manually performing
21332 a bit extract operation on bit 5/6, and then performing the two
21333 shifts. The two methods of getting 0/1 into low/high are exactly
21334 the same size. Avoiding the shift in the bit extract case helps
21335 pentium4 a bit; no one else seems to care much either way. */
21338 enum machine_mode half_mode
;
21339 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
);
21340 rtx (*gen_and3
)(rtx
, rtx
, rtx
);
21341 rtx (*gen_xor3
)(rtx
, rtx
, rtx
);
21342 HOST_WIDE_INT bits
;
21345 if (mode
== DImode
)
21347 half_mode
= SImode
;
21348 gen_lshr3
= gen_lshrsi3
;
21349 gen_and3
= gen_andsi3
;
21350 gen_xor3
= gen_xorsi3
;
21355 half_mode
= DImode
;
21356 gen_lshr3
= gen_lshrdi3
;
21357 gen_and3
= gen_anddi3
;
21358 gen_xor3
= gen_xordi3
;
21362 if (TARGET_PARTIAL_REG_STALL
&& !optimize_insn_for_size_p ())
21363 x
= gen_rtx_ZERO_EXTEND (half_mode
, operands
[2]);
21365 x
= gen_lowpart (half_mode
, operands
[2]);
21366 emit_insn (gen_rtx_SET (VOIDmode
, high
[0], x
));
21368 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (bits
)));
21369 emit_insn (gen_and3 (high
[0], high
[0], const1_rtx
));
21370 emit_move_insn (low
[0], high
[0]);
21371 emit_insn (gen_xor3 (low
[0], low
[0], const1_rtx
));
/* Shift the 0/1 pair into place.  */
21374 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
21375 emit_insn (gen_ashl3 (high
[0], high
[0], operands
[2]));
/* Special-case -1 << N: no shld needed, ones shift into a -1.  */
21379 if (operands
[1] == constm1_rtx
)
21381 /* For -1 << N, we can avoid the shld instruction, because we
21382 know that we're shifting 0...31/63 ones into a -1. */
21383 emit_move_insn (low
[0], constm1_rtx
);
21384 if (optimize_insn_for_size_p ())
21385 emit_move_insn (high
[0], low
[0]);
21387 emit_move_insn (high
[0], constm1_rtx
);
/* General variable shift: shld + shl, then fix up the half swap.  */
21391 gen_shld
= mode
== DImode
? gen_x86_shld
: gen_x86_64_shld
;
21393 if (!rtx_equal_p (operands
[0], operands
[1]))
21394 emit_move_insn (operands
[0], operands
[1]);
21396 split_double_mode (mode
, operands
, 1, low
, high
);
21397 emit_insn (gen_shld (high
[0], low
[0], operands
[2]));
21400 emit_insn (gen_ashl3 (low
[0], low
[0], operands
[2]));
/* If count >= half_width the halves must be swapped: use cmove with
   SCRATCH when available, otherwise the branchy adj_2 pattern.  */
21402 if (TARGET_CMOVE
&& scratch
)
21404 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21405 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21407 ix86_expand_clear (scratch
);
21408 emit_insn (gen_x86_shift_adj_1 (high
[0], low
[0], operands
[2], scratch
));
21412 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21413 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21415 emit_insn (gen_x86_shift_adj_2 (high
[0], low
[0], operands
[2]));
/* Split a double-word arithmetic right shift into half-word operations;
   SCRATCH, when present with TARGET_CMOVE, holds the sign replication
   for the branchless fixup.
   NOTE(review): lossy extraction -- braces and some else branches are
   missing from this view; code kept byte-identical.  */
21420 ix86_split_ashr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21422 rtx (*gen_ashr3
)(rtx
, rtx
, rtx
)
21423 = mode
== DImode
? gen_ashrsi3
: gen_ashrdi3
;
21424 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21425 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21427 rtx low
[2], high
[2];
/* Constant shift count.  */
21430 if (CONST_INT_P (operands
[2]))
21432 split_double_mode (mode
, operands
, 2, low
, high
);
21433 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
/* Full-width-minus-one shift: both halves become the sign mask.  */
21435 if (count
== GET_MODE_BITSIZE (mode
) - 1)
21437 emit_move_insn (high
[0], high
[1]);
21438 emit_insn (gen_ashr3 (high
[0], high
[0],
21439 GEN_INT (half_width
- 1)));
21440 emit_move_insn (low
[0], high
[0]);
/* count >= half: low = high >> (count - half), high = sign mask.  */
21443 else if (count
>= half_width
)
21445 emit_move_insn (low
[0], high
[1]);
21446 emit_move_insn (high
[0], low
[0]);
21447 emit_insn (gen_ashr3 (high
[0], high
[0],
21448 GEN_INT (half_width
- 1)));
21450 if (count
> half_width
)
21451 emit_insn (gen_ashr3 (low
[0], low
[0],
21452 GEN_INT (count
- half_width
)));
/* count < half: shrd + sar.  */
21456 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21458 if (!rtx_equal_p (operands
[0], operands
[1]))
21459 emit_move_insn (operands
[0], operands
[1]);
21461 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21462 emit_insn (gen_ashr3 (high
[0], high
[0], GEN_INT (count
)));
/* Variable shift count: shrd + sar, then fix up the half swap.  */
21467 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21469 if (!rtx_equal_p (operands
[0], operands
[1]))
21470 emit_move_insn (operands
[0], operands
[1]);
21472 split_double_mode (mode
, operands
, 1, low
, high
);
21474 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21475 emit_insn (gen_ashr3 (high
[0], high
[0], operands
[2]));
21477 if (TARGET_CMOVE
&& scratch
)
/* Branchless fixup: SCRATCH holds the sign-extended high word.  */
21479 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21480 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21482 emit_move_insn (scratch
, high
[0]);
21483 emit_insn (gen_ashr3 (scratch
, scratch
,
21484 GEN_INT (half_width
- 1)));
21485 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
/* No scratch register: branchy adj_3 pattern.  */
21490 rtx (*gen_x86_shift_adj_3
)(rtx
, rtx
, rtx
)
21491 = mode
== DImode
? gen_x86_shiftsi_adj_3
: gen_x86_shiftdi_adj_3
;
21493 emit_insn (gen_x86_shift_adj_3 (low
[0], high
[0], operands
[2]));
/* Split a double-word logical right shift into half-word operations;
   SCRATCH, when present with TARGET_CMOVE, enables the branchless fixup.
   NOTE(review): lossy extraction -- braces and some else branches are
   missing from this view; code kept byte-identical.  */
21499 ix86_split_lshr (rtx
*operands
, rtx scratch
, enum machine_mode mode
)
21501 rtx (*gen_lshr3
)(rtx
, rtx
, rtx
)
21502 = mode
== DImode
? gen_lshrsi3
: gen_lshrdi3
;
21503 rtx (*gen_shrd
)(rtx
, rtx
, rtx
);
21504 int half_width
= GET_MODE_BITSIZE (mode
) >> 1;
21506 rtx low
[2], high
[2];
/* Constant shift count.  */
21509 if (CONST_INT_P (operands
[2]))
21511 split_double_mode (mode
, operands
, 2, low
, high
);
21512 count
= INTVAL (operands
[2]) & (GET_MODE_BITSIZE (mode
) - 1);
/* count >= half: low = high >> (count - half), high = 0.  */
21514 if (count
>= half_width
)
21516 emit_move_insn (low
[0], high
[1]);
21517 ix86_expand_clear (high
[0]);
21519 if (count
> half_width
)
21520 emit_insn (gen_lshr3 (low
[0], low
[0],
21521 GEN_INT (count
- half_width
)));
/* count < half: shrd + shr.  */
21525 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21527 if (!rtx_equal_p (operands
[0], operands
[1]))
21528 emit_move_insn (operands
[0], operands
[1]);
21530 emit_insn (gen_shrd (low
[0], high
[0], GEN_INT (count
)));
21531 emit_insn (gen_lshr3 (high
[0], high
[0], GEN_INT (count
)));
/* Variable shift count: shrd + shr, then fix up the half swap.  */
21536 gen_shrd
= mode
== DImode
? gen_x86_shrd
: gen_x86_64_shrd
;
21538 if (!rtx_equal_p (operands
[0], operands
[1]))
21539 emit_move_insn (operands
[0], operands
[1]);
21541 split_double_mode (mode
, operands
, 1, low
, high
);
21543 emit_insn (gen_shrd (low
[0], high
[0], operands
[2]));
21544 emit_insn (gen_lshr3 (high
[0], high
[0], operands
[2]));
21546 if (TARGET_CMOVE
&& scratch
)
/* Branchless fixup using a zeroed SCRATCH.  */
21548 rtx (*gen_x86_shift_adj_1
)(rtx
, rtx
, rtx
, rtx
)
21549 = mode
== DImode
? gen_x86_shiftsi_adj_1
: gen_x86_shiftdi_adj_1
;
21551 ix86_expand_clear (scratch
);
21552 emit_insn (gen_x86_shift_adj_1 (low
[0], high
[0], operands
[2],
/* No scratch register: branchy adj_2 pattern.  */
21557 rtx (*gen_x86_shift_adj_2
)(rtx
, rtx
, rtx
)
21558 = mode
== DImode
? gen_x86_shiftsi_adj_2
: gen_x86_shiftdi_adj_2
;
21560 emit_insn (gen_x86_shift_adj_2 (low
[0], high
[0], operands
[2]));
21565 /* Predict just emitted jump instruction to be taken with probability PROB. */
21567 predict_jump (int prob
)
21569 rtx insn
= get_last_insn ();
21570 gcc_assert (JUMP_P (insn
));
21571 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (prob
));
21574 /* Helper function for the string operations below. Dest VARIABLE whether
21575 it is aligned to VALUE bytes. If true, jump to the label. */
21577 ix86_expand_aligntest (rtx variable
, int value
, bool epilogue
)
21579 rtx label
= gen_label_rtx ();
21580 rtx tmpcount
= gen_reg_rtx (GET_MODE (variable
));
21581 if (GET_MODE (variable
) == DImode
)
21582 emit_insn (gen_anddi3 (tmpcount
, variable
, GEN_INT (value
)));
21584 emit_insn (gen_andsi3 (tmpcount
, variable
, GEN_INT (value
)));
21585 emit_cmp_and_jump_insns (tmpcount
, const0_rtx
, EQ
, 0, GET_MODE (variable
),
21588 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
21590 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
21594 /* Adjust COUNTER by the VALUE. */
21596 ix86_adjust_counter (rtx countreg
, HOST_WIDE_INT value
)
21598 rtx (*gen_add
)(rtx
, rtx
, rtx
)
21599 = GET_MODE (countreg
) == DImode
? gen_adddi3
: gen_addsi3
;
21601 emit_insn (gen_add (countreg
, countreg
, GEN_INT (-value
)));
21604 /* Zero extend possibly SImode EXP to Pmode register. */
21606 ix86_zero_extend_to_Pmode (rtx exp
)
21608 if (GET_MODE (exp
) != Pmode
)
21609 exp
= convert_to_mode (Pmode
, exp
, 1);
21610 return force_reg (Pmode
, exp
);
21613 /* Divide COUNTREG by SCALE. */
21615 scale_counter (rtx countreg
, int scale
)
21621 if (CONST_INT_P (countreg
))
21622 return GEN_INT (INTVAL (countreg
) / scale
);
21623 gcc_assert (REG_P (countreg
));
21625 sc
= expand_simple_binop (GET_MODE (countreg
), LSHIFTRT
, countreg
,
21626 GEN_INT (exact_log2 (scale
)),
21627 NULL
, 1, OPTAB_DIRECT
);
21631 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
21632 DImode for constant loop counts. */
21634 static enum machine_mode
21635 counter_mode (rtx count_exp
)
21637 if (GET_MODE (count_exp
) != VOIDmode
)
21638 return GET_MODE (count_exp
);
21639 if (!CONST_INT_P (count_exp
))
21641 if (TARGET_64BIT
&& (INTVAL (count_exp
) & ~0xffffffff))
21646 /* When SRCPTR is non-NULL, output simple loop to move memory
21647 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
21648 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
21649 equivalent loop to set memory by VALUE (supposed to be in MODE).
21651 The size is rounded down to whole number of chunk size moved at once.
21652 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* Emit a copy (SRCPTR non-NULL) or set (SRCPTR NULL) loop of MODE-sized
   chunks unrolled UNROLL times -- see the comment preceding this
   function for the full contract.
   NOTE(review): lossy extraction -- braces, some conditions and parts
   of the unrolled bodies are missing from this view; code kept
   byte-identical.  */
21656 expand_set_or_movmem_via_loop (rtx destmem
, rtx srcmem
,
21657 rtx destptr
, rtx srcptr
, rtx value
,
21658 rtx count
, enum machine_mode mode
, int unroll
,
21661 rtx out_label
, top_label
, iter
, tmp
;
21662 enum machine_mode iter_mode
= counter_mode (count
);
/* Bytes moved per iteration, and a mask rounding COUNT down to a
   whole number of such pieces.  */
21663 rtx piece_size
= GEN_INT (GET_MODE_SIZE (mode
) * unroll
);
21664 rtx piece_size_mask
= GEN_INT (~((GET_MODE_SIZE (mode
) * unroll
) - 1));
21670 top_label
= gen_label_rtx ();
21671 out_label
= gen_label_rtx ();
21672 iter
= gen_reg_rtx (iter_mode
);
21674 size
= expand_simple_binop (iter_mode
, AND
, count
, piece_size_mask
,
21675 NULL
, 1, OPTAB_DIRECT
);
21676 /* Those two should combine. */
21677 if (piece_size
== const1_rtx
)
/* Skip the whole loop when the rounded size is zero.  */
21679 emit_cmp_and_jump_insns (size
, const0_rtx
, EQ
, NULL_RTX
, iter_mode
,
21681 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
21683 emit_move_insn (iter
, const0_rtx
);
21685 emit_label (top_label
);
/* Rebase the MEMs at destptr+iter (and srcptr+iter for copies).  */
21687 tmp
= convert_modes (Pmode
, iter_mode
, iter
, true);
21688 x_addr
= gen_rtx_PLUS (Pmode
, destptr
, tmp
);
21689 destmem
= change_address (destmem
, mode
, x_addr
);
21693 y_addr
= gen_rtx_PLUS (Pmode
, srcptr
, copy_rtx (tmp
));
21694 srcmem
= change_address (srcmem
, mode
, y_addr
);
21696 /* When unrolling for chips that reorder memory reads and writes,
21697 we can save registers by using single temporary.
21698 Also using 4 temporaries is overkill in 32bit mode. */
21699 if (!TARGET_64BIT
&& 0)
21701 for (i
= 0; i
< unroll
; i
++)
21706 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21708 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21710 emit_move_insn (destmem
, srcmem
);
/* Copy path: load up to 4 temporaries, then store them, so loads and
   stores can be scheduled apart.  */
21716 gcc_assert (unroll
<= 4);
21717 for (i
= 0; i
< unroll
; i
++)
21719 tmpreg
[i
] = gen_reg_rtx (mode
);
21723 adjust_address (copy_rtx (srcmem
), mode
, GET_MODE_SIZE (mode
));
21725 emit_move_insn (tmpreg
[i
], srcmem
);
21727 for (i
= 0; i
< unroll
; i
++)
21732 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21734 emit_move_insn (destmem
, tmpreg
[i
]);
/* Set path: store VALUE UNROLL times.  */
21739 for (i
= 0; i
< unroll
; i
++)
21743 adjust_address (copy_rtx (destmem
), mode
, GET_MODE_SIZE (mode
));
21744 emit_move_insn (destmem
, value
);
/* iter += piece_size; loop while iter < size.  */
21747 tmp
= expand_simple_binop (iter_mode
, PLUS
, iter
, piece_size
, iter
,
21748 true, OPTAB_LIB_WIDEN
);
21750 emit_move_insn (iter
, tmp
);
21752 emit_cmp_and_jump_insns (iter
, size
, LT
, NULL_RTX
, iter_mode
,
/* Derive the back-edge probability from EXPECTED_SIZE when known.  */
21754 if (expected_size
!= -1)
21756 expected_size
/= GET_MODE_SIZE (mode
) * unroll
;
21757 if (expected_size
== 0)
21759 else if (expected_size
> REG_BR_PROB_BASE
)
21760 predict_jump (REG_BR_PROB_BASE
- 1);
21762 predict_jump (REG_BR_PROB_BASE
- (REG_BR_PROB_BASE
+ expected_size
/ 2) / expected_size
);
21765 predict_jump (REG_BR_PROB_BASE
* 80 / 100);
/* Advance the pointers past the copied/set region.  */
21766 iter
= ix86_zero_extend_to_Pmode (iter
);
21767 tmp
= expand_simple_binop (Pmode
, PLUS
, destptr
, iter
, destptr
,
21768 true, OPTAB_LIB_WIDEN
);
21769 if (tmp
!= destptr
)
21770 emit_move_insn (destptr
, tmp
);
21773 tmp
= expand_simple_binop (Pmode
, PLUS
, srcptr
, iter
, srcptr
,
21774 true, OPTAB_LIB_WIDEN
);
21776 emit_move_insn (srcptr
, tmp
);
21778 emit_label (out_label
);
21781 /* Output "rep; mov" instruction.
21782 Arguments have same meaning as for previous function */
/* Emit a "rep; movs" copy of COUNT bytes scaled by MODE's size -- see
   the comment preceding this function.
   NOTE(review): lossy extraction -- braces and some conditions are
   missing from this view; code kept byte-identical.  */
21784 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21785 rtx destptr
, rtx srcptr
,
21787 enum machine_mode mode
)
21792 HOST_WIDE_INT rounded_count
;
21794 /* If the size is known, it is shorter to use rep movs. */
21795 if (mode
== QImode
&& CONST_INT_P (count
)
21796 && !(INTVAL (count
) & 3))
/* Normalize both MEMs to BLKmode at their pointer registers.  */
21799 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21800 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21801 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21802 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
/* Element count = byte count / element size, zero-extended to Pmode.  */
21803 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
/* Final pointer values: ptr + countreg << log2(element size).  */
21804 if (mode
!= QImode
)
21806 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21807 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21808 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21809 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21810 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21811 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21815 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21816 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
/* Record the exact size moved (count rounded down to whole elements)
   on fresh MEM copies, or drop any stale size info.  */
21818 if (CONST_INT_P (count
))
21820 rounded_count
= (INTVAL (count
)
21821 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21822 destmem
= shallow_copy_rtx (destmem
);
21823 srcmem
= shallow_copy_rtx (srcmem
);
21824 set_mem_size (destmem
, rounded_count
);
21825 set_mem_size (srcmem
, rounded_count
);
21829 if (MEM_SIZE_KNOWN_P (destmem
))
21830 clear_mem_size (destmem
);
21831 if (MEM_SIZE_KNOWN_P (srcmem
))
21832 clear_mem_size (srcmem
);
21834 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
21838 /* Output "rep; stos" instruction.
21839 Arguments have same meaning as for previous function */
/* Emit a "rep; stos" fill of COUNT bytes scaled by MODE's size -- see
   the comment preceding this function.
   NOTE(review): lossy extraction -- braces and some conditions are
   missing from this view; code kept byte-identical.  */
21841 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21842 rtx count
, enum machine_mode mode
,
21847 HOST_WIDE_INT rounded_count
;
/* Normalize the destination MEM to BLKmode at its pointer register.  */
21849 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21850 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
/* The stored value must live in a MODE register.  */
21851 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21852 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
/* Final destination pointer: destptr + countreg << log2(element size).  */
21853 if (mode
!= QImode
)
21855 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21856 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21857 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21860 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
/* Known zero-fill of a known size: record the rounded size on a fresh
   MEM copy; otherwise drop any stale size info.  */
21861 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21863 rounded_count
= (INTVAL (count
)
21864 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21865 destmem
= shallow_copy_rtx (destmem
);
21866 set_mem_size (destmem
, rounded_count
);
21868 else if (MEM_SIZE_KNOWN_P (destmem
))
21869 clear_mem_size (destmem
);
21870 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
21874 emit_strmov (rtx destmem
, rtx srcmem
,
21875 rtx destptr
, rtx srcptr
, enum machine_mode mode
, int offset
)
21877 rtx src
= adjust_automodify_address_nv (srcmem
, mode
, srcptr
, offset
);
21878 rtx dest
= adjust_automodify_address_nv (destmem
, mode
, destptr
, offset
);
21879 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21882 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
21884 expand_movmem_epilogue (rtx destmem
, rtx srcmem
,
21885 rtx destptr
, rtx srcptr
, rtx count
, int max_size
)
21888 if (CONST_INT_P (count
))
21890 HOST_WIDE_INT countval
= INTVAL (count
);
21893 if ((countval
& 0x10) && max_size
> 16)
21897 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21898 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
+ 8);
21901 gcc_unreachable ();
21904 if ((countval
& 0x08) && max_size
> 8)
21907 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, DImode
, offset
);
21910 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21911 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
+ 4);
21915 if ((countval
& 0x04) && max_size
> 4)
21917 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, SImode
, offset
);
21920 if ((countval
& 0x02) && max_size
> 2)
21922 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, HImode
, offset
);
21925 if ((countval
& 0x01) && max_size
> 1)
21927 emit_strmov (destmem
, srcmem
, destptr
, srcptr
, QImode
, offset
);
21934 count
= expand_simple_binop (GET_MODE (count
), AND
, count
, GEN_INT (max_size
- 1),
21935 count
, 1, OPTAB_DIRECT
);
21936 expand_set_or_movmem_via_loop (destmem
, srcmem
, destptr
, srcptr
, NULL
,
21937 count
, QImode
, 1, 4);
21941 /* When there are stringops, we can cheaply increase dest and src pointers.
21942 Otherwise we save code size by maintaining offset (zero is readily
21943 available from preceding rep operation) and using x86 addressing modes.
21945 if (TARGET_SINGLE_STRINGOP
)
21949 rtx label
= ix86_expand_aligntest (count
, 4, true);
21950 src
= change_address (srcmem
, SImode
, srcptr
);
21951 dest
= change_address (destmem
, SImode
, destptr
);
21952 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21953 emit_label (label
);
21954 LABEL_NUSES (label
) = 1;
21958 rtx label
= ix86_expand_aligntest (count
, 2, true);
21959 src
= change_address (srcmem
, HImode
, srcptr
);
21960 dest
= change_address (destmem
, HImode
, destptr
);
21961 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21962 emit_label (label
);
21963 LABEL_NUSES (label
) = 1;
21967 rtx label
= ix86_expand_aligntest (count
, 1, true);
21968 src
= change_address (srcmem
, QImode
, srcptr
);
21969 dest
= change_address (destmem
, QImode
, destptr
);
21970 emit_insn (gen_strmov (destptr
, dest
, srcptr
, src
));
21971 emit_label (label
);
21972 LABEL_NUSES (label
) = 1;
21977 rtx offset
= force_reg (Pmode
, const0_rtx
);
21982 rtx label
= ix86_expand_aligntest (count
, 4, true);
21983 src
= change_address (srcmem
, SImode
, srcptr
);
21984 dest
= change_address (destmem
, SImode
, destptr
);
21985 emit_move_insn (dest
, src
);
21986 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (4), NULL
,
21987 true, OPTAB_LIB_WIDEN
);
21989 emit_move_insn (offset
, tmp
);
21990 emit_label (label
);
21991 LABEL_NUSES (label
) = 1;
21995 rtx label
= ix86_expand_aligntest (count
, 2, true);
21996 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
21997 src
= change_address (srcmem
, HImode
, tmp
);
21998 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
21999 dest
= change_address (destmem
, HImode
, tmp
);
22000 emit_move_insn (dest
, src
);
22001 tmp
= expand_simple_binop (Pmode
, PLUS
, offset
, GEN_INT (2), tmp
,
22002 true, OPTAB_LIB_WIDEN
);
22004 emit_move_insn (offset
, tmp
);
22005 emit_label (label
);
22006 LABEL_NUSES (label
) = 1;
22010 rtx label
= ix86_expand_aligntest (count
, 1, true);
22011 tmp
= gen_rtx_PLUS (Pmode
, srcptr
, offset
);
22012 src
= change_address (srcmem
, QImode
, tmp
);
22013 tmp
= gen_rtx_PLUS (Pmode
, destptr
, offset
);
22014 dest
= change_address (destmem
, QImode
, tmp
);
22015 emit_move_insn (dest
, src
);
22016 emit_label (label
);
22017 LABEL_NUSES (label
) = 1;
22022 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22024 expand_setmem_epilogue_via_loop (rtx destmem
, rtx destptr
, rtx value
,
22025 rtx count
, int max_size
)
22028 expand_simple_binop (counter_mode (count
), AND
, count
,
22029 GEN_INT (max_size
- 1), count
, 1, OPTAB_DIRECT
);
22030 expand_set_or_movmem_via_loop (destmem
, NULL
, destptr
, NULL
,
22031 gen_lowpart (QImode
, value
), count
, QImode
,
22035 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
22037 expand_setmem_epilogue (rtx destmem
, rtx destptr
, rtx value
, rtx count
, int max_size
)
22041 if (CONST_INT_P (count
))
22043 HOST_WIDE_INT countval
= INTVAL (count
);
22046 if ((countval
& 0x10) && max_size
> 16)
22050 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22051 emit_insn (gen_strset (destptr
, dest
, value
));
22052 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
+ 8);
22053 emit_insn (gen_strset (destptr
, dest
, value
));
22056 gcc_unreachable ();
22059 if ((countval
& 0x08) && max_size
> 8)
22063 dest
= adjust_automodify_address_nv (destmem
, DImode
, destptr
, offset
);
22064 emit_insn (gen_strset (destptr
, dest
, value
));
22068 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22069 emit_insn (gen_strset (destptr
, dest
, value
));
22070 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
+ 4);
22071 emit_insn (gen_strset (destptr
, dest
, value
));
22075 if ((countval
& 0x04) && max_size
> 4)
22077 dest
= adjust_automodify_address_nv (destmem
, SImode
, destptr
, offset
);
22078 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22081 if ((countval
& 0x02) && max_size
> 2)
22083 dest
= adjust_automodify_address_nv (destmem
, HImode
, destptr
, offset
);
22084 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22087 if ((countval
& 0x01) && max_size
> 1)
22089 dest
= adjust_automodify_address_nv (destmem
, QImode
, destptr
, offset
);
22090 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22097 expand_setmem_epilogue_via_loop (destmem
, destptr
, value
, count
, max_size
);
22102 rtx label
= ix86_expand_aligntest (count
, 16, true);
22105 dest
= change_address (destmem
, DImode
, destptr
);
22106 emit_insn (gen_strset (destptr
, dest
, value
));
22107 emit_insn (gen_strset (destptr
, dest
, value
));
22111 dest
= change_address (destmem
, SImode
, destptr
);
22112 emit_insn (gen_strset (destptr
, dest
, value
));
22113 emit_insn (gen_strset (destptr
, dest
, value
));
22114 emit_insn (gen_strset (destptr
, dest
, value
));
22115 emit_insn (gen_strset (destptr
, dest
, value
));
22117 emit_label (label
);
22118 LABEL_NUSES (label
) = 1;
22122 rtx label
= ix86_expand_aligntest (count
, 8, true);
22125 dest
= change_address (destmem
, DImode
, destptr
);
22126 emit_insn (gen_strset (destptr
, dest
, value
));
22130 dest
= change_address (destmem
, SImode
, destptr
);
22131 emit_insn (gen_strset (destptr
, dest
, value
));
22132 emit_insn (gen_strset (destptr
, dest
, value
));
22134 emit_label (label
);
22135 LABEL_NUSES (label
) = 1;
22139 rtx label
= ix86_expand_aligntest (count
, 4, true);
22140 dest
= change_address (destmem
, SImode
, destptr
);
22141 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (SImode
, value
)));
22142 emit_label (label
);
22143 LABEL_NUSES (label
) = 1;
22147 rtx label
= ix86_expand_aligntest (count
, 2, true);
22148 dest
= change_address (destmem
, HImode
, destptr
);
22149 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (HImode
, value
)));
22150 emit_label (label
);
22151 LABEL_NUSES (label
) = 1;
22155 rtx label
= ix86_expand_aligntest (count
, 1, true);
22156 dest
= change_address (destmem
, QImode
, destptr
);
22157 emit_insn (gen_strset (destptr
, dest
, gen_lowpart (QImode
, value
)));
22158 emit_label (label
);
22159 LABEL_NUSES (label
) = 1;
22163 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
22164 DESIRED_ALIGNMENT. */
22166 expand_movmem_prologue (rtx destmem
, rtx srcmem
,
22167 rtx destptr
, rtx srcptr
, rtx count
,
22168 int align
, int desired_alignment
)
22170 if (align
<= 1 && desired_alignment
> 1)
22172 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22173 srcmem
= change_address (srcmem
, QImode
, srcptr
);
22174 destmem
= change_address (destmem
, QImode
, destptr
);
22175 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22176 ix86_adjust_counter (count
, 1);
22177 emit_label (label
);
22178 LABEL_NUSES (label
) = 1;
22180 if (align
<= 2 && desired_alignment
> 2)
22182 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22183 srcmem
= change_address (srcmem
, HImode
, srcptr
);
22184 destmem
= change_address (destmem
, HImode
, destptr
);
22185 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22186 ix86_adjust_counter (count
, 2);
22187 emit_label (label
);
22188 LABEL_NUSES (label
) = 1;
22190 if (align
<= 4 && desired_alignment
> 4)
22192 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22193 srcmem
= change_address (srcmem
, SImode
, srcptr
);
22194 destmem
= change_address (destmem
, SImode
, destptr
);
22195 emit_insn (gen_strmov (destptr
, destmem
, srcptr
, srcmem
));
22196 ix86_adjust_counter (count
, 4);
22197 emit_label (label
);
22198 LABEL_NUSES (label
) = 1;
22200 gcc_assert (desired_alignment
<= 8);
22203 /* Copy enough from DST to SRC to align DST known to DESIRED_ALIGN.
22204 ALIGN_BYTES is how many bytes need to be copied. */
22206 expand_constant_movmem_prologue (rtx dst
, rtx
*srcp
, rtx destreg
, rtx srcreg
,
22207 int desired_align
, int align_bytes
)
22210 rtx orig_dst
= dst
;
22211 rtx orig_src
= src
;
22213 int src_align_bytes
= get_mem_align_offset (src
, desired_align
* BITS_PER_UNIT
);
22214 if (src_align_bytes
>= 0)
22215 src_align_bytes
= desired_align
- src_align_bytes
;
22216 if (align_bytes
& 1)
22218 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22219 src
= adjust_automodify_address_nv (src
, QImode
, srcreg
, 0);
22221 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22223 if (align_bytes
& 2)
22225 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22226 src
= adjust_automodify_address_nv (src
, HImode
, srcreg
, off
);
22227 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22228 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22229 if (src_align_bytes
>= 0
22230 && (src_align_bytes
& 1) == (align_bytes
& 1)
22231 && MEM_ALIGN (src
) < 2 * BITS_PER_UNIT
)
22232 set_mem_align (src
, 2 * BITS_PER_UNIT
);
22234 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22236 if (align_bytes
& 4)
22238 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22239 src
= adjust_automodify_address_nv (src
, SImode
, srcreg
, off
);
22240 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22241 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22242 if (src_align_bytes
>= 0)
22244 unsigned int src_align
= 0;
22245 if ((src_align_bytes
& 3) == (align_bytes
& 3))
22247 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22249 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22250 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22253 emit_insn (gen_strmov (destreg
, dst
, srcreg
, src
));
22255 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22256 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
, off
);
22257 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22258 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22259 if (src_align_bytes
>= 0)
22261 unsigned int src_align
= 0;
22262 if ((src_align_bytes
& 7) == (align_bytes
& 7))
22264 else if ((src_align_bytes
& 3) == (align_bytes
& 3))
22266 else if ((src_align_bytes
& 1) == (align_bytes
& 1))
22268 if (src_align
> (unsigned int) desired_align
)
22269 src_align
= desired_align
;
22270 if (MEM_ALIGN (src
) < src_align
* BITS_PER_UNIT
)
22271 set_mem_align (src
, src_align
* BITS_PER_UNIT
);
22273 if (MEM_SIZE_KNOWN_P (orig_dst
))
22274 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22275 if (MEM_SIZE_KNOWN_P (orig_src
))
22276 set_mem_size (src
, MEM_SIZE (orig_src
) - align_bytes
);
22281 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
22282 DESIRED_ALIGNMENT. */
22284 expand_setmem_prologue (rtx destmem
, rtx destptr
, rtx value
, rtx count
,
22285 int align
, int desired_alignment
)
22287 if (align
<= 1 && desired_alignment
> 1)
22289 rtx label
= ix86_expand_aligntest (destptr
, 1, false);
22290 destmem
= change_address (destmem
, QImode
, destptr
);
22291 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (QImode
, value
)));
22292 ix86_adjust_counter (count
, 1);
22293 emit_label (label
);
22294 LABEL_NUSES (label
) = 1;
22296 if (align
<= 2 && desired_alignment
> 2)
22298 rtx label
= ix86_expand_aligntest (destptr
, 2, false);
22299 destmem
= change_address (destmem
, HImode
, destptr
);
22300 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (HImode
, value
)));
22301 ix86_adjust_counter (count
, 2);
22302 emit_label (label
);
22303 LABEL_NUSES (label
) = 1;
22305 if (align
<= 4 && desired_alignment
> 4)
22307 rtx label
= ix86_expand_aligntest (destptr
, 4, false);
22308 destmem
= change_address (destmem
, SImode
, destptr
);
22309 emit_insn (gen_strset (destptr
, destmem
, gen_lowpart (SImode
, value
)));
22310 ix86_adjust_counter (count
, 4);
22311 emit_label (label
);
22312 LABEL_NUSES (label
) = 1;
22314 gcc_assert (desired_alignment
<= 8);
22317 /* Set enough from DST to align DST known to by aligned by ALIGN to
22318 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
22320 expand_constant_setmem_prologue (rtx dst
, rtx destreg
, rtx value
,
22321 int desired_align
, int align_bytes
)
22324 rtx orig_dst
= dst
;
22325 if (align_bytes
& 1)
22327 dst
= adjust_automodify_address_nv (dst
, QImode
, destreg
, 0);
22329 emit_insn (gen_strset (destreg
, dst
,
22330 gen_lowpart (QImode
, value
)));
22332 if (align_bytes
& 2)
22334 dst
= adjust_automodify_address_nv (dst
, HImode
, destreg
, off
);
22335 if (MEM_ALIGN (dst
) < 2 * BITS_PER_UNIT
)
22336 set_mem_align (dst
, 2 * BITS_PER_UNIT
);
22338 emit_insn (gen_strset (destreg
, dst
,
22339 gen_lowpart (HImode
, value
)));
22341 if (align_bytes
& 4)
22343 dst
= adjust_automodify_address_nv (dst
, SImode
, destreg
, off
);
22344 if (MEM_ALIGN (dst
) < 4 * BITS_PER_UNIT
)
22345 set_mem_align (dst
, 4 * BITS_PER_UNIT
);
22347 emit_insn (gen_strset (destreg
, dst
,
22348 gen_lowpart (SImode
, value
)));
22350 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
, off
);
22351 if (MEM_ALIGN (dst
) < (unsigned int) desired_align
* BITS_PER_UNIT
)
22352 set_mem_align (dst
, desired_align
* BITS_PER_UNIT
);
22353 if (MEM_SIZE_KNOWN_P (orig_dst
))
22354 set_mem_size (dst
, MEM_SIZE (orig_dst
) - align_bytes
);
22358 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
22359 static enum stringop_alg
22360 decide_alg (HOST_WIDE_INT count
, HOST_WIDE_INT expected_size
, bool memset
,
22361 int *dynamic_check
)
22363 const struct stringop_algs
* algs
;
22364 bool optimize_for_speed
;
22365 /* Algorithms using the rep prefix want at least edi and ecx;
22366 additionally, memset wants eax and memcpy wants esi. Don't
22367 consider such algorithms if the user has appropriated those
22368 registers for their own purposes. */
22369 bool rep_prefix_usable
= !(fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
]
22371 ? fixed_regs
[AX_REG
] : fixed_regs
[SI_REG
]));
22373 #define ALG_USABLE_P(alg) (rep_prefix_usable \
22374 || (alg != rep_prefix_1_byte \
22375 && alg != rep_prefix_4_byte \
22376 && alg != rep_prefix_8_byte))
22377 const struct processor_costs
*cost
;
22379 /* Even if the string operation call is cold, we still might spend a lot
22380 of time processing large blocks. */
22381 if (optimize_function_for_size_p (cfun
)
22382 || (optimize_insn_for_size_p ()
22383 && expected_size
!= -1 && expected_size
< 256))
22384 optimize_for_speed
= false;
22386 optimize_for_speed
= true;
22388 cost
= optimize_for_speed
? ix86_cost
: &ix86_size_cost
;
22390 *dynamic_check
= -1;
22392 algs
= &cost
->memset
[TARGET_64BIT
!= 0];
22394 algs
= &cost
->memcpy
[TARGET_64BIT
!= 0];
22395 if (ix86_stringop_alg
!= no_stringop
&& ALG_USABLE_P (ix86_stringop_alg
))
22396 return ix86_stringop_alg
;
22397 /* rep; movq or rep; movl is the smallest variant. */
22398 else if (!optimize_for_speed
)
22400 if (!count
|| (count
& 3))
22401 return rep_prefix_usable
? rep_prefix_1_byte
: loop_1_byte
;
22403 return rep_prefix_usable
? rep_prefix_4_byte
: loop
;
22405 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
22407 else if (expected_size
!= -1 && expected_size
< 4)
22408 return loop_1_byte
;
22409 else if (expected_size
!= -1)
22412 enum stringop_alg alg
= libcall
;
22413 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22415 /* We get here if the algorithms that were not libcall-based
22416 were rep-prefix based and we are unable to use rep prefixes
22417 based on global register usage. Break out of the loop and
22418 use the heuristic below. */
22419 if (algs
->size
[i
].max
== 0)
22421 if (algs
->size
[i
].max
>= expected_size
|| algs
->size
[i
].max
== -1)
22423 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22425 if (candidate
!= libcall
&& ALG_USABLE_P (candidate
))
22427 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
22428 last non-libcall inline algorithm. */
22429 if (TARGET_INLINE_ALL_STRINGOPS
)
22431 /* When the current size is best to be copied by a libcall,
22432 but we are still forced to inline, run the heuristic below
22433 that will pick code for medium sized blocks. */
22434 if (alg
!= libcall
)
22438 else if (ALG_USABLE_P (candidate
))
22442 gcc_assert (TARGET_INLINE_ALL_STRINGOPS
|| !rep_prefix_usable
);
22444 /* When asked to inline the call anyway, try to pick meaningful choice.
22445 We look for maximal size of block that is faster to copy by hand and
22446 take blocks of at most of that size guessing that average size will
22447 be roughly half of the block.
22449 If this turns out to be bad, we might simply specify the preferred
22450 choice in ix86_costs. */
22451 if ((TARGET_INLINE_ALL_STRINGOPS
|| TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22452 && (algs
->unknown_size
== libcall
|| !ALG_USABLE_P (algs
->unknown_size
)))
22455 enum stringop_alg alg
;
22457 bool any_alg_usable_p
= true;
22459 for (i
= 0; i
< MAX_STRINGOP_ALGS
; i
++)
22461 enum stringop_alg candidate
= algs
->size
[i
].alg
;
22462 any_alg_usable_p
= any_alg_usable_p
&& ALG_USABLE_P (candidate
);
22464 if (candidate
!= libcall
&& candidate
22465 && ALG_USABLE_P (candidate
))
22466 max
= algs
->size
[i
].max
;
22468 /* If there aren't any usable algorithms, then recursing on
22469 smaller sizes isn't going to find anything. Just return the
22470 simple byte-at-a-time copy loop. */
22471 if (!any_alg_usable_p
)
22473 /* Pick something reasonable. */
22474 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22475 *dynamic_check
= 128;
22476 return loop_1_byte
;
22480 alg
= decide_alg (count
, max
/ 2, memset
, dynamic_check
);
22481 gcc_assert (*dynamic_check
== -1);
22482 gcc_assert (alg
!= libcall
);
22483 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY
)
22484 *dynamic_check
= max
;
22487 return ALG_USABLE_P (algs
->unknown_size
) ? algs
->unknown_size
: libcall
;
22488 #undef ALG_USABLE_P
22491 /* Decide on alignment. We know that the operand is already aligned to ALIGN
22492 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
22494 decide_alignment (int align
,
22495 enum stringop_alg alg
,
22498 int desired_align
= 0;
22502 gcc_unreachable ();
22504 case unrolled_loop
:
22505 desired_align
= GET_MODE_SIZE (Pmode
);
22507 case rep_prefix_8_byte
:
22510 case rep_prefix_4_byte
:
22511 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22512 copying whole cacheline at once. */
22513 if (TARGET_PENTIUMPRO
)
22518 case rep_prefix_1_byte
:
22519 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
22520 copying whole cacheline at once. */
22521 if (TARGET_PENTIUMPRO
)
22535 if (desired_align
< align
)
22536 desired_align
= align
;
22537 if (expected_size
!= -1 && expected_size
< 4)
22538 desired_align
= align
;
22539 return desired_align
;
22542 /* Return the smallest power of 2 greater than VAL. */
22544 smallest_pow2_greater_than (int val
)
22552 /* Expand string move (memcpy) operation. Use i386 string operations
22553 when profitable. expand_setmem contains similar code. The code
22554 depends upon architecture, block size and alignment, but always has
22555 the same overall structure:
22557 1) Prologue guard: Conditional that jumps up to epilogues for small
22558 blocks that can be handled by epilogue alone. This is faster
22559 but also needed for correctness, since prologue assume the block
22560 is larger than the desired alignment.
22562 Optional dynamic check for size and libcall for large
22563 blocks is emitted here too, with -minline-stringops-dynamically.
22565 2) Prologue: copy first few bytes in order to get destination
22566 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
22567 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
22568 copied. We emit either a jump tree on power of two sized
22569 blocks, or a byte loop.
22571 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
22572 with specified algorithm.
22574 4) Epilogue: code copying tail of the block that is too small to be
22575 handled by main body (or up to size guarded by prologue guard). */
22578 ix86_expand_movmem (rtx dst
, rtx src
, rtx count_exp
, rtx align_exp
,
22579 rtx expected_align_exp
, rtx expected_size_exp
)
22585 rtx jump_around_label
= NULL
;
22586 HOST_WIDE_INT align
= 1;
22587 unsigned HOST_WIDE_INT count
= 0;
22588 HOST_WIDE_INT expected_size
= -1;
22589 int size_needed
= 0, epilogue_size_needed
;
22590 int desired_align
= 0, align_bytes
= 0;
22591 enum stringop_alg alg
;
22593 bool need_zero_guard
= false;
22595 if (CONST_INT_P (align_exp
))
22596 align
= INTVAL (align_exp
);
22597 /* i386 can do misaligned access on reasonably increased cost. */
22598 if (CONST_INT_P (expected_align_exp
)
22599 && INTVAL (expected_align_exp
) > align
)
22600 align
= INTVAL (expected_align_exp
);
22601 /* ALIGN is the minimum of destination and source alignment, but we care here
22602 just about destination alignment. */
22603 else if (MEM_ALIGN (dst
) > (unsigned HOST_WIDE_INT
) align
* BITS_PER_UNIT
)
22604 align
= MEM_ALIGN (dst
) / BITS_PER_UNIT
;
22606 if (CONST_INT_P (count_exp
))
22607 count
= expected_size
= INTVAL (count_exp
);
22608 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22609 expected_size
= INTVAL (expected_size_exp
);
22611 /* Make sure we don't need to care about overflow later on. */
22612 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
22615 /* Step 0: Decide on preferred algorithm, desired alignment and
22616 size of chunks to be copied by main loop. */
22618 alg
= decide_alg (count
, expected_size
, false, &dynamic_check
);
22619 desired_align
= decide_alignment (align
, alg
, expected_size
);
22621 if (!TARGET_ALIGN_STRINGOPS
)
22622 align
= desired_align
;
22624 if (alg
== libcall
)
22626 gcc_assert (alg
!= no_stringop
);
22628 count_exp
= copy_to_mode_reg (GET_MODE (count_exp
), count_exp
);
22629 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
22630 srcreg
= copy_addr_to_reg (XEXP (src
, 0));
22635 gcc_unreachable ();
22637 need_zero_guard
= true;
22638 size_needed
= GET_MODE_SIZE (word_mode
);
22640 case unrolled_loop
:
22641 need_zero_guard
= true;
22642 size_needed
= GET_MODE_SIZE (word_mode
) * (TARGET_64BIT
? 4 : 2);
22644 case rep_prefix_8_byte
:
22647 case rep_prefix_4_byte
:
22650 case rep_prefix_1_byte
:
22654 need_zero_guard
= true;
22659 epilogue_size_needed
= size_needed
;
22661 /* Step 1: Prologue guard. */
22663 /* Alignment code needs count to be in register. */
22664 if (CONST_INT_P (count_exp
) && desired_align
> align
)
22666 if (INTVAL (count_exp
) > desired_align
22667 && INTVAL (count_exp
) > size_needed
)
22670 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
22671 if (align_bytes
<= 0)
22674 align_bytes
= desired_align
- align_bytes
;
22676 if (align_bytes
== 0)
22677 count_exp
= force_reg (counter_mode (count_exp
), count_exp
);
22679 gcc_assert (desired_align
>= 1 && align
>= 1);
22681 /* Ensure that alignment prologue won't copy past end of block. */
22682 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
22684 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
22685 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
22686 Make sure it is power of 2. */
22687 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
22691 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
22693 /* If main algorithm works on QImode, no epilogue is needed.
22694 For small sizes just don't align anything. */
22695 if (size_needed
== 1)
22696 desired_align
= align
;
22703 label
= gen_label_rtx ();
22704 emit_cmp_and_jump_insns (count_exp
,
22705 GEN_INT (epilogue_size_needed
),
22706 LTU
, 0, counter_mode (count_exp
), 1, label
);
22707 if (expected_size
== -1 || expected_size
< epilogue_size_needed
)
22708 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22710 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22714 /* Emit code to decide on runtime whether library call or inline should be
22716 if (dynamic_check
!= -1)
22718 if (CONST_INT_P (count_exp
))
22720 if (UINTVAL (count_exp
) >= (unsigned HOST_WIDE_INT
)dynamic_check
)
22722 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22723 count_exp
= const0_rtx
;
22729 rtx hot_label
= gen_label_rtx ();
22730 jump_around_label
= gen_label_rtx ();
22731 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
22732 LEU
, 0, GET_MODE (count_exp
), 1, hot_label
);
22733 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
22734 emit_block_move_via_libcall (dst
, src
, count_exp
, false);
22735 emit_jump (jump_around_label
);
22736 emit_label (hot_label
);
22740 /* Step 2: Alignment prologue. */
22742 if (desired_align
> align
)
22744 if (align_bytes
== 0)
22746 /* Except for the first move in epilogue, we no longer know
22747 constant offset in aliasing info. It don't seems to worth
22748 the pain to maintain it for the first move, so throw away
22750 src
= change_address (src
, BLKmode
, srcreg
);
22751 dst
= change_address (dst
, BLKmode
, destreg
);
22752 expand_movmem_prologue (dst
, src
, destreg
, srcreg
, count_exp
, align
,
22757 /* If we know how many bytes need to be stored before dst is
22758 sufficiently aligned, maintain aliasing info accurately. */
22759 dst
= expand_constant_movmem_prologue (dst
, &src
, destreg
, srcreg
,
22760 desired_align
, align_bytes
);
22761 count_exp
= plus_constant (counter_mode (count_exp
),
22762 count_exp
, -align_bytes
);
22763 count
-= align_bytes
;
22765 if (need_zero_guard
22766 && (count
< (unsigned HOST_WIDE_INT
) size_needed
22767 || (align_bytes
== 0
22768 && count
< ((unsigned HOST_WIDE_INT
) size_needed
22769 + desired_align
- align
))))
22771 /* It is possible that we copied enough so the main loop will not
22773 gcc_assert (size_needed
> 1);
22774 if (label
== NULL_RTX
)
22775 label
= gen_label_rtx ();
22776 emit_cmp_and_jump_insns (count_exp
,
22777 GEN_INT (size_needed
),
22778 LTU
, 0, counter_mode (count_exp
), 1, label
);
22779 if (expected_size
== -1
22780 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
22781 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
22783 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
22786 if (label
&& size_needed
== 1)
22788 emit_label (label
);
22789 LABEL_NUSES (label
) = 1;
22791 epilogue_size_needed
= 1;
22793 else if (label
== NULL_RTX
)
22794 epilogue_size_needed
= size_needed
;
22796 /* Step 3: Main loop. */
22802 gcc_unreachable ();
22804 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22805 count_exp
, QImode
, 1, expected_size
);
22808 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22809 count_exp
, word_mode
, 1, expected_size
);
22811 case unrolled_loop
:
22812 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
22813 registers for 4 temporaries anyway. */
22814 expand_set_or_movmem_via_loop (dst
, src
, destreg
, srcreg
, NULL
,
22815 count_exp
, word_mode
, TARGET_64BIT
? 4 : 2,
22818 case rep_prefix_8_byte
:
22819 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22822 case rep_prefix_4_byte
:
22823 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22826 case rep_prefix_1_byte
:
22827 expand_movmem_via_rep_mov (dst
, src
, destreg
, srcreg
, count_exp
,
22831 /* Adjust properly the offset of src and dest memory for aliasing. */
22832 if (CONST_INT_P (count_exp
))
22834 src
= adjust_automodify_address_nv (src
, BLKmode
, srcreg
,
22835 (count
/ size_needed
) * size_needed
);
22836 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
22837 (count
/ size_needed
) * size_needed
);
22841 src
= change_address (src
, BLKmode
, srcreg
);
22842 dst
= change_address (dst
, BLKmode
, destreg
);
22845 /* Step 4: Epilogue to copy the remaining bytes. */
22849 /* When the main loop is done, COUNT_EXP might hold original count,
22850 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
22851 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
22852 bytes. Compensate if needed. */
22854 if (size_needed
< epilogue_size_needed
)
22857 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
22858 GEN_INT (size_needed
- 1), count_exp
, 1,
22860 if (tmp
!= count_exp
)
22861 emit_move_insn (count_exp
, tmp
);
22863 emit_label (label
);
22864 LABEL_NUSES (label
) = 1;
22867 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
22868 expand_movmem_epilogue (dst
, src
, destreg
, srcreg
, count_exp
,
22869 epilogue_size_needed
);
22870 if (jump_around_label
)
22871 emit_label (jump_around_label
);
22875 /* Helper function for memcpy. For QImode value 0xXY produce
22876 0xXYXYXYXY of wide specified by MODE. This is essentially
22877 a * 0x10101010, but we can do slightly better than
22878 synth_mult by unwinding the sequence by hand on CPUs with
22881 promote_duplicated_reg (enum machine_mode mode
, rtx val
)
22883 enum machine_mode valmode
= GET_MODE (val
);
22885 int nops
= mode
== DImode
? 3 : 2;
22887 gcc_assert (mode
== SImode
|| mode
== DImode
);
22888 if (val
== const0_rtx
)
22889 return copy_to_mode_reg (mode
, const0_rtx
);
22890 if (CONST_INT_P (val
))
22892 HOST_WIDE_INT v
= INTVAL (val
) & 255;
22896 if (mode
== DImode
)
22897 v
|= (v
<< 16) << 16;
22898 return copy_to_mode_reg (mode
, gen_int_mode (v
, mode
));
22901 if (valmode
== VOIDmode
)
22903 if (valmode
!= QImode
)
22904 val
= gen_lowpart (QImode
, val
);
22905 if (mode
== QImode
)
22907 if (!TARGET_PARTIAL_REG_STALL
)
22909 if (ix86_cost
->mult_init
[mode
== DImode
? 3 : 2]
22910 + ix86_cost
->mult_bit
* (mode
== DImode
? 8 : 4)
22911 <= (ix86_cost
->shift_const
+ ix86_cost
->add
) * nops
22912 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL
== 0)))
22914 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22915 tmp
= promote_duplicated_reg (mode
, const1_rtx
);
22916 return expand_simple_binop (mode
, MULT
, reg
, tmp
, NULL
, 1,
22921 rtx reg
= convert_modes (mode
, QImode
, val
, true);
22923 if (!TARGET_PARTIAL_REG_STALL
)
22924 if (mode
== SImode
)
22925 emit_insn (gen_movsi_insv_1 (reg
, reg
));
22927 emit_insn (gen_movdi_insv_1 (reg
, reg
));
22930 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (8),
22931 NULL
, 1, OPTAB_DIRECT
);
22933 expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22935 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (16),
22936 NULL
, 1, OPTAB_DIRECT
);
22937 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22938 if (mode
== SImode
)
22940 tmp
= expand_simple_binop (mode
, ASHIFT
, reg
, GEN_INT (32),
22941 NULL
, 1, OPTAB_DIRECT
);
22942 reg
= expand_simple_binop (mode
, IOR
, reg
, tmp
, reg
, 1, OPTAB_DIRECT
);
22947 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
22948 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
22949 alignment from ALIGN to DESIRED_ALIGN. */
22951 promote_duplicated_reg_to_size (rtx val
, int size_needed
, int desired_align
, int align
)
22956 && (size_needed
> 4 || (desired_align
> align
&& desired_align
> 4)))
22957 promoted_val
= promote_duplicated_reg (DImode
, val
);
22958 else if (size_needed
> 2 || (desired_align
> align
&& desired_align
> 2))
22959 promoted_val
= promote_duplicated_reg (SImode
, val
);
22960 else if (size_needed
> 1 || (desired_align
> align
&& desired_align
> 1))
22961 promoted_val
= promote_duplicated_reg (HImode
, val
);
22963 promoted_val
= val
;
22965 return promoted_val
;
22968 /* Expand string clear operation (bzero). Use i386 string operations when
22969 profitable. See expand_movmem comment for explanation of individual
22970 steps performed. */
22972 ix86_expand_setmem (rtx dst
, rtx count_exp
, rtx val_exp
, rtx align_exp
,
22973 rtx expected_align_exp
, rtx expected_size_exp
)
22978 rtx jump_around_label
= NULL
;
22979 HOST_WIDE_INT align
= 1;
22980 unsigned HOST_WIDE_INT count
= 0;
22981 HOST_WIDE_INT expected_size
= -1;
22982 int size_needed
= 0, epilogue_size_needed
;
22983 int desired_align
= 0, align_bytes
= 0;
22984 enum stringop_alg alg
;
22985 rtx promoted_val
= NULL
;
22986 bool force_loopy_epilogue
= false;
22988 bool need_zero_guard
= false;
22990 if (CONST_INT_P (align_exp
))
22991 align
= INTVAL (align_exp
);
22992 /* i386 can do misaligned access on reasonably increased cost. */
22993 if (CONST_INT_P (expected_align_exp
)
22994 && INTVAL (expected_align_exp
) > align
)
22995 align
= INTVAL (expected_align_exp
);
22996 if (CONST_INT_P (count_exp
))
22997 count
= expected_size
= INTVAL (count_exp
);
22998 if (CONST_INT_P (expected_size_exp
) && count
== 0)
22999 expected_size
= INTVAL (expected_size_exp
);
23001 /* Make sure we don't need to care about overflow later on. */
23002 if (count
> ((unsigned HOST_WIDE_INT
) 1 << 30))
23005 /* Step 0: Decide on preferred algorithm, desired alignment and
23006 size of chunks to be copied by main loop. */
23008 alg
= decide_alg (count
, expected_size
, true, &dynamic_check
);
23009 desired_align
= decide_alignment (align
, alg
, expected_size
);
23011 if (!TARGET_ALIGN_STRINGOPS
)
23012 align
= desired_align
;
23014 if (alg
== libcall
)
23016 gcc_assert (alg
!= no_stringop
);
23018 count_exp
= copy_to_mode_reg (counter_mode (count_exp
), count_exp
);
23019 destreg
= copy_addr_to_reg (XEXP (dst
, 0));
23024 gcc_unreachable ();
23026 need_zero_guard
= true;
23027 size_needed
= GET_MODE_SIZE (word_mode
);
23029 case unrolled_loop
:
23030 need_zero_guard
= true;
23031 size_needed
= GET_MODE_SIZE (word_mode
) * 4;
23033 case rep_prefix_8_byte
:
23036 case rep_prefix_4_byte
:
23039 case rep_prefix_1_byte
:
23043 need_zero_guard
= true;
23047 epilogue_size_needed
= size_needed
;
23049 /* Step 1: Prologue guard. */
23051 /* Alignment code needs count to be in register. */
23052 if (CONST_INT_P (count_exp
) && desired_align
> align
)
23054 if (INTVAL (count_exp
) > desired_align
23055 && INTVAL (count_exp
) > size_needed
)
23058 = get_mem_align_offset (dst
, desired_align
* BITS_PER_UNIT
);
23059 if (align_bytes
<= 0)
23062 align_bytes
= desired_align
- align_bytes
;
23064 if (align_bytes
== 0)
23066 enum machine_mode mode
= SImode
;
23067 if (TARGET_64BIT
&& (count
& ~0xffffffff))
23069 count_exp
= force_reg (mode
, count_exp
);
23072 /* Do the cheap promotion to allow better CSE across the
23073 main loop and epilogue (ie one load of the big constant in the
23074 front of all code. */
23075 if (CONST_INT_P (val_exp
))
23076 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23077 desired_align
, align
);
23078 /* Ensure that alignment prologue won't copy past end of block. */
23079 if (size_needed
> 1 || (desired_align
> 1 && desired_align
> align
))
23081 epilogue_size_needed
= MAX (size_needed
- 1, desired_align
- align
);
23082 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
23083 Make sure it is power of 2. */
23084 epilogue_size_needed
= smallest_pow2_greater_than (epilogue_size_needed
);
23086 /* To improve performance of small blocks, we jump around the VAL
23087 promoting mode. This mean that if the promoted VAL is not constant,
23088 we might not use it in the epilogue and have to use byte
23090 if (epilogue_size_needed
> 2 && !promoted_val
)
23091 force_loopy_epilogue
= true;
23094 if (count
< (unsigned HOST_WIDE_INT
)epilogue_size_needed
)
23096 /* If main algorithm works on QImode, no epilogue is needed.
23097 For small sizes just don't align anything. */
23098 if (size_needed
== 1)
23099 desired_align
= align
;
23106 label
= gen_label_rtx ();
23107 emit_cmp_and_jump_insns (count_exp
,
23108 GEN_INT (epilogue_size_needed
),
23109 LTU
, 0, counter_mode (count_exp
), 1, label
);
23110 if (expected_size
== -1 || expected_size
<= epilogue_size_needed
)
23111 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23113 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23116 if (dynamic_check
!= -1)
23118 rtx hot_label
= gen_label_rtx ();
23119 jump_around_label
= gen_label_rtx ();
23120 emit_cmp_and_jump_insns (count_exp
, GEN_INT (dynamic_check
- 1),
23121 LEU
, 0, counter_mode (count_exp
), 1, hot_label
);
23122 predict_jump (REG_BR_PROB_BASE
* 90 / 100);
23123 set_storage_via_libcall (dst
, count_exp
, val_exp
, false);
23124 emit_jump (jump_around_label
);
23125 emit_label (hot_label
);
23128 /* Step 2: Alignment prologue. */
23130 /* Do the expensive promotion once we branched off the small blocks. */
23132 promoted_val
= promote_duplicated_reg_to_size (val_exp
, size_needed
,
23133 desired_align
, align
);
23134 gcc_assert (desired_align
>= 1 && align
>= 1);
23136 if (desired_align
> align
)
23138 if (align_bytes
== 0)
23140 /* Except for the first move in epilogue, we no longer know
23141 constant offset in aliasing info. It don't seems to worth
23142 the pain to maintain it for the first move, so throw away
23144 dst
= change_address (dst
, BLKmode
, destreg
);
23145 expand_setmem_prologue (dst
, destreg
, promoted_val
, count_exp
, align
,
23150 /* If we know how many bytes need to be stored before dst is
23151 sufficiently aligned, maintain aliasing info accurately. */
23152 dst
= expand_constant_setmem_prologue (dst
, destreg
, promoted_val
,
23153 desired_align
, align_bytes
);
23154 count_exp
= plus_constant (counter_mode (count_exp
),
23155 count_exp
, -align_bytes
);
23156 count
-= align_bytes
;
23158 if (need_zero_guard
23159 && (count
< (unsigned HOST_WIDE_INT
) size_needed
23160 || (align_bytes
== 0
23161 && count
< ((unsigned HOST_WIDE_INT
) size_needed
23162 + desired_align
- align
))))
23164 /* It is possible that we copied enough so the main loop will not
23166 gcc_assert (size_needed
> 1);
23167 if (label
== NULL_RTX
)
23168 label
= gen_label_rtx ();
23169 emit_cmp_and_jump_insns (count_exp
,
23170 GEN_INT (size_needed
),
23171 LTU
, 0, counter_mode (count_exp
), 1, label
);
23172 if (expected_size
== -1
23173 || expected_size
< (desired_align
- align
) / 2 + size_needed
)
23174 predict_jump (REG_BR_PROB_BASE
* 20 / 100);
23176 predict_jump (REG_BR_PROB_BASE
* 60 / 100);
23179 if (label
&& size_needed
== 1)
23181 emit_label (label
);
23182 LABEL_NUSES (label
) = 1;
23184 promoted_val
= val_exp
;
23185 epilogue_size_needed
= 1;
23187 else if (label
== NULL_RTX
)
23188 epilogue_size_needed
= size_needed
;
23190 /* Step 3: Main loop. */
23196 gcc_unreachable ();
23198 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23199 count_exp
, QImode
, 1, expected_size
);
23202 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23203 count_exp
, word_mode
, 1, expected_size
);
23205 case unrolled_loop
:
23206 expand_set_or_movmem_via_loop (dst
, NULL
, destreg
, NULL
, promoted_val
,
23207 count_exp
, word_mode
, 4, expected_size
);
23209 case rep_prefix_8_byte
:
23210 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23213 case rep_prefix_4_byte
:
23214 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23217 case rep_prefix_1_byte
:
23218 expand_setmem_via_rep_stos (dst
, destreg
, promoted_val
, count_exp
,
23222 /* Adjust properly the offset of src and dest memory for aliasing. */
23223 if (CONST_INT_P (count_exp
))
23224 dst
= adjust_automodify_address_nv (dst
, BLKmode
, destreg
,
23225 (count
/ size_needed
) * size_needed
);
23227 dst
= change_address (dst
, BLKmode
, destreg
);
23229 /* Step 4: Epilogue to copy the remaining bytes. */
23233 /* When the main loop is done, COUNT_EXP might hold original count,
23234 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
23235 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
23236 bytes. Compensate if needed. */
23238 if (size_needed
< epilogue_size_needed
)
23241 expand_simple_binop (counter_mode (count_exp
), AND
, count_exp
,
23242 GEN_INT (size_needed
- 1), count_exp
, 1,
23244 if (tmp
!= count_exp
)
23245 emit_move_insn (count_exp
, tmp
);
23247 emit_label (label
);
23248 LABEL_NUSES (label
) = 1;
23251 if (count_exp
!= const0_rtx
&& epilogue_size_needed
> 1)
23253 if (force_loopy_epilogue
)
23254 expand_setmem_epilogue_via_loop (dst
, destreg
, val_exp
, count_exp
,
23255 epilogue_size_needed
);
23257 expand_setmem_epilogue (dst
, destreg
, promoted_val
, count_exp
,
23258 epilogue_size_needed
);
23260 if (jump_around_label
)
23261 emit_label (jump_around_label
);
23265 /* Expand the appropriate insns for doing strlen if not just doing
23268 out = result, initialized with the start address
23269 align_rtx = alignment of the address.
23270 scratch = scratch register, initialized with the startaddress when
23271 not aligned, otherwise undefined
23273 This is just the body. It needs the initializations mentioned above and
23274 some address computing at the end. These things are done in i386.md. */
23277 ix86_expand_strlensi_unroll_1 (rtx out
, rtx src
, rtx align_rtx
)
23281 rtx align_2_label
= NULL_RTX
;
23282 rtx align_3_label
= NULL_RTX
;
23283 rtx align_4_label
= gen_label_rtx ();
23284 rtx end_0_label
= gen_label_rtx ();
23286 rtx tmpreg
= gen_reg_rtx (SImode
);
23287 rtx scratch
= gen_reg_rtx (SImode
);
23291 if (CONST_INT_P (align_rtx
))
23292 align
= INTVAL (align_rtx
);
23294 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
23296 /* Is there a known alignment and is it less than 4? */
23299 rtx scratch1
= gen_reg_rtx (Pmode
);
23300 emit_move_insn (scratch1
, out
);
23301 /* Is there a known alignment and is it not 2? */
23304 align_3_label
= gen_label_rtx (); /* Label when aligned to 3-byte */
23305 align_2_label
= gen_label_rtx (); /* Label when aligned to 2-byte */
23307 /* Leave just the 3 lower bits. */
23308 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, GEN_INT (3),
23309 NULL_RTX
, 0, OPTAB_WIDEN
);
23311 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23312 Pmode
, 1, align_4_label
);
23313 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, EQ
, NULL
,
23314 Pmode
, 1, align_2_label
);
23315 emit_cmp_and_jump_insns (align_rtx
, const2_rtx
, GTU
, NULL
,
23316 Pmode
, 1, align_3_label
);
23320 /* Since the alignment is 2, we have to check 2 or 0 bytes;
23321 check if is aligned to 4 - byte. */
23323 align_rtx
= expand_binop (Pmode
, and_optab
, scratch1
, const2_rtx
,
23324 NULL_RTX
, 0, OPTAB_WIDEN
);
23326 emit_cmp_and_jump_insns (align_rtx
, const0_rtx
, EQ
, NULL
,
23327 Pmode
, 1, align_4_label
);
23330 mem
= change_address (src
, QImode
, out
);
23332 /* Now compare the bytes. */
23334 /* Compare the first n unaligned byte on a byte per byte basis. */
23335 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
,
23336 QImode
, 1, end_0_label
);
23338 /* Increment the address. */
23339 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23341 /* Not needed with an alignment of 2 */
23344 emit_label (align_2_label
);
23346 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23349 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23351 emit_label (align_3_label
);
23354 emit_cmp_and_jump_insns (mem
, const0_rtx
, EQ
, NULL
, QImode
, 1,
23357 emit_insn (ix86_gen_add3 (out
, out
, const1_rtx
));
23360 /* Generate loop to check 4 bytes at a time. It is not a good idea to
23361 align this loop. It gives only huge programs, but does not help to
23363 emit_label (align_4_label
);
23365 mem
= change_address (src
, SImode
, out
);
23366 emit_move_insn (scratch
, mem
);
23367 emit_insn (ix86_gen_add3 (out
, out
, GEN_INT (4)));
23369 /* This formula yields a nonzero result iff one of the bytes is zero.
23370 This saves three branches inside loop and many cycles. */
23372 emit_insn (gen_addsi3 (tmpreg
, scratch
, GEN_INT (-0x01010101)));
23373 emit_insn (gen_one_cmplsi2 (scratch
, scratch
));
23374 emit_insn (gen_andsi3 (tmpreg
, tmpreg
, scratch
));
23375 emit_insn (gen_andsi3 (tmpreg
, tmpreg
,
23376 gen_int_mode (0x80808080, SImode
)));
23377 emit_cmp_and_jump_insns (tmpreg
, const0_rtx
, EQ
, 0, SImode
, 1,
23382 rtx reg
= gen_reg_rtx (SImode
);
23383 rtx reg2
= gen_reg_rtx (Pmode
);
23384 emit_move_insn (reg
, tmpreg
);
23385 emit_insn (gen_lshrsi3 (reg
, reg
, GEN_INT (16)));
23387 /* If zero is not in the first two bytes, move two bytes forward. */
23388 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23389 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23390 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23391 emit_insn (gen_rtx_SET (VOIDmode
, tmpreg
,
23392 gen_rtx_IF_THEN_ELSE (SImode
, tmp
,
23395 /* Emit lea manually to avoid clobbering of flags. */
23396 emit_insn (gen_rtx_SET (SImode
, reg2
,
23397 gen_rtx_PLUS (Pmode
, out
, const2_rtx
)));
23399 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23400 tmp
= gen_rtx_EQ (VOIDmode
, tmp
, const0_rtx
);
23401 emit_insn (gen_rtx_SET (VOIDmode
, out
,
23402 gen_rtx_IF_THEN_ELSE (Pmode
, tmp
,
23408 rtx end_2_label
= gen_label_rtx ();
23409 /* Is zero in the first two bytes? */
23411 emit_insn (gen_testsi_ccno_1 (tmpreg
, GEN_INT (0x8080)));
23412 tmp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
23413 tmp
= gen_rtx_NE (VOIDmode
, tmp
, const0_rtx
);
23414 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
23415 gen_rtx_LABEL_REF (VOIDmode
, end_2_label
),
23417 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
23418 JUMP_LABEL (tmp
) = end_2_label
;
23420 /* Not in the first two. Move two bytes forward. */
23421 emit_insn (gen_lshrsi3 (tmpreg
, tmpreg
, GEN_INT (16)));
23422 emit_insn (ix86_gen_add3 (out
, out
, const2_rtx
));
23424 emit_label (end_2_label
);
23428 /* Avoid branch in fixing the byte. */
23429 tmpreg
= gen_lowpart (QImode
, tmpreg
);
23430 emit_insn (gen_addqi3_cc (tmpreg
, tmpreg
, tmpreg
));
23431 tmp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
23432 cmp
= gen_rtx_LTU (VOIDmode
, tmp
, const0_rtx
);
23433 emit_insn (ix86_gen_sub3_carry (out
, out
, GEN_INT (3), tmp
, cmp
));
23435 emit_label (end_0_label
);
23438 /* Expand strlen. */
23441 ix86_expand_strlen (rtx out
, rtx src
, rtx eoschar
, rtx align
)
23443 rtx addr
, scratch1
, scratch2
, scratch3
, scratch4
;
23445 /* The generic case of strlen expander is long. Avoid it's
23446 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
23448 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23449 && !TARGET_INLINE_ALL_STRINGOPS
23450 && !optimize_insn_for_size_p ()
23451 && (!CONST_INT_P (align
) || INTVAL (align
) < 4))
23454 addr
= force_reg (Pmode
, XEXP (src
, 0));
23455 scratch1
= gen_reg_rtx (Pmode
);
23457 if (TARGET_UNROLL_STRLEN
&& eoschar
== const0_rtx
&& optimize
> 1
23458 && !optimize_insn_for_size_p ())
23460 /* Well it seems that some optimizer does not combine a call like
23461 foo(strlen(bar), strlen(bar));
23462 when the move and the subtraction is done here. It does calculate
23463 the length just once when these instructions are done inside of
23464 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
23465 often used and I use one fewer register for the lifetime of
23466 output_strlen_unroll() this is better. */
23468 emit_move_insn (out
, addr
);
23470 ix86_expand_strlensi_unroll_1 (out
, src
, align
);
23472 /* strlensi_unroll_1 returns the address of the zero at the end of
23473 the string, like memchr(), so compute the length by subtracting
23474 the start address. */
23475 emit_insn (ix86_gen_sub3 (out
, out
, addr
));
23481 /* Can't use this if the user has appropriated eax, ecx, or edi. */
23482 if (fixed_regs
[AX_REG
] || fixed_regs
[CX_REG
] || fixed_regs
[DI_REG
])
23485 scratch2
= gen_reg_rtx (Pmode
);
23486 scratch3
= gen_reg_rtx (Pmode
);
23487 scratch4
= force_reg (Pmode
, constm1_rtx
);
23489 emit_move_insn (scratch3
, addr
);
23490 eoschar
= force_reg (QImode
, eoschar
);
23492 src
= replace_equiv_address_nv (src
, scratch3
);
23494 /* If .md starts supporting :P, this can be done in .md. */
23495 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (4, src
, eoschar
, align
,
23496 scratch4
), UNSPEC_SCAS
);
23497 emit_insn (gen_strlenqi_1 (scratch1
, scratch3
, unspec
));
23498 emit_insn (ix86_gen_one_cmpl2 (scratch2
, scratch1
));
23499 emit_insn (ix86_gen_add3 (out
, scratch2
, constm1_rtx
));
23504 /* For given symbol (function) construct code to compute address of it's PLT
23505 entry in large x86-64 PIC model. */
23507 construct_plt_address (rtx symbol
)
23511 gcc_assert (GET_CODE (symbol
) == SYMBOL_REF
);
23512 gcc_assert (ix86_cmodel
== CM_LARGE_PIC
);
23513 gcc_assert (Pmode
== DImode
);
23515 tmp
= gen_reg_rtx (Pmode
);
23516 unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, symbol
), UNSPEC_PLTOFF
);
23518 emit_move_insn (tmp
, gen_rtx_CONST (Pmode
, unspec
));
23519 emit_insn (ix86_gen_add3 (tmp
, tmp
, pic_offset_table_rtx
));
23524 ix86_expand_call (rtx retval
, rtx fnaddr
, rtx callarg1
,
23526 rtx pop
, bool sibcall
)
23528 /* We need to represent that SI and DI registers are clobbered
23530 static int clobbered_registers
[] = {
23531 XMM6_REG
, XMM7_REG
, XMM8_REG
,
23532 XMM9_REG
, XMM10_REG
, XMM11_REG
,
23533 XMM12_REG
, XMM13_REG
, XMM14_REG
,
23534 XMM15_REG
, SI_REG
, DI_REG
23536 rtx vec
[ARRAY_SIZE (clobbered_registers
) + 3];
23537 rtx use
= NULL
, call
;
23538 unsigned int vec_len
;
23540 if (pop
== const0_rtx
)
23542 gcc_assert (!TARGET_64BIT
|| !pop
);
23544 if (TARGET_MACHO
&& !TARGET_64BIT
)
23547 if (flag_pic
&& GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
23548 fnaddr
= machopic_indirect_call_target (fnaddr
);
23553 /* Static functions and indirect calls don't need the pic register. */
23554 if (flag_pic
&& (!TARGET_64BIT
|| ix86_cmodel
== CM_LARGE_PIC
)
23555 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23556 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr
, 0)))
23557 use_reg (&use
, pic_offset_table_rtx
);
23560 if (TARGET_64BIT
&& INTVAL (callarg2
) >= 0)
23562 rtx al
= gen_rtx_REG (QImode
, AX_REG
);
23563 emit_move_insn (al
, callarg2
);
23564 use_reg (&use
, al
);
23567 if (ix86_cmodel
== CM_LARGE_PIC
23569 && GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
23570 && !local_symbolic_operand (XEXP (fnaddr
, 0), VOIDmode
))
23571 fnaddr
= gen_rtx_MEM (QImode
, construct_plt_address (XEXP (fnaddr
, 0)));
23573 ? !sibcall_insn_operand (XEXP (fnaddr
, 0), word_mode
)
23574 : !call_insn_operand (XEXP (fnaddr
, 0), word_mode
))
23576 fnaddr
= XEXP (fnaddr
, 0);
23577 if (GET_MODE (fnaddr
) != word_mode
)
23578 fnaddr
= convert_to_mode (word_mode
, fnaddr
, 1);
23579 fnaddr
= gen_rtx_MEM (QImode
, copy_to_mode_reg (word_mode
, fnaddr
));
23583 call
= gen_rtx_CALL (VOIDmode
, fnaddr
, callarg1
);
23585 call
= gen_rtx_SET (VOIDmode
, retval
, call
);
23586 vec
[vec_len
++] = call
;
23590 pop
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, pop
);
23591 pop
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, pop
);
23592 vec
[vec_len
++] = pop
;
23595 if (TARGET_64BIT_MS_ABI
23596 && (!callarg2
|| INTVAL (callarg2
) != -2))
23600 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
, gen_rtvec (1, const0_rtx
),
23601 UNSPEC_MS_TO_SYSV_CALL
);
23603 for (i
= 0; i
< ARRAY_SIZE (clobbered_registers
); i
++)
23605 = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers
[i
])
23607 gen_rtx_REG (SSE_REGNO_P (clobbered_registers
[i
])
23609 clobbered_registers
[i
]));
23612 /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration. */
23613 if (TARGET_VZEROUPPER
)
23616 if (cfun
->machine
->callee_pass_avx256_p
)
23618 if (cfun
->machine
->callee_return_avx256_p
)
23619 avx256
= callee_return_pass_avx256
;
23621 avx256
= callee_pass_avx256
;
23623 else if (cfun
->machine
->callee_return_avx256_p
)
23624 avx256
= callee_return_avx256
;
23626 avx256
= call_no_avx256
;
23628 if (reload_completed
)
23629 emit_insn (gen_avx_vzeroupper (GEN_INT (avx256
)));
23631 vec
[vec_len
++] = gen_rtx_UNSPEC (VOIDmode
,
23632 gen_rtvec (1, GEN_INT (avx256
)),
23633 UNSPEC_CALL_NEEDS_VZEROUPPER
);
23637 call
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (vec_len
, vec
));
23638 call
= emit_call_insn (call
);
23640 CALL_INSN_FUNCTION_USAGE (call
) = use
;
23646 ix86_split_call_vzeroupper (rtx insn
, rtx vzeroupper
)
23648 rtx pat
= PATTERN (insn
);
23649 rtvec vec
= XVEC (pat
, 0);
23650 int len
= GET_NUM_ELEM (vec
) - 1;
23652 /* Strip off the last entry of the parallel. */
23653 gcc_assert (GET_CODE (RTVEC_ELT (vec
, len
)) == UNSPEC
);
23654 gcc_assert (XINT (RTVEC_ELT (vec
, len
), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER
);
23656 pat
= RTVEC_ELT (vec
, 0);
23658 pat
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (len
, &RTVEC_ELT (vec
, 0)));
23660 emit_insn (gen_avx_vzeroupper (vzeroupper
));
23661 emit_call_insn (pat
);
23664 /* Output the assembly for a call instruction. */
23667 ix86_output_call_insn (rtx insn
, rtx call_op
)
23669 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
23670 bool seh_nop_p
= false;
23673 if (SIBLING_CALL_P (insn
))
23677 /* SEH epilogue detection requires the indirect branch case
23678 to include REX.W. */
23679 else if (TARGET_SEH
)
23680 xasm
= "rex.W jmp %A0";
23684 output_asm_insn (xasm
, &call_op
);
23688 /* SEH unwinding can require an extra nop to be emitted in several
23689 circumstances. Determine if we have one of those. */
23694 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
23696 /* If we get to another real insn, we don't need the nop. */
23700 /* If we get to the epilogue note, prevent a catch region from
23701 being adjacent to the standard epilogue sequence. If non-
23702 call-exceptions, we'll have done this during epilogue emission. */
23703 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
23704 && !flag_non_call_exceptions
23705 && !can_throw_internal (insn
))
23712 /* If we didn't find a real insn following the call, prevent the
23713 unwinder from looking into the next function. */
23719 xasm
= "call\t%P0";
23721 xasm
= "call\t%A0";
23723 output_asm_insn (xasm
, &call_op
);
23731 /* Clear stack slot assignments remembered from previous functions.
23732 This is called from INIT_EXPANDERS once before RTL is emitted for each
23735 static struct machine_function
*
23736 ix86_init_machine_status (void)
23738 struct machine_function
*f
;
23740 f
= ggc_alloc_cleared_machine_function ();
23741 f
->use_fast_prologue_epilogue_nregs
= -1;
23742 f
->tls_descriptor_call_expanded_p
= 0;
23743 f
->call_abi
= ix86_abi
;
23748 /* Return a MEM corresponding to a stack slot with mode MODE.
23749 Allocate a new slot if necessary.
23751 The RTL for a function can have several slots available: N is
23752 which slot to use. */
23755 assign_386_stack_local (enum machine_mode mode
, enum ix86_stack_slot n
)
23757 struct stack_local_entry
*s
;
23759 gcc_assert (n
< MAX_386_STACK_LOCALS
);
23761 /* Virtual slot is valid only before vregs are instantiated. */
23762 gcc_assert ((n
== SLOT_VIRTUAL
) == !virtuals_instantiated
);
23764 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
23765 if (s
->mode
== mode
&& s
->n
== n
)
23766 return validize_mem (copy_rtx (s
->rtl
));
23768 s
= ggc_alloc_stack_local_entry ();
23771 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
23773 s
->next
= ix86_stack_locals
;
23774 ix86_stack_locals
= s
;
23775 return validize_mem (s
->rtl
);
23778 /* Calculate the length of the memory address in the instruction encoding.
23779 Includes addr32 prefix, does not include the one-byte modrm, opcode,
23780 or other prefixes. We never generate addr32 prefix for LEA insn. */
23783 memory_address_length (rtx addr
, bool lea
)
23785 struct ix86_address parts
;
23786 rtx base
, index
, disp
;
23790 if (GET_CODE (addr
) == PRE_DEC
23791 || GET_CODE (addr
) == POST_INC
23792 || GET_CODE (addr
) == PRE_MODIFY
23793 || GET_CODE (addr
) == POST_MODIFY
)
23796 ok
= ix86_decompose_address (addr
, &parts
);
23799 len
= (parts
.seg
== SEG_DEFAULT
) ? 0 : 1;
23801 /* If this is not LEA instruction, add the length of addr32 prefix. */
23802 if (TARGET_64BIT
&& !lea
23803 && (SImode_address_operand (addr
, VOIDmode
)
23804 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
23805 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
23809 index
= parts
.index
;
23812 if (base
&& GET_CODE (base
) == SUBREG
)
23813 base
= SUBREG_REG (base
);
23814 if (index
&& GET_CODE (index
) == SUBREG
)
23815 index
= SUBREG_REG (index
);
23817 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
23818 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
23821 - esp as the base always wants an index,
23822 - ebp as the base always wants a displacement,
23823 - r12 as the base always wants an index,
23824 - r13 as the base always wants a displacement. */
23826 /* Register Indirect. */
23827 if (base
&& !index
&& !disp
)
23829 /* esp (for its index) and ebp (for its displacement) need
23830 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
23832 if (base
== arg_pointer_rtx
23833 || base
== frame_pointer_rtx
23834 || REGNO (base
) == SP_REG
23835 || REGNO (base
) == BP_REG
23836 || REGNO (base
) == R12_REG
23837 || REGNO (base
) == R13_REG
)
23841 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
23842 is not disp32, but disp32(%rip), so for disp32
23843 SIB byte is needed, unless print_operand_address
23844 optimizes it into disp32(%rip) or (%rip) is implied
23846 else if (disp
&& !base
&& !index
)
23853 if (GET_CODE (disp
) == CONST
)
23854 symbol
= XEXP (disp
, 0);
23855 if (GET_CODE (symbol
) == PLUS
23856 && CONST_INT_P (XEXP (symbol
, 1)))
23857 symbol
= XEXP (symbol
, 0);
23859 if (GET_CODE (symbol
) != LABEL_REF
23860 && (GET_CODE (symbol
) != SYMBOL_REF
23861 || SYMBOL_REF_TLS_MODEL (symbol
) != 0)
23862 && (GET_CODE (symbol
) != UNSPEC
23863 || (XINT (symbol
, 1) != UNSPEC_GOTPCREL
23864 && XINT (symbol
, 1) != UNSPEC_PCREL
23865 && XINT (symbol
, 1) != UNSPEC_GOTNTPOFF
)))
23871 /* Find the length of the displacement constant. */
23874 if (base
&& satisfies_constraint_K (disp
))
23879 /* ebp always wants a displacement. Similarly r13. */
23880 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
23883 /* An index requires the two-byte modrm form.... */
23885 /* ...like esp (or r12), which always wants an index. */
23886 || base
== arg_pointer_rtx
23887 || base
== frame_pointer_rtx
23888 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
23895 /* Compute default value for "length_immediate" attribute. When SHORTFORM
23896 is set, expect that insn have 8bit immediate alternative. */
23898 ix86_attr_length_immediate_default (rtx insn
, bool shortform
)
23902 extract_insn_cached (insn
);
23903 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23904 if (CONSTANT_P (recog_data
.operand
[i
]))
23906 enum attr_mode mode
= get_attr_mode (insn
);
23909 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
23911 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
23918 ival
= trunc_int_for_mode (ival
, HImode
);
23921 ival
= trunc_int_for_mode (ival
, SImode
);
23926 if (IN_RANGE (ival
, -128, 127))
23943 /* Immediates for DImode instructions are encoded
23944 as 32bit sign extended values. */
23949 fatal_insn ("unknown insn mode", insn
);
23955 /* Compute default value for "length_address" attribute. */
23957 ix86_attr_length_address_default (rtx insn
)
23961 if (get_attr_type (insn
) == TYPE_LEA
)
23963 rtx set
= PATTERN (insn
), addr
;
23965 if (GET_CODE (set
) == PARALLEL
)
23966 set
= XVECEXP (set
, 0, 0);
23968 gcc_assert (GET_CODE (set
) == SET
);
23970 addr
= SET_SRC (set
);
23972 return memory_address_length (addr
, true);
23975 extract_insn_cached (insn
);
23976 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
23977 if (MEM_P (recog_data
.operand
[i
]))
23979 constrain_operands_cached (reload_completed
);
23980 if (which_alternative
!= -1)
23982 const char *constraints
= recog_data
.constraints
[i
];
23983 int alt
= which_alternative
;
23985 while (*constraints
== '=' || *constraints
== '+')
23988 while (*constraints
++ != ',')
23990 /* Skip ignored operands. */
23991 if (*constraints
== 'X')
23994 return memory_address_length (XEXP (recog_data
.operand
[i
], 0), false);
23999 /* Compute default value for "length_vex" attribute. It includes
24000 2 or 3 byte VEX prefix and 1 opcode byte. */
24003 ix86_attr_length_vex_default (rtx insn
, bool has_0f_opcode
, bool has_vex_w
)
24007 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
24008 byte VEX prefix. */
24009 if (!has_0f_opcode
|| has_vex_w
)
24012 /* We can always use 2 byte VEX prefix in 32bit. */
24016 extract_insn_cached (insn
);
24018 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24019 if (REG_P (recog_data
.operand
[i
]))
24021 /* REX.W bit uses 3 byte VEX prefix. */
24022 if (GET_MODE (recog_data
.operand
[i
]) == DImode
24023 && GENERAL_REG_P (recog_data
.operand
[i
]))
24028 /* REX.X or REX.B bits use 3 byte VEX prefix. */
24029 if (MEM_P (recog_data
.operand
[i
])
24030 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
24037 /* Return the maximum number of instructions a cpu can issue. */
24040 ix86_issue_rate (void)
24044 case PROCESSOR_PENTIUM
:
24045 case PROCESSOR_ATOM
:
24047 case PROCESSOR_BTVER2
:
24050 case PROCESSOR_PENTIUMPRO
:
24051 case PROCESSOR_PENTIUM4
:
24052 case PROCESSOR_CORE2_32
:
24053 case PROCESSOR_CORE2_64
:
24054 case PROCESSOR_COREI7_32
:
24055 case PROCESSOR_COREI7_64
:
24056 case PROCESSOR_ATHLON
:
24058 case PROCESSOR_AMDFAM10
:
24059 case PROCESSOR_NOCONA
:
24060 case PROCESSOR_GENERIC32
:
24061 case PROCESSOR_GENERIC64
:
24062 case PROCESSOR_BDVER1
:
24063 case PROCESSOR_BDVER2
:
24064 case PROCESSOR_BTVER1
:
24072 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
24073 by DEP_INSN and nothing set by DEP_INSN. */
24076 ix86_flags_dependent (rtx insn
, rtx dep_insn
, enum attr_type insn_type
)
24080 /* Simplify the test for uninteresting insns. */
24081 if (insn_type
!= TYPE_SETCC
24082 && insn_type
!= TYPE_ICMOV
24083 && insn_type
!= TYPE_FCMOV
24084 && insn_type
!= TYPE_IBR
)
24087 if ((set
= single_set (dep_insn
)) != 0)
24089 set
= SET_DEST (set
);
24092 else if (GET_CODE (PATTERN (dep_insn
)) == PARALLEL
24093 && XVECLEN (PATTERN (dep_insn
), 0) == 2
24094 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 0)) == SET
24095 && GET_CODE (XVECEXP (PATTERN (dep_insn
), 0, 1)) == SET
)
24097 set
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24098 set2
= SET_DEST (XVECEXP (PATTERN (dep_insn
), 0, 0));
24103 if (!REG_P (set
) || REGNO (set
) != FLAGS_REG
)
24106 /* This test is true if the dependent insn reads the flags but
24107 not any other potentially set register. */
24108 if (!reg_overlap_mentioned_p (set
, PATTERN (insn
)))
24111 if (set2
&& reg_overlap_mentioned_p (set2
, PATTERN (insn
)))
24117 /* Return true iff USE_INSN has a memory address with operands set by
24121 ix86_agi_dependent (rtx set_insn
, rtx use_insn
)
24124 extract_insn_cached (use_insn
);
24125 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
24126 if (MEM_P (recog_data
.operand
[i
]))
24128 rtx addr
= XEXP (recog_data
.operand
[i
], 0);
24129 return modified_in_p (addr
, set_insn
) != 0;
24135 ix86_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
24137 enum attr_type insn_type
, dep_insn_type
;
24138 enum attr_memory memory
;
24140 int dep_insn_code_number
;
24142 /* Anti and output dependencies have zero cost on all CPUs. */
24143 if (REG_NOTE_KIND (link
) != 0)
24146 dep_insn_code_number
= recog_memoized (dep_insn
);
24148 /* If we can't recognize the insns, we can't really do anything. */
24149 if (dep_insn_code_number
< 0 || recog_memoized (insn
) < 0)
24152 insn_type
= get_attr_type (insn
);
24153 dep_insn_type
= get_attr_type (dep_insn
);
24157 case PROCESSOR_PENTIUM
:
24158 /* Address Generation Interlock adds a cycle of latency. */
24159 if (insn_type
== TYPE_LEA
)
24161 rtx addr
= PATTERN (insn
);
24163 if (GET_CODE (addr
) == PARALLEL
)
24164 addr
= XVECEXP (addr
, 0, 0);
24166 gcc_assert (GET_CODE (addr
) == SET
);
24168 addr
= SET_SRC (addr
);
24169 if (modified_in_p (addr
, dep_insn
))
24172 else if (ix86_agi_dependent (dep_insn
, insn
))
24175 /* ??? Compares pair with jump/setcc. */
24176 if (ix86_flags_dependent (insn
, dep_insn
, insn_type
))
24179 /* Floating point stores require value to be ready one cycle earlier. */
24180 if (insn_type
== TYPE_FMOV
24181 && get_attr_memory (insn
) == MEMORY_STORE
24182 && !ix86_agi_dependent (dep_insn
, insn
))
24186 case PROCESSOR_PENTIUMPRO
:
24187 memory
= get_attr_memory (insn
);
24189 /* INT->FP conversion is expensive. */
24190 if (get_attr_fp_int_src (dep_insn
))
24193 /* There is one cycle extra latency between an FP op and a store. */
24194 if (insn_type
== TYPE_FMOV
24195 && (set
= single_set (dep_insn
)) != NULL_RTX
24196 && (set2
= single_set (insn
)) != NULL_RTX
24197 && rtx_equal_p (SET_DEST (set
), SET_SRC (set2
))
24198 && MEM_P (SET_DEST (set2
)))
24201 /* Show ability of reorder buffer to hide latency of load by executing
24202 in parallel with previous instruction in case
24203 previous instruction is not needed to compute the address. */
24204 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24205 && !ix86_agi_dependent (dep_insn
, insn
))
24207 /* Claim moves to take one cycle, as core can issue one load
24208 at time and the next load can start cycle later. */
24209 if (dep_insn_type
== TYPE_IMOV
24210 || dep_insn_type
== TYPE_FMOV
)
24218 memory
= get_attr_memory (insn
);
24220 /* The esp dependency is resolved before the instruction is really
24222 if ((insn_type
== TYPE_PUSH
|| insn_type
== TYPE_POP
)
24223 && (dep_insn_type
== TYPE_PUSH
|| dep_insn_type
== TYPE_POP
))
24226 /* INT->FP conversion is expensive. */
24227 if (get_attr_fp_int_src (dep_insn
))
24230 /* Show ability of reorder buffer to hide latency of load by executing
24231 in parallel with previous instruction in case
24232 previous instruction is not needed to compute the address. */
24233 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24234 && !ix86_agi_dependent (dep_insn
, insn
))
24236 /* Claim moves to take one cycle, as core can issue one load
24237 at time and the next load can start cycle later. */
24238 if (dep_insn_type
== TYPE_IMOV
24239 || dep_insn_type
== TYPE_FMOV
)
24248 case PROCESSOR_ATHLON
:
24250 case PROCESSOR_AMDFAM10
:
24251 case PROCESSOR_BDVER1
:
24252 case PROCESSOR_BDVER2
:
24253 case PROCESSOR_BTVER1
:
24254 case PROCESSOR_BTVER2
:
24255 case PROCESSOR_ATOM
:
24256 case PROCESSOR_GENERIC32
:
24257 case PROCESSOR_GENERIC64
:
24258 memory
= get_attr_memory (insn
);
24260 /* Show ability of reorder buffer to hide latency of load by executing
24261 in parallel with previous instruction in case
24262 previous instruction is not needed to compute the address. */
24263 if ((memory
== MEMORY_LOAD
|| memory
== MEMORY_BOTH
)
24264 && !ix86_agi_dependent (dep_insn
, insn
))
24266 enum attr_unit unit
= get_attr_unit (insn
);
24269 /* Because of the difference between the length of integer and
24270 floating unit pipeline preparation stages, the memory operands
24271 for floating point are cheaper.
24273 ??? For Athlon it the difference is most probably 2. */
24274 if (unit
== UNIT_INTEGER
|| unit
== UNIT_UNKNOWN
)
24277 loadcost
= TARGET_ATHLON
? 2 : 0;
24279 if (cost
>= loadcost
)
24292 /* How many alternative schedules to try. This should be as wide as the
24293 scheduling freedom in the DFA, but no wider. Making this value too
24294 large results extra work for the scheduler. */
24297 ia32_multipass_dfa_lookahead (void)
24301 case PROCESSOR_PENTIUM
:
24304 case PROCESSOR_PENTIUMPRO
:
24308 case PROCESSOR_CORE2_32
:
24309 case PROCESSOR_CORE2_64
:
24310 case PROCESSOR_COREI7_32
:
24311 case PROCESSOR_COREI7_64
:
24312 case PROCESSOR_ATOM
:
24313 /* Generally, we want haifa-sched:max_issue() to look ahead as far
24314 as many instructions can be executed on a cycle, i.e.,
24315 issue_rate. I wonder why tuning for many CPUs does not do this. */
24316 if (reload_completed
)
24317 return ix86_issue_rate ();
24318 /* Don't use lookahead for pre-reload schedule to save compile time. */
24326 /* Try to reorder ready list to take advantage of Atom pipelined IMUL
24327 execution. It is applied if
24328 (1) IMUL instruction is on the top of list;
24329 (2) There exists the only producer of independent IMUL instruction in
24331 (3) Put found producer on the top of ready list.
24332 Returns issue rate. */
24335 ix86_sched_reorder(FILE *dump
, int sched_verbose
, rtx
*ready
, int *pn_ready
,
24336 int clock_var ATTRIBUTE_UNUSED
)
24338 static int issue_rate
= -1;
24339 int n_ready
= *pn_ready
;
24340 rtx insn
, insn1
, insn2
;
24342 sd_iterator_def sd_it
;
24346 /* Set up issue rate. */
24347 issue_rate
= ix86_issue_rate();
24349 /* Do reodering for Atom only. */
24350 if (ix86_tune
!= PROCESSOR_ATOM
)
24352 /* Do not perform ready list reodering for pre-reload schedule pass. */
24353 if (!reload_completed
)
24355 /* Nothing to do if ready list contains only 1 instruction. */
24359 /* Check that IMUL instruction is on the top of ready list. */
24360 insn
= ready
[n_ready
- 1];
24361 if (!NONDEBUG_INSN_P (insn
))
24363 insn
= PATTERN (insn
);
24364 if (GET_CODE (insn
) == PARALLEL
)
24365 insn
= XVECEXP (insn
, 0, 0);
24366 if (GET_CODE (insn
) != SET
)
24368 if (!(GET_CODE (SET_SRC (insn
)) == MULT
24369 && GET_MODE (SET_SRC (insn
)) == SImode
))
24372 /* Search for producer of independent IMUL instruction. */
24373 for (i
= n_ready
- 2; i
>= 0; i
--)
24376 if (!NONDEBUG_INSN_P (insn
))
24378 /* Skip IMUL instruction. */
24379 insn2
= PATTERN (insn
);
24380 if (GET_CODE (insn2
) == PARALLEL
)
24381 insn2
= XVECEXP (insn2
, 0, 0);
24382 if (GET_CODE (insn2
) == SET
24383 && GET_CODE (SET_SRC (insn2
)) == MULT
24384 && GET_MODE (SET_SRC (insn2
)) == SImode
)
24387 FOR_EACH_DEP (insn
, SD_LIST_FORW
, sd_it
, dep
)
24390 con
= DEP_CON (dep
);
24391 if (!NONDEBUG_INSN_P (con
))
24393 insn1
= PATTERN (con
);
24394 if (GET_CODE (insn1
) == PARALLEL
)
24395 insn1
= XVECEXP (insn1
, 0, 0);
24397 if (GET_CODE (insn1
) == SET
24398 && GET_CODE (SET_SRC (insn1
)) == MULT
24399 && GET_MODE (SET_SRC (insn1
)) == SImode
)
24401 sd_iterator_def sd_it1
;
24403 /* Check if there is no other dependee for IMUL. */
24405 FOR_EACH_DEP (con
, SD_LIST_BACK
, sd_it1
, dep1
)
24408 pro
= DEP_PRO (dep1
);
24409 if (!NONDEBUG_INSN_P (pro
))
24422 return issue_rate
; /* Didn't find IMUL producer. */
24424 if (sched_verbose
> 1)
24425 fprintf(dump
, ";;\tatom sched_reorder: swap %d and %d insns\n",
24426 INSN_UID (ready
[index
]), INSN_UID (ready
[n_ready
- 1]));
24428 /* Put IMUL producer (ready[index]) at the top of ready list. */
24429 insn1
= ready
[index
];
24430 for (i
= index
; i
< n_ready
- 1; i
++)
24431 ready
[i
] = ready
[i
+ 1];
24432 ready
[n_ready
- 1] = insn1
;
24438 ix86_class_likely_spilled_p (reg_class_t
);
24440 /* Returns true if lhs of insn is HW function argument register and set up
24441 is_spilled to true if it is likely spilled HW register. */
24443 insn_is_function_arg (rtx insn
, bool* is_spilled
)
24447 if (!NONDEBUG_INSN_P (insn
))
24449 /* Call instructions are not movable, ignore it. */
24452 insn
= PATTERN (insn
);
24453 if (GET_CODE (insn
) == PARALLEL
)
24454 insn
= XVECEXP (insn
, 0, 0);
24455 if (GET_CODE (insn
) != SET
)
24457 dst
= SET_DEST (insn
);
24458 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
24459 && ix86_function_arg_regno_p (REGNO (dst
)))
24461 /* Is it likely spilled HW register? */
24462 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
24463 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
24464 *is_spilled
= true;
24470 /* Add output dependencies for chain of function adjacent arguments if only
24471 there is a move to likely spilled HW register. Return first argument
24472 if at least one dependence was added or NULL otherwise. */
24474 add_parameter_dependencies (rtx call
, rtx head
)
24478 rtx first_arg
= NULL
;
24479 bool is_spilled
= false;
24481 head
= PREV_INSN (head
);
24483 /* Find nearest to call argument passing instruction. */
24486 last
= PREV_INSN (last
);
24489 if (!NONDEBUG_INSN_P (last
))
24491 if (insn_is_function_arg (last
, &is_spilled
))
24499 insn
= PREV_INSN (last
);
24500 if (!INSN_P (insn
))
24504 if (!NONDEBUG_INSN_P (insn
))
24509 if (insn_is_function_arg (insn
, &is_spilled
))
24511 /* Add output depdendence between two function arguments if chain
24512 of output arguments contains likely spilled HW registers. */
24514 add_dependence (last
, insn
, REG_DEP_OUTPUT
);
24515 first_arg
= last
= insn
;
24525 /* Add output or anti dependency from insn to first_arg to restrict its code
24528 avoid_func_arg_motion (rtx first_arg
, rtx insn
)
24533 set
= single_set (insn
);
24536 tmp
= SET_DEST (set
);
24539 /* Add output dependency to the first function argument. */
24540 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
24543 /* Add anti dependency. */
24544 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
24547 /* Avoid cross block motion of function argument through adding dependency
24548 from the first non-jump instruction in bb. */
24550 add_dependee_for_func_arg (rtx arg
, basic_block bb
)
24552 rtx insn
= BB_END (bb
);
24556 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
24558 rtx set
= single_set (insn
);
24561 avoid_func_arg_motion (arg
, insn
);
24565 if (insn
== BB_HEAD (bb
))
24567 insn
= PREV_INSN (insn
);
24571 /* Hook for pre-reload schedule - avoid motion of function arguments
24572 passed in likely spilled HW registers. */
24574 ix86_dependencies_evaluation_hook (rtx head
, rtx tail
)
24577 rtx first_arg
= NULL
;
24578 if (reload_completed
)
24580 while (head
!= tail
&& DEBUG_INSN_P (head
))
24581 head
= NEXT_INSN (head
);
24582 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
24583 if (INSN_P (insn
) && CALL_P (insn
))
24585 first_arg
= add_parameter_dependencies (insn
, head
);
24588 /* Add dependee for first argument to predecessors if only
24589 region contains more than one block. */
24590 basic_block bb
= BLOCK_FOR_INSN (insn
);
24591 int rgn
= CONTAINING_RGN (bb
->index
);
24592 int nr_blks
= RGN_NR_BLOCKS (rgn
);
24593 /* Skip trivial regions and region head blocks that can have
24594 predecessors outside of region. */
24595 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
24599 /* Assume that region is SCC, i.e. all immediate predecessors
24600 of non-head block are in the same region. */
24601 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
24603 /* Avoid creating of loop-carried dependencies through
24604 using topological odering in region. */
24605 if (BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
24606 add_dependee_for_func_arg (first_arg
, e
->src
);
24614 else if (first_arg
)
24615 avoid_func_arg_motion (first_arg
, insn
);
24618 /* Hook for pre-reload schedule - set priority of moves from likely spilled
24619 HW registers to maximum, to schedule them at soon as possible. These are
24620 moves from function argument registers at the top of the function entry
24621 and moves from function return value registers after call. */
24623 ix86_adjust_priority (rtx insn
, int priority
)
24627 if (reload_completed
)
24630 if (!NONDEBUG_INSN_P (insn
))
24633 set
= single_set (insn
);
24636 rtx tmp
= SET_SRC (set
);
24638 && HARD_REGISTER_P (tmp
)
24639 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
24640 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
24641 return current_sched_info
->sched_max_insns_priority
;
24647 /* Model decoder of Core 2/i7.
24648 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
24649 track the instruction fetch block boundaries and make sure that long
24650 (9+ bytes) instructions are assigned to D0. */
24652 /* Maximum length of an insn that can be handled by
24653 a secondary decoder unit. '8' for Core 2/i7. */
24654 static int core2i7_secondary_decoder_max_insn_size
;
24656 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
24657 '16' for Core 2/i7. */
24658 static int core2i7_ifetch_block_size
;
24660 /* Maximum number of instructions decoder can handle per cycle.
24661 '6' for Core 2/i7. */
24662 static int core2i7_ifetch_block_max_insns
;
24664 typedef struct ix86_first_cycle_multipass_data_
*
24665 ix86_first_cycle_multipass_data_t
;
24666 typedef const struct ix86_first_cycle_multipass_data_
*
24667 const_ix86_first_cycle_multipass_data_t
;
24669 /* A variable to store target state across calls to max_issue within
24671 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data
,
24672 *ix86_first_cycle_multipass_data
= &_ix86_first_cycle_multipass_data
;
24674 /* Initialize DATA. */
24676 core2i7_first_cycle_multipass_init (void *_data
)
24678 ix86_first_cycle_multipass_data_t data
24679 = (ix86_first_cycle_multipass_data_t
) _data
;
24681 data
->ifetch_block_len
= 0;
24682 data
->ifetch_block_n_insns
= 0;
24683 data
->ready_try_change
= NULL
;
24684 data
->ready_try_change_size
= 0;
24687 /* Advancing the cycle; reset ifetch block counts. */
24689 core2i7_dfa_post_advance_cycle (void)
24691 ix86_first_cycle_multipass_data_t data
= ix86_first_cycle_multipass_data
;
24693 gcc_assert (data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24695 data
->ifetch_block_len
= 0;
24696 data
->ifetch_block_n_insns
= 0;
24699 static int min_insn_size (rtx
);
24701 /* Filter out insns from ready_try that the core will not be able to issue
24702 on current cycle due to decoder. */
24704 core2i7_first_cycle_multipass_filter_ready_try
24705 (const_ix86_first_cycle_multipass_data_t data
,
24706 char *ready_try
, int n_ready
, bool first_cycle_insn_p
)
24713 if (ready_try
[n_ready
])
24716 insn
= get_ready_element (n_ready
);
24717 insn_size
= min_insn_size (insn
);
24719 if (/* If this is a too long an insn for a secondary decoder ... */
24720 (!first_cycle_insn_p
24721 && insn_size
> core2i7_secondary_decoder_max_insn_size
)
24722 /* ... or it would not fit into the ifetch block ... */
24723 || data
->ifetch_block_len
+ insn_size
> core2i7_ifetch_block_size
24724 /* ... or the decoder is full already ... */
24725 || data
->ifetch_block_n_insns
+ 1 > core2i7_ifetch_block_max_insns
)
24726 /* ... mask the insn out. */
24728 ready_try
[n_ready
] = 1;
24730 if (data
->ready_try_change
)
24731 SET_BIT (data
->ready_try_change
, n_ready
);
24736 /* Prepare for a new round of multipass lookahead scheduling. */
24738 core2i7_first_cycle_multipass_begin (void *_data
, char *ready_try
, int n_ready
,
24739 bool first_cycle_insn_p
)
24741 ix86_first_cycle_multipass_data_t data
24742 = (ix86_first_cycle_multipass_data_t
) _data
;
24743 const_ix86_first_cycle_multipass_data_t prev_data
24744 = ix86_first_cycle_multipass_data
;
24746 /* Restore the state from the end of the previous round. */
24747 data
->ifetch_block_len
= prev_data
->ifetch_block_len
;
24748 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
;
24750 /* Filter instructions that cannot be issued on current cycle due to
24751 decoder restrictions. */
24752 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24753 first_cycle_insn_p
);
24756 /* INSN is being issued in current solution. Account for its impact on
24757 the decoder model. */
24759 core2i7_first_cycle_multipass_issue (void *_data
, char *ready_try
, int n_ready
,
24760 rtx insn
, const void *_prev_data
)
24762 ix86_first_cycle_multipass_data_t data
24763 = (ix86_first_cycle_multipass_data_t
) _data
;
24764 const_ix86_first_cycle_multipass_data_t prev_data
24765 = (const_ix86_first_cycle_multipass_data_t
) _prev_data
;
24767 int insn_size
= min_insn_size (insn
);
24769 data
->ifetch_block_len
= prev_data
->ifetch_block_len
+ insn_size
;
24770 data
->ifetch_block_n_insns
= prev_data
->ifetch_block_n_insns
+ 1;
24771 gcc_assert (data
->ifetch_block_len
<= core2i7_ifetch_block_size
24772 && data
->ifetch_block_n_insns
<= core2i7_ifetch_block_max_insns
);
24774 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
24775 if (!data
->ready_try_change
)
24777 data
->ready_try_change
= sbitmap_alloc (n_ready
);
24778 data
->ready_try_change_size
= n_ready
;
24780 else if (data
->ready_try_change_size
< n_ready
)
24782 data
->ready_try_change
= sbitmap_resize (data
->ready_try_change
,
24784 data
->ready_try_change_size
= n_ready
;
24786 sbitmap_zero (data
->ready_try_change
);
24788 /* Filter out insns from ready_try that the core will not be able to issue
24789 on current cycle due to decoder. */
24790 core2i7_first_cycle_multipass_filter_ready_try (data
, ready_try
, n_ready
,
24794 /* Revert the effect on ready_try. */
24796 core2i7_first_cycle_multipass_backtrack (const void *_data
,
24798 int n_ready ATTRIBUTE_UNUSED
)
24800 const_ix86_first_cycle_multipass_data_t data
24801 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24802 unsigned int i
= 0;
24803 sbitmap_iterator sbi
;
24805 gcc_assert (sbitmap_last_set_bit (data
->ready_try_change
) < n_ready
);
24806 EXECUTE_IF_SET_IN_SBITMAP (data
->ready_try_change
, 0, i
, sbi
)
24812 /* Save the result of multipass lookahead scheduling for the next round. */
24814 core2i7_first_cycle_multipass_end (const void *_data
)
24816 const_ix86_first_cycle_multipass_data_t data
24817 = (const_ix86_first_cycle_multipass_data_t
) _data
;
24818 ix86_first_cycle_multipass_data_t next_data
24819 = ix86_first_cycle_multipass_data
;
24823 next_data
->ifetch_block_len
= data
->ifetch_block_len
;
24824 next_data
->ifetch_block_n_insns
= data
->ifetch_block_n_insns
;
24828 /* Deallocate target data. */
24830 core2i7_first_cycle_multipass_fini (void *_data
)
24832 ix86_first_cycle_multipass_data_t data
24833 = (ix86_first_cycle_multipass_data_t
) _data
;
24835 if (data
->ready_try_change
)
24837 sbitmap_free (data
->ready_try_change
);
24838 data
->ready_try_change
= NULL
;
24839 data
->ready_try_change_size
= 0;
24843 /* Prepare for scheduling pass. */
24845 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED
,
24846 int verbose ATTRIBUTE_UNUSED
,
24847 int max_uid ATTRIBUTE_UNUSED
)
24849 /* Install scheduling hooks for current CPU. Some of these hooks are used
24850 in time-critical parts of the scheduler, so we only set them up when
24851 they are actually used. */
24854 case PROCESSOR_CORE2_32
:
24855 case PROCESSOR_CORE2_64
:
24856 case PROCESSOR_COREI7_32
:
24857 case PROCESSOR_COREI7_64
:
24858 /* Do not perform multipass scheduling for pre-reload schedule
24859 to save compile time. */
24860 if (reload_completed
)
24862 targetm
.sched
.dfa_post_advance_cycle
24863 = core2i7_dfa_post_advance_cycle
;
24864 targetm
.sched
.first_cycle_multipass_init
24865 = core2i7_first_cycle_multipass_init
;
24866 targetm
.sched
.first_cycle_multipass_begin
24867 = core2i7_first_cycle_multipass_begin
;
24868 targetm
.sched
.first_cycle_multipass_issue
24869 = core2i7_first_cycle_multipass_issue
;
24870 targetm
.sched
.first_cycle_multipass_backtrack
24871 = core2i7_first_cycle_multipass_backtrack
;
24872 targetm
.sched
.first_cycle_multipass_end
24873 = core2i7_first_cycle_multipass_end
;
24874 targetm
.sched
.first_cycle_multipass_fini
24875 = core2i7_first_cycle_multipass_fini
;
24877 /* Set decoder parameters. */
24878 core2i7_secondary_decoder_max_insn_size
= 8;
24879 core2i7_ifetch_block_size
= 16;
24880 core2i7_ifetch_block_max_insns
= 6;
24883 /* ... Fall through ... */
24885 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
24886 targetm
.sched
.first_cycle_multipass_init
= NULL
;
24887 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
24888 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
24889 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
24890 targetm
.sched
.first_cycle_multipass_end
= NULL
;
24891 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
24897 /* Compute the alignment given to a constant that is being placed in memory.
24898 EXP is the constant and ALIGN is the alignment that the object would
24900 The value of this function is used instead of that alignment to align
24904 ix86_constant_alignment (tree exp
, int align
)
24906 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
24907 || TREE_CODE (exp
) == INTEGER_CST
)
24909 if (TYPE_MODE (TREE_TYPE (exp
)) == DFmode
&& align
< 64)
24911 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp
))) && align
< 128)
24914 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
24915 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
24916 return BITS_PER_WORD
;
24921 /* Compute the alignment for a static variable.
24922 TYPE is the data type, and ALIGN is the alignment that
24923 the object would ordinarily have. The value of this function is used
24924 instead of that alignment to align the object. */
24927 ix86_data_alignment (tree type
, int align
)
24929 int max_align
= optimize_size
? BITS_PER_WORD
: MIN (256, MAX_OFILE_ALIGNMENT
);
24931 if (AGGREGATE_TYPE_P (type
)
24932 && TYPE_SIZE (type
)
24933 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24934 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= (unsigned) max_align
24935 || TREE_INT_CST_HIGH (TYPE_SIZE (type
)))
24936 && align
< max_align
)
24939 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
24940 to 16byte boundary. */
24943 if (AGGREGATE_TYPE_P (type
)
24944 && TYPE_SIZE (type
)
24945 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
24946 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 128
24947 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
24951 if (TREE_CODE (type
) == ARRAY_TYPE
)
24953 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
24955 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
24958 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
24961 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
24963 if ((TYPE_MODE (type
) == XCmode
24964 || TYPE_MODE (type
) == TCmode
) && align
< 128)
24967 else if ((TREE_CODE (type
) == RECORD_TYPE
24968 || TREE_CODE (type
) == UNION_TYPE
24969 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
24970 && TYPE_FIELDS (type
))
24972 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
24974 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
24977 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
24978 || TREE_CODE (type
) == INTEGER_TYPE
)
24980 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
24982 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
24989 /* Compute the alignment for a local variable or a stack slot. EXP is
24990 the data type or decl itself, MODE is the widest mode available and
24991 ALIGN is the alignment that the object would ordinarily have. The
24992 value of this macro is used instead of that alignment to align the
24996 ix86_local_alignment (tree exp
, enum machine_mode mode
,
24997 unsigned int align
)
25001 if (exp
&& DECL_P (exp
))
25003 type
= TREE_TYPE (exp
);
25012 /* Don't do dynamic stack realignment for long long objects with
25013 -mpreferred-stack-boundary=2. */
25016 && ix86_preferred_stack_boundary
< 64
25017 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25018 && (!type
|| !TYPE_USER_ALIGN (type
))
25019 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25022 /* If TYPE is NULL, we are allocating a stack slot for caller-save
25023 register in MODE. We will return the largest alignment of XF
25027 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
25028 align
= GET_MODE_ALIGNMENT (DFmode
);
25032 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
25033 to 16byte boundary. Exact wording is:
25035 An array uses the same alignment as its elements, except that a local or
25036 global array variable of length at least 16 bytes or
25037 a C99 variable-length array variable always has alignment of at least 16 bytes.
25039 This was added to allow use of aligned SSE instructions at arrays. This
25040 rule is meant for static storage (where compiler can not do the analysis
25041 by itself). We follow it for automatic variables only when convenient.
25042 We fully control everything in the function compiled and functions from
25043 other unit can not rely on the alignment.
25045 Exclude va_list type. It is the common case of local array where
25046 we can not benefit from the alignment. */
25047 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
25050 if (AGGREGATE_TYPE_P (type
)
25051 && (va_list_type_node
== NULL_TREE
25052 || (TYPE_MAIN_VARIANT (type
)
25053 != TYPE_MAIN_VARIANT (va_list_type_node
)))
25054 && TYPE_SIZE (type
)
25055 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
25056 && (TREE_INT_CST_LOW (TYPE_SIZE (type
)) >= 16
25057 || TREE_INT_CST_HIGH (TYPE_SIZE (type
))) && align
< 128)
25060 if (TREE_CODE (type
) == ARRAY_TYPE
)
25062 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
25064 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
25067 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
25069 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
25071 if ((TYPE_MODE (type
) == XCmode
25072 || TYPE_MODE (type
) == TCmode
) && align
< 128)
25075 else if ((TREE_CODE (type
) == RECORD_TYPE
25076 || TREE_CODE (type
) == UNION_TYPE
25077 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
25078 && TYPE_FIELDS (type
))
25080 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
25082 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
25085 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
25086 || TREE_CODE (type
) == INTEGER_TYPE
)
25089 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
25091 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
25097 /* Compute the minimum required alignment for dynamic stack realignment
25098 purposes for a local variable, parameter or a stack slot. EXP is
25099 the data type or decl itself, MODE is its mode and ALIGN is the
25100 alignment that the object would ordinarily have. */
25103 ix86_minimum_alignment (tree exp
, enum machine_mode mode
,
25104 unsigned int align
)
25108 if (exp
&& DECL_P (exp
))
25110 type
= TREE_TYPE (exp
);
25119 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
25122 /* Don't do dynamic stack realignment for long long objects with
25123 -mpreferred-stack-boundary=2. */
25124 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
25125 && (!type
|| !TYPE_USER_ALIGN (type
))
25126 && (!decl
|| !DECL_USER_ALIGN (decl
)))
25132 /* Find a location for the static chain incoming to a nested function.
25133 This is a register, unless all free registers are used by arguments. */
25136 ix86_static_chain (const_tree fndecl
, bool incoming_p
)
25140 if (!DECL_STATIC_CHAIN (fndecl
))
25145 /* We always use R10 in 64-bit mode. */
25153 /* By default in 32-bit mode we use ECX to pass the static chain. */
25156 fntype
= TREE_TYPE (fndecl
);
25157 ccvt
= ix86_get_callcvt (fntype
);
25158 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
25160 /* Fastcall functions use ecx/edx for arguments, which leaves
25161 us with EAX for the static chain.
25162 Thiscall functions use ecx for arguments, which also
25163 leaves us with EAX for the static chain. */
25166 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
25168 /* For regparm 3, we have no free call-clobbered registers in
25169 which to store the static chain. In order to implement this,
25170 we have the trampoline push the static chain to the stack.
25171 However, we can't push a value below the return address when
25172 we call the nested function directly, so we have to use an
25173 alternate entry point. For this we use ESI, and have the
25174 alternate entry point push ESI, so that things appear the
25175 same once we're executing the nested function. */
25178 if (fndecl
== current_function_decl
)
25179 ix86_static_chain_on_stack
= true;
25180 return gen_frame_mem (SImode
,
25181 plus_constant (Pmode
,
25182 arg_pointer_rtx
, -8));
25188 return gen_rtx_REG (Pmode
, regno
);
25191 /* Emit RTL insns to initialize the variable parts of a trampoline.
25192 FNDECL is the decl of the target address; M_TRAMP is a MEM for
25193 the trampoline, and CHAIN_VALUE is an RTX for the static chain
25194 to be passed to the target function. */
25197 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
25203 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
25209 /* Load the function address to r11. Try to load address using
25210 the shorter movl instead of movabs. We may want to support
25211 movq for kernel mode, but kernel does not use trampolines at
25212 the moment. FNADDR is a 32bit address and may not be in
25213 DImode when ptr_mode == SImode. Always use movl in this
25215 if (ptr_mode
== SImode
25216 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
25218 fnaddr
= copy_addr_to_reg (fnaddr
);
25220 mem
= adjust_address (m_tramp
, HImode
, offset
);
25221 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
25223 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
25224 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
25229 mem
= adjust_address (m_tramp
, HImode
, offset
);
25230 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
25232 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
25233 emit_move_insn (mem
, fnaddr
);
25237 /* Load static chain using movabs to r10. Use the shorter movl
25238 instead of movabs when ptr_mode == SImode. */
25239 if (ptr_mode
== SImode
)
25250 mem
= adjust_address (m_tramp
, HImode
, offset
);
25251 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
25253 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
25254 emit_move_insn (mem
, chain_value
);
25257 /* Jump to r11; the last (unused) byte is a nop, only there to
25258 pad the write out to a single 32-bit store. */
25259 mem
= adjust_address (m_tramp
, SImode
, offset
);
25260 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
25267 /* Depending on the static chain location, either load a register
25268 with a constant, or push the constant to the stack. All of the
25269 instructions are the same size. */
25270 chain
= ix86_static_chain (fndecl
, true);
25273 switch (REGNO (chain
))
25276 opcode
= 0xb8; break;
25278 opcode
= 0xb9; break;
25280 gcc_unreachable ();
25286 mem
= adjust_address (m_tramp
, QImode
, offset
);
25287 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
25289 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25290 emit_move_insn (mem
, chain_value
);
25293 mem
= adjust_address (m_tramp
, QImode
, offset
);
25294 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
25296 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
25298 /* Compute offset from the end of the jmp to the target function.
25299 In the case in which the trampoline stores the static chain on
25300 the stack, we need to skip the first insn which pushes the
25301 (call-saved) register static chain; this push is 1 byte. */
25303 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
25304 plus_constant (Pmode
, XEXP (m_tramp
, 0),
25305 offset
- (MEM_P (chain
) ? 1 : 0)),
25306 NULL_RTX
, 1, OPTAB_DIRECT
);
25307 emit_move_insn (mem
, disp
);
25310 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
25312 #ifdef HAVE_ENABLE_EXECUTE_STACK
25313 #ifdef CHECK_EXECUTE_STACK_ENABLED
25314 if (CHECK_EXECUTE_STACK_ENABLED
)
25316 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
25317 LCT_NORMAL
, VOIDmode
, 1, XEXP (m_tramp
, 0), Pmode
);
25321 /* The following file contains several enumerations and data structures
25322 built from the definitions in i386-builtin-types.def. */
25324 #include "i386-builtin-types.inc"
25326 /* Table for the ix86 builtin non-function types. */
/* NOTE(review): one slot per ix86_builtin_type code (codes come from
   i386-builtin-types.inc, included above); entries are built lazily by
   ix86_get_builtin_type below.  GTY(()) roots the array for the
   garbage collector so cached tree nodes are not collected.  */
25327 static GTY(()) tree ix86_builtin_type_tab
[(int) IX86_BT_LAST_CPTR
+ 1];
25329 /* Retrieve an element from the above table, building some of
25330 the types lazily. */
/* NOTE(review): this region is a lossy extraction -- the embedded
   original line numbering skips (e.g. 25331-25332, 25336-25337,
   25341-25343, 25354-25358, 25373-25375), so the return type, local
   declarations, braces, the early return on a cached table hit, the
   'else' of the vector/pointer split and the final return statement
   are not visible here.  Comments below describe only the visible
   statements; elided behavior is marked as an assumption.  */
25333 ix86_get_builtin_type (enum ix86_builtin_type tcode
)
25335 unsigned int index
;
/* Bounds-check the requested type code against the lazy cache.  */
25338 gcc_assert ((unsigned)tcode
< ARRAY_SIZE(ix86_builtin_type_tab
));
/* Probe the cache; presumably a non-NULL hit returns early in the
   elided lines -- TODO confirm against the full source.  */
25340 type
= ix86_builtin_type_tab
[(int) tcode
];
/* Primitive type codes must already be populated; only vector and
   pointer codes are constructed here.  */
25344 gcc_assert (tcode
> IX86_BT_LAST_PRIM
);
25345 if (tcode
<= IX86_BT_LAST_VECT
)
/* Vector case: codes in (IX86_BT_LAST_PRIM, IX86_BT_LAST_VECT] index
   the vect_base/vect_mode tables; build the element type recursively,
   then the vector type for the recorded machine mode.  */
25347 enum machine_mode mode
;
25349 index
= tcode
- IX86_BT_LAST_PRIM
- 1;
25350 itype
= ix86_get_builtin_type (ix86_builtin_type_vect_base
[index
]);
25351 mode
= ix86_builtin_type_vect_mode
[index
];
25353 type
= build_vector_type_for_mode (itype
, mode
);
/* Pointer case (elided 'else' branch, per the skipped numbering):
   codes up to IX86_BT_LAST_PTR get an unqualified pointee, codes
   beyond it a const-qualified one.  */
25359 index
= tcode
- IX86_BT_LAST_VECT
- 1;
25360 if (tcode
<= IX86_BT_LAST_PTR
)
25361 quals
= TYPE_UNQUALIFIED
;
25363 quals
= TYPE_QUAL_CONST
;
/* Build the pointee recursively, qualify it if needed, then take
   its pointer type.  */
25365 itype
= ix86_get_builtin_type (ix86_builtin_type_ptr_base
[index
]);
25366 if (quals
!= TYPE_UNQUALIFIED
)
25367 itype
= build_qualified_type (itype
, quals
);
25369 type
= build_pointer_type (itype
);
/* Memoize the freshly built type; the function's return statement is
   among the elided lines -- presumably 'return type;'.  */
25372 ix86_builtin_type_tab
[(int) tcode
] = type
;
25376 /* Table for the ix86 builtin function types. */
/* NOTE(review): one slot per ix86_builtin_func_type code, including
   the alias codes up to IX86_BT_LAST_ALIAS; filled lazily by
   ix86_get_builtin_func_type below.  GTY(()) roots the array so the
   cached function-type trees survive garbage collection.  */
25377 static GTY(()) tree ix86_builtin_func_type_tab
[(int) IX86_BT_LAST_ALIAS
+ 1];
25379 /* Retrieve an element from the above table, building some of
25380 the types lazily. */
/* NOTE(review): lossy extraction -- the embedded numbering skips
   (e.g. 25381-25386, 25390-25392, 25398-25399, 25405-25406,
   25408-25410, 25416-25417, 25419-25421), so the return type, the
   'tree type;' declaration, the early return on a cached hit, braces,
   the 'else' keyword and the final return are not visible.  Comments
   below cover only the visible statements.  */
25383 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode
)
/* Bounds-check the function-type code against the lazy cache.  */
25387 gcc_assert ((unsigned)tcode
< ARRAY_SIZE (ix86_builtin_func_type_tab
));
/* Probe the cache; presumably a non-NULL hit returns early in the
   elided lines -- TODO confirm against the full source.  */
25389 type
= ix86_builtin_func_type_tab
[(int) tcode
];
/* Non-alias codes: the [start, after) slice of
   ix86_builtin_func_args lists return type followed by argument
   types for this signature.  */
25393 if (tcode
<= IX86_BT_LAST_FUNC
)
25395 unsigned start
= ix86_builtin_func_start
[(int) tcode
];
25396 unsigned after
= ix86_builtin_func_start
[(int) tcode
+ 1];
/* args starts at void_list_node so the TYPE_ARG_TYPES chain is
   properly terminated (fixed-arity prototype).  */
25397 tree rtype
, atype
, args
= void_list_node
;
/* Element at 'start' is the return type; the remaining elements are
   arguments, consed on in reverse so the chain ends up in order.  */
25400 rtype
= ix86_get_builtin_type (ix86_builtin_func_args
[start
]);
25401 for (i
= after
- 1; i
> start
; --i
)
25403 atype
= ix86_get_builtin_type (ix86_builtin_func_args
[i
]);
25404 args
= tree_cons (NULL
, atype
, args
);
25407 type
= build_function_type (rtype
, args
);
/* Alias codes (elided 'else' branch, per the skipped numbering):
   resolve to the base code's function type recursively.  */
25411 unsigned index
= tcode
- IX86_BT_LAST_FUNC
- 1;
25412 enum ix86_builtin_func_type icode
;
25414 icode
= ix86_builtin_func_alias_base
[index
];
25415 type
= ix86_get_builtin_func_type (icode
);
/* Memoize; the final return statement is among the elided lines --
   presumably 'return type;'.  */
25418 ix86_builtin_func_type_tab
[(int) tcode
] = type
;
25423 /* Codes for all the SSE/MMX builtins. */
25426 IX86_BUILTIN_ADDPS
,
25427 IX86_BUILTIN_ADDSS
,
25428 IX86_BUILTIN_DIVPS
,
25429 IX86_BUILTIN_DIVSS
,
25430 IX86_BUILTIN_MULPS
,
25431 IX86_BUILTIN_MULSS
,
25432 IX86_BUILTIN_SUBPS
,
25433 IX86_BUILTIN_SUBSS
,
25435 IX86_BUILTIN_CMPEQPS
,
25436 IX86_BUILTIN_CMPLTPS
,
25437 IX86_BUILTIN_CMPLEPS
,
25438 IX86_BUILTIN_CMPGTPS
,
25439 IX86_BUILTIN_CMPGEPS
,
25440 IX86_BUILTIN_CMPNEQPS
,
25441 IX86_BUILTIN_CMPNLTPS
,
25442 IX86_BUILTIN_CMPNLEPS
,
25443 IX86_BUILTIN_CMPNGTPS
,
25444 IX86_BUILTIN_CMPNGEPS
,
25445 IX86_BUILTIN_CMPORDPS
,
25446 IX86_BUILTIN_CMPUNORDPS
,
25447 IX86_BUILTIN_CMPEQSS
,
25448 IX86_BUILTIN_CMPLTSS
,
25449 IX86_BUILTIN_CMPLESS
,
25450 IX86_BUILTIN_CMPNEQSS
,
25451 IX86_BUILTIN_CMPNLTSS
,
25452 IX86_BUILTIN_CMPNLESS
,
25453 IX86_BUILTIN_CMPNGTSS
,
25454 IX86_BUILTIN_CMPNGESS
,
25455 IX86_BUILTIN_CMPORDSS
,
25456 IX86_BUILTIN_CMPUNORDSS
,
25458 IX86_BUILTIN_COMIEQSS
,
25459 IX86_BUILTIN_COMILTSS
,
25460 IX86_BUILTIN_COMILESS
,
25461 IX86_BUILTIN_COMIGTSS
,
25462 IX86_BUILTIN_COMIGESS
,
25463 IX86_BUILTIN_COMINEQSS
,
25464 IX86_BUILTIN_UCOMIEQSS
,
25465 IX86_BUILTIN_UCOMILTSS
,
25466 IX86_BUILTIN_UCOMILESS
,
25467 IX86_BUILTIN_UCOMIGTSS
,
25468 IX86_BUILTIN_UCOMIGESS
,
25469 IX86_BUILTIN_UCOMINEQSS
,
25471 IX86_BUILTIN_CVTPI2PS
,
25472 IX86_BUILTIN_CVTPS2PI
,
25473 IX86_BUILTIN_CVTSI2SS
,
25474 IX86_BUILTIN_CVTSI642SS
,
25475 IX86_BUILTIN_CVTSS2SI
,
25476 IX86_BUILTIN_CVTSS2SI64
,
25477 IX86_BUILTIN_CVTTPS2PI
,
25478 IX86_BUILTIN_CVTTSS2SI
,
25479 IX86_BUILTIN_CVTTSS2SI64
,
25481 IX86_BUILTIN_MAXPS
,
25482 IX86_BUILTIN_MAXSS
,
25483 IX86_BUILTIN_MINPS
,
25484 IX86_BUILTIN_MINSS
,
25486 IX86_BUILTIN_LOADUPS
,
25487 IX86_BUILTIN_STOREUPS
,
25488 IX86_BUILTIN_MOVSS
,
25490 IX86_BUILTIN_MOVHLPS
,
25491 IX86_BUILTIN_MOVLHPS
,
25492 IX86_BUILTIN_LOADHPS
,
25493 IX86_BUILTIN_LOADLPS
,
25494 IX86_BUILTIN_STOREHPS
,
25495 IX86_BUILTIN_STORELPS
,
25497 IX86_BUILTIN_MASKMOVQ
,
25498 IX86_BUILTIN_MOVMSKPS
,
25499 IX86_BUILTIN_PMOVMSKB
,
25501 IX86_BUILTIN_MOVNTPS
,
25502 IX86_BUILTIN_MOVNTQ
,
25504 IX86_BUILTIN_LOADDQU
,
25505 IX86_BUILTIN_STOREDQU
,
25507 IX86_BUILTIN_PACKSSWB
,
25508 IX86_BUILTIN_PACKSSDW
,
25509 IX86_BUILTIN_PACKUSWB
,
25511 IX86_BUILTIN_PADDB
,
25512 IX86_BUILTIN_PADDW
,
25513 IX86_BUILTIN_PADDD
,
25514 IX86_BUILTIN_PADDQ
,
25515 IX86_BUILTIN_PADDSB
,
25516 IX86_BUILTIN_PADDSW
,
25517 IX86_BUILTIN_PADDUSB
,
25518 IX86_BUILTIN_PADDUSW
,
25519 IX86_BUILTIN_PSUBB
,
25520 IX86_BUILTIN_PSUBW
,
25521 IX86_BUILTIN_PSUBD
,
25522 IX86_BUILTIN_PSUBQ
,
25523 IX86_BUILTIN_PSUBSB
,
25524 IX86_BUILTIN_PSUBSW
,
25525 IX86_BUILTIN_PSUBUSB
,
25526 IX86_BUILTIN_PSUBUSW
,
25529 IX86_BUILTIN_PANDN
,
25533 IX86_BUILTIN_PAVGB
,
25534 IX86_BUILTIN_PAVGW
,
25536 IX86_BUILTIN_PCMPEQB
,
25537 IX86_BUILTIN_PCMPEQW
,
25538 IX86_BUILTIN_PCMPEQD
,
25539 IX86_BUILTIN_PCMPGTB
,
25540 IX86_BUILTIN_PCMPGTW
,
25541 IX86_BUILTIN_PCMPGTD
,
25543 IX86_BUILTIN_PMADDWD
,
25545 IX86_BUILTIN_PMAXSW
,
25546 IX86_BUILTIN_PMAXUB
,
25547 IX86_BUILTIN_PMINSW
,
25548 IX86_BUILTIN_PMINUB
,
25550 IX86_BUILTIN_PMULHUW
,
25551 IX86_BUILTIN_PMULHW
,
25552 IX86_BUILTIN_PMULLW
,
25554 IX86_BUILTIN_PSADBW
,
25555 IX86_BUILTIN_PSHUFW
,
25557 IX86_BUILTIN_PSLLW
,
25558 IX86_BUILTIN_PSLLD
,
25559 IX86_BUILTIN_PSLLQ
,
25560 IX86_BUILTIN_PSRAW
,
25561 IX86_BUILTIN_PSRAD
,
25562 IX86_BUILTIN_PSRLW
,
25563 IX86_BUILTIN_PSRLD
,
25564 IX86_BUILTIN_PSRLQ
,
25565 IX86_BUILTIN_PSLLWI
,
25566 IX86_BUILTIN_PSLLDI
,
25567 IX86_BUILTIN_PSLLQI
,
25568 IX86_BUILTIN_PSRAWI
,
25569 IX86_BUILTIN_PSRADI
,
25570 IX86_BUILTIN_PSRLWI
,
25571 IX86_BUILTIN_PSRLDI
,
25572 IX86_BUILTIN_PSRLQI
,
25574 IX86_BUILTIN_PUNPCKHBW
,
25575 IX86_BUILTIN_PUNPCKHWD
,
25576 IX86_BUILTIN_PUNPCKHDQ
,
25577 IX86_BUILTIN_PUNPCKLBW
,
25578 IX86_BUILTIN_PUNPCKLWD
,
25579 IX86_BUILTIN_PUNPCKLDQ
,
25581 IX86_BUILTIN_SHUFPS
,
25583 IX86_BUILTIN_RCPPS
,
25584 IX86_BUILTIN_RCPSS
,
25585 IX86_BUILTIN_RSQRTPS
,
25586 IX86_BUILTIN_RSQRTPS_NR
,
25587 IX86_BUILTIN_RSQRTSS
,
25588 IX86_BUILTIN_RSQRTF
,
25589 IX86_BUILTIN_SQRTPS
,
25590 IX86_BUILTIN_SQRTPS_NR
,
25591 IX86_BUILTIN_SQRTSS
,
25593 IX86_BUILTIN_UNPCKHPS
,
25594 IX86_BUILTIN_UNPCKLPS
,
25596 IX86_BUILTIN_ANDPS
,
25597 IX86_BUILTIN_ANDNPS
,
25599 IX86_BUILTIN_XORPS
,
25602 IX86_BUILTIN_LDMXCSR
,
25603 IX86_BUILTIN_STMXCSR
,
25604 IX86_BUILTIN_SFENCE
,
25606 IX86_BUILTIN_FXSAVE
,
25607 IX86_BUILTIN_FXRSTOR
,
25608 IX86_BUILTIN_FXSAVE64
,
25609 IX86_BUILTIN_FXRSTOR64
,
25611 IX86_BUILTIN_XSAVE
,
25612 IX86_BUILTIN_XRSTOR
,
25613 IX86_BUILTIN_XSAVE64
,
25614 IX86_BUILTIN_XRSTOR64
,
25616 IX86_BUILTIN_XSAVEOPT
,
25617 IX86_BUILTIN_XSAVEOPT64
,
25619 /* 3DNow! Original */
25620 IX86_BUILTIN_FEMMS
,
25621 IX86_BUILTIN_PAVGUSB
,
25622 IX86_BUILTIN_PF2ID
,
25623 IX86_BUILTIN_PFACC
,
25624 IX86_BUILTIN_PFADD
,
25625 IX86_BUILTIN_PFCMPEQ
,
25626 IX86_BUILTIN_PFCMPGE
,
25627 IX86_BUILTIN_PFCMPGT
,
25628 IX86_BUILTIN_PFMAX
,
25629 IX86_BUILTIN_PFMIN
,
25630 IX86_BUILTIN_PFMUL
,
25631 IX86_BUILTIN_PFRCP
,
25632 IX86_BUILTIN_PFRCPIT1
,
25633 IX86_BUILTIN_PFRCPIT2
,
25634 IX86_BUILTIN_PFRSQIT1
,
25635 IX86_BUILTIN_PFRSQRT
,
25636 IX86_BUILTIN_PFSUB
,
25637 IX86_BUILTIN_PFSUBR
,
25638 IX86_BUILTIN_PI2FD
,
25639 IX86_BUILTIN_PMULHRW
,
25641 /* 3DNow! Athlon Extensions */
25642 IX86_BUILTIN_PF2IW
,
25643 IX86_BUILTIN_PFNACC
,
25644 IX86_BUILTIN_PFPNACC
,
25645 IX86_BUILTIN_PI2FW
,
25646 IX86_BUILTIN_PSWAPDSI
,
25647 IX86_BUILTIN_PSWAPDSF
,
25650 IX86_BUILTIN_ADDPD
,
25651 IX86_BUILTIN_ADDSD
,
25652 IX86_BUILTIN_DIVPD
,
25653 IX86_BUILTIN_DIVSD
,
25654 IX86_BUILTIN_MULPD
,
25655 IX86_BUILTIN_MULSD
,
25656 IX86_BUILTIN_SUBPD
,
25657 IX86_BUILTIN_SUBSD
,
25659 IX86_BUILTIN_CMPEQPD
,
25660 IX86_BUILTIN_CMPLTPD
,
25661 IX86_BUILTIN_CMPLEPD
,
25662 IX86_BUILTIN_CMPGTPD
,
25663 IX86_BUILTIN_CMPGEPD
,
25664 IX86_BUILTIN_CMPNEQPD
,
25665 IX86_BUILTIN_CMPNLTPD
,
25666 IX86_BUILTIN_CMPNLEPD
,
25667 IX86_BUILTIN_CMPNGTPD
,
25668 IX86_BUILTIN_CMPNGEPD
,
25669 IX86_BUILTIN_CMPORDPD
,
25670 IX86_BUILTIN_CMPUNORDPD
,
25671 IX86_BUILTIN_CMPEQSD
,
25672 IX86_BUILTIN_CMPLTSD
,
25673 IX86_BUILTIN_CMPLESD
,
25674 IX86_BUILTIN_CMPNEQSD
,
25675 IX86_BUILTIN_CMPNLTSD
,
25676 IX86_BUILTIN_CMPNLESD
,
25677 IX86_BUILTIN_CMPORDSD
,
25678 IX86_BUILTIN_CMPUNORDSD
,
25680 IX86_BUILTIN_COMIEQSD
,
25681 IX86_BUILTIN_COMILTSD
,
25682 IX86_BUILTIN_COMILESD
,
25683 IX86_BUILTIN_COMIGTSD
,
25684 IX86_BUILTIN_COMIGESD
,
25685 IX86_BUILTIN_COMINEQSD
,
25686 IX86_BUILTIN_UCOMIEQSD
,
25687 IX86_BUILTIN_UCOMILTSD
,
25688 IX86_BUILTIN_UCOMILESD
,
25689 IX86_BUILTIN_UCOMIGTSD
,
25690 IX86_BUILTIN_UCOMIGESD
,
25691 IX86_BUILTIN_UCOMINEQSD
,
25693 IX86_BUILTIN_MAXPD
,
25694 IX86_BUILTIN_MAXSD
,
25695 IX86_BUILTIN_MINPD
,
25696 IX86_BUILTIN_MINSD
,
25698 IX86_BUILTIN_ANDPD
,
25699 IX86_BUILTIN_ANDNPD
,
25701 IX86_BUILTIN_XORPD
,
25703 IX86_BUILTIN_SQRTPD
,
25704 IX86_BUILTIN_SQRTSD
,
25706 IX86_BUILTIN_UNPCKHPD
,
25707 IX86_BUILTIN_UNPCKLPD
,
25709 IX86_BUILTIN_SHUFPD
,
25711 IX86_BUILTIN_LOADUPD
,
25712 IX86_BUILTIN_STOREUPD
,
25713 IX86_BUILTIN_MOVSD
,
25715 IX86_BUILTIN_LOADHPD
,
25716 IX86_BUILTIN_LOADLPD
,
25718 IX86_BUILTIN_CVTDQ2PD
,
25719 IX86_BUILTIN_CVTDQ2PS
,
25721 IX86_BUILTIN_CVTPD2DQ
,
25722 IX86_BUILTIN_CVTPD2PI
,
25723 IX86_BUILTIN_CVTPD2PS
,
25724 IX86_BUILTIN_CVTTPD2DQ
,
25725 IX86_BUILTIN_CVTTPD2PI
,
25727 IX86_BUILTIN_CVTPI2PD
,
25728 IX86_BUILTIN_CVTSI2SD
,
25729 IX86_BUILTIN_CVTSI642SD
,
25731 IX86_BUILTIN_CVTSD2SI
,
25732 IX86_BUILTIN_CVTSD2SI64
,
25733 IX86_BUILTIN_CVTSD2SS
,
25734 IX86_BUILTIN_CVTSS2SD
,
25735 IX86_BUILTIN_CVTTSD2SI
,
25736 IX86_BUILTIN_CVTTSD2SI64
,
25738 IX86_BUILTIN_CVTPS2DQ
,
25739 IX86_BUILTIN_CVTPS2PD
,
25740 IX86_BUILTIN_CVTTPS2DQ
,
25742 IX86_BUILTIN_MOVNTI
,
25743 IX86_BUILTIN_MOVNTI64
,
25744 IX86_BUILTIN_MOVNTPD
,
25745 IX86_BUILTIN_MOVNTDQ
,
25747 IX86_BUILTIN_MOVQ128
,
25750 IX86_BUILTIN_MASKMOVDQU
,
25751 IX86_BUILTIN_MOVMSKPD
,
25752 IX86_BUILTIN_PMOVMSKB128
,
25754 IX86_BUILTIN_PACKSSWB128
,
25755 IX86_BUILTIN_PACKSSDW128
,
25756 IX86_BUILTIN_PACKUSWB128
,
25758 IX86_BUILTIN_PADDB128
,
25759 IX86_BUILTIN_PADDW128
,
25760 IX86_BUILTIN_PADDD128
,
25761 IX86_BUILTIN_PADDQ128
,
25762 IX86_BUILTIN_PADDSB128
,
25763 IX86_BUILTIN_PADDSW128
,
25764 IX86_BUILTIN_PADDUSB128
,
25765 IX86_BUILTIN_PADDUSW128
,
25766 IX86_BUILTIN_PSUBB128
,
25767 IX86_BUILTIN_PSUBW128
,
25768 IX86_BUILTIN_PSUBD128
,
25769 IX86_BUILTIN_PSUBQ128
,
25770 IX86_BUILTIN_PSUBSB128
,
25771 IX86_BUILTIN_PSUBSW128
,
25772 IX86_BUILTIN_PSUBUSB128
,
25773 IX86_BUILTIN_PSUBUSW128
,
25775 IX86_BUILTIN_PAND128
,
25776 IX86_BUILTIN_PANDN128
,
25777 IX86_BUILTIN_POR128
,
25778 IX86_BUILTIN_PXOR128
,
25780 IX86_BUILTIN_PAVGB128
,
25781 IX86_BUILTIN_PAVGW128
,
25783 IX86_BUILTIN_PCMPEQB128
,
25784 IX86_BUILTIN_PCMPEQW128
,
25785 IX86_BUILTIN_PCMPEQD128
,
25786 IX86_BUILTIN_PCMPGTB128
,
25787 IX86_BUILTIN_PCMPGTW128
,
25788 IX86_BUILTIN_PCMPGTD128
,
25790 IX86_BUILTIN_PMADDWD128
,
25792 IX86_BUILTIN_PMAXSW128
,
25793 IX86_BUILTIN_PMAXUB128
,
25794 IX86_BUILTIN_PMINSW128
,
25795 IX86_BUILTIN_PMINUB128
,
25797 IX86_BUILTIN_PMULUDQ
,
25798 IX86_BUILTIN_PMULUDQ128
,
25799 IX86_BUILTIN_PMULHUW128
,
25800 IX86_BUILTIN_PMULHW128
,
25801 IX86_BUILTIN_PMULLW128
,
25803 IX86_BUILTIN_PSADBW128
,
25804 IX86_BUILTIN_PSHUFHW
,
25805 IX86_BUILTIN_PSHUFLW
,
25806 IX86_BUILTIN_PSHUFD
,
25808 IX86_BUILTIN_PSLLDQI128
,
25809 IX86_BUILTIN_PSLLWI128
,
25810 IX86_BUILTIN_PSLLDI128
,
25811 IX86_BUILTIN_PSLLQI128
,
25812 IX86_BUILTIN_PSRAWI128
,
25813 IX86_BUILTIN_PSRADI128
,
25814 IX86_BUILTIN_PSRLDQI128
,
25815 IX86_BUILTIN_PSRLWI128
,
25816 IX86_BUILTIN_PSRLDI128
,
25817 IX86_BUILTIN_PSRLQI128
,
25819 IX86_BUILTIN_PSLLDQ128
,
25820 IX86_BUILTIN_PSLLW128
,
25821 IX86_BUILTIN_PSLLD128
,
25822 IX86_BUILTIN_PSLLQ128
,
25823 IX86_BUILTIN_PSRAW128
,
25824 IX86_BUILTIN_PSRAD128
,
25825 IX86_BUILTIN_PSRLW128
,
25826 IX86_BUILTIN_PSRLD128
,
25827 IX86_BUILTIN_PSRLQ128
,
25829 IX86_BUILTIN_PUNPCKHBW128
,
25830 IX86_BUILTIN_PUNPCKHWD128
,
25831 IX86_BUILTIN_PUNPCKHDQ128
,
25832 IX86_BUILTIN_PUNPCKHQDQ128
,
25833 IX86_BUILTIN_PUNPCKLBW128
,
25834 IX86_BUILTIN_PUNPCKLWD128
,
25835 IX86_BUILTIN_PUNPCKLDQ128
,
25836 IX86_BUILTIN_PUNPCKLQDQ128
,
25838 IX86_BUILTIN_CLFLUSH
,
25839 IX86_BUILTIN_MFENCE
,
25840 IX86_BUILTIN_LFENCE
,
25841 IX86_BUILTIN_PAUSE
,
25843 IX86_BUILTIN_BSRSI
,
25844 IX86_BUILTIN_BSRDI
,
25845 IX86_BUILTIN_RDPMC
,
25846 IX86_BUILTIN_RDTSC
,
25847 IX86_BUILTIN_RDTSCP
,
25848 IX86_BUILTIN_ROLQI
,
25849 IX86_BUILTIN_ROLHI
,
25850 IX86_BUILTIN_RORQI
,
25851 IX86_BUILTIN_RORHI
,
25854 IX86_BUILTIN_ADDSUBPS
,
25855 IX86_BUILTIN_HADDPS
,
25856 IX86_BUILTIN_HSUBPS
,
25857 IX86_BUILTIN_MOVSHDUP
,
25858 IX86_BUILTIN_MOVSLDUP
,
25859 IX86_BUILTIN_ADDSUBPD
,
25860 IX86_BUILTIN_HADDPD
,
25861 IX86_BUILTIN_HSUBPD
,
25862 IX86_BUILTIN_LDDQU
,
25864 IX86_BUILTIN_MONITOR
,
25865 IX86_BUILTIN_MWAIT
,
25868 IX86_BUILTIN_PHADDW
,
25869 IX86_BUILTIN_PHADDD
,
25870 IX86_BUILTIN_PHADDSW
,
25871 IX86_BUILTIN_PHSUBW
,
25872 IX86_BUILTIN_PHSUBD
,
25873 IX86_BUILTIN_PHSUBSW
,
25874 IX86_BUILTIN_PMADDUBSW
,
25875 IX86_BUILTIN_PMULHRSW
,
25876 IX86_BUILTIN_PSHUFB
,
25877 IX86_BUILTIN_PSIGNB
,
25878 IX86_BUILTIN_PSIGNW
,
25879 IX86_BUILTIN_PSIGND
,
25880 IX86_BUILTIN_PALIGNR
,
25881 IX86_BUILTIN_PABSB
,
25882 IX86_BUILTIN_PABSW
,
25883 IX86_BUILTIN_PABSD
,
25885 IX86_BUILTIN_PHADDW128
,
25886 IX86_BUILTIN_PHADDD128
,
25887 IX86_BUILTIN_PHADDSW128
,
25888 IX86_BUILTIN_PHSUBW128
,
25889 IX86_BUILTIN_PHSUBD128
,
25890 IX86_BUILTIN_PHSUBSW128
,
25891 IX86_BUILTIN_PMADDUBSW128
,
25892 IX86_BUILTIN_PMULHRSW128
,
25893 IX86_BUILTIN_PSHUFB128
,
25894 IX86_BUILTIN_PSIGNB128
,
25895 IX86_BUILTIN_PSIGNW128
,
25896 IX86_BUILTIN_PSIGND128
,
25897 IX86_BUILTIN_PALIGNR128
,
25898 IX86_BUILTIN_PABSB128
,
25899 IX86_BUILTIN_PABSW128
,
25900 IX86_BUILTIN_PABSD128
,
25902 /* AMDFAM10 - SSE4A New Instructions. */
25903 IX86_BUILTIN_MOVNTSD
,
25904 IX86_BUILTIN_MOVNTSS
,
25905 IX86_BUILTIN_EXTRQI
,
25906 IX86_BUILTIN_EXTRQ
,
25907 IX86_BUILTIN_INSERTQI
,
25908 IX86_BUILTIN_INSERTQ
,
25911 IX86_BUILTIN_BLENDPD
,
25912 IX86_BUILTIN_BLENDPS
,
25913 IX86_BUILTIN_BLENDVPD
,
25914 IX86_BUILTIN_BLENDVPS
,
25915 IX86_BUILTIN_PBLENDVB128
,
25916 IX86_BUILTIN_PBLENDW128
,
25921 IX86_BUILTIN_INSERTPS128
,
25923 IX86_BUILTIN_MOVNTDQA
,
25924 IX86_BUILTIN_MPSADBW128
,
25925 IX86_BUILTIN_PACKUSDW128
,
25926 IX86_BUILTIN_PCMPEQQ
,
25927 IX86_BUILTIN_PHMINPOSUW128
,
25929 IX86_BUILTIN_PMAXSB128
,
25930 IX86_BUILTIN_PMAXSD128
,
25931 IX86_BUILTIN_PMAXUD128
,
25932 IX86_BUILTIN_PMAXUW128
,
25934 IX86_BUILTIN_PMINSB128
,
25935 IX86_BUILTIN_PMINSD128
,
25936 IX86_BUILTIN_PMINUD128
,
25937 IX86_BUILTIN_PMINUW128
,
25939 IX86_BUILTIN_PMOVSXBW128
,
25940 IX86_BUILTIN_PMOVSXBD128
,
25941 IX86_BUILTIN_PMOVSXBQ128
,
25942 IX86_BUILTIN_PMOVSXWD128
,
25943 IX86_BUILTIN_PMOVSXWQ128
,
25944 IX86_BUILTIN_PMOVSXDQ128
,
25946 IX86_BUILTIN_PMOVZXBW128
,
25947 IX86_BUILTIN_PMOVZXBD128
,
25948 IX86_BUILTIN_PMOVZXBQ128
,
25949 IX86_BUILTIN_PMOVZXWD128
,
25950 IX86_BUILTIN_PMOVZXWQ128
,
25951 IX86_BUILTIN_PMOVZXDQ128
,
25953 IX86_BUILTIN_PMULDQ128
,
25954 IX86_BUILTIN_PMULLD128
,
25956 IX86_BUILTIN_ROUNDSD
,
25957 IX86_BUILTIN_ROUNDSS
,
25959 IX86_BUILTIN_ROUNDPD
,
25960 IX86_BUILTIN_ROUNDPS
,
25962 IX86_BUILTIN_FLOORPD
,
25963 IX86_BUILTIN_CEILPD
,
25964 IX86_BUILTIN_TRUNCPD
,
25965 IX86_BUILTIN_RINTPD
,
25966 IX86_BUILTIN_ROUNDPD_AZ
,
25968 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
,
25969 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
,
25970 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
,
25972 IX86_BUILTIN_FLOORPS
,
25973 IX86_BUILTIN_CEILPS
,
25974 IX86_BUILTIN_TRUNCPS
,
25975 IX86_BUILTIN_RINTPS
,
25976 IX86_BUILTIN_ROUNDPS_AZ
,
25978 IX86_BUILTIN_FLOORPS_SFIX
,
25979 IX86_BUILTIN_CEILPS_SFIX
,
25980 IX86_BUILTIN_ROUNDPS_AZ_SFIX
,
25982 IX86_BUILTIN_PTESTZ
,
25983 IX86_BUILTIN_PTESTC
,
25984 IX86_BUILTIN_PTESTNZC
,
25986 IX86_BUILTIN_VEC_INIT_V2SI
,
25987 IX86_BUILTIN_VEC_INIT_V4HI
,
25988 IX86_BUILTIN_VEC_INIT_V8QI
,
25989 IX86_BUILTIN_VEC_EXT_V2DF
,
25990 IX86_BUILTIN_VEC_EXT_V2DI
,
25991 IX86_BUILTIN_VEC_EXT_V4SF
,
25992 IX86_BUILTIN_VEC_EXT_V4SI
,
25993 IX86_BUILTIN_VEC_EXT_V8HI
,
25994 IX86_BUILTIN_VEC_EXT_V2SI
,
25995 IX86_BUILTIN_VEC_EXT_V4HI
,
25996 IX86_BUILTIN_VEC_EXT_V16QI
,
25997 IX86_BUILTIN_VEC_SET_V2DI
,
25998 IX86_BUILTIN_VEC_SET_V4SF
,
25999 IX86_BUILTIN_VEC_SET_V4SI
,
26000 IX86_BUILTIN_VEC_SET_V8HI
,
26001 IX86_BUILTIN_VEC_SET_V4HI
,
26002 IX86_BUILTIN_VEC_SET_V16QI
,
26004 IX86_BUILTIN_VEC_PACK_SFIX
,
26005 IX86_BUILTIN_VEC_PACK_SFIX256
,
26008 IX86_BUILTIN_CRC32QI
,
26009 IX86_BUILTIN_CRC32HI
,
26010 IX86_BUILTIN_CRC32SI
,
26011 IX86_BUILTIN_CRC32DI
,
26013 IX86_BUILTIN_PCMPESTRI128
,
26014 IX86_BUILTIN_PCMPESTRM128
,
26015 IX86_BUILTIN_PCMPESTRA128
,
26016 IX86_BUILTIN_PCMPESTRC128
,
26017 IX86_BUILTIN_PCMPESTRO128
,
26018 IX86_BUILTIN_PCMPESTRS128
,
26019 IX86_BUILTIN_PCMPESTRZ128
,
26020 IX86_BUILTIN_PCMPISTRI128
,
26021 IX86_BUILTIN_PCMPISTRM128
,
26022 IX86_BUILTIN_PCMPISTRA128
,
26023 IX86_BUILTIN_PCMPISTRC128
,
26024 IX86_BUILTIN_PCMPISTRO128
,
26025 IX86_BUILTIN_PCMPISTRS128
,
26026 IX86_BUILTIN_PCMPISTRZ128
,
26028 IX86_BUILTIN_PCMPGTQ
,
26030 /* AES instructions */
26031 IX86_BUILTIN_AESENC128
,
26032 IX86_BUILTIN_AESENCLAST128
,
26033 IX86_BUILTIN_AESDEC128
,
26034 IX86_BUILTIN_AESDECLAST128
,
26035 IX86_BUILTIN_AESIMC128
,
26036 IX86_BUILTIN_AESKEYGENASSIST128
,
26038 /* PCLMUL instruction */
26039 IX86_BUILTIN_PCLMULQDQ128
,
26042 IX86_BUILTIN_ADDPD256
,
26043 IX86_BUILTIN_ADDPS256
,
26044 IX86_BUILTIN_ADDSUBPD256
,
26045 IX86_BUILTIN_ADDSUBPS256
,
26046 IX86_BUILTIN_ANDPD256
,
26047 IX86_BUILTIN_ANDPS256
,
26048 IX86_BUILTIN_ANDNPD256
,
26049 IX86_BUILTIN_ANDNPS256
,
26050 IX86_BUILTIN_BLENDPD256
,
26051 IX86_BUILTIN_BLENDPS256
,
26052 IX86_BUILTIN_BLENDVPD256
,
26053 IX86_BUILTIN_BLENDVPS256
,
26054 IX86_BUILTIN_DIVPD256
,
26055 IX86_BUILTIN_DIVPS256
,
26056 IX86_BUILTIN_DPPS256
,
26057 IX86_BUILTIN_HADDPD256
,
26058 IX86_BUILTIN_HADDPS256
,
26059 IX86_BUILTIN_HSUBPD256
,
26060 IX86_BUILTIN_HSUBPS256
,
26061 IX86_BUILTIN_MAXPD256
,
26062 IX86_BUILTIN_MAXPS256
,
26063 IX86_BUILTIN_MINPD256
,
26064 IX86_BUILTIN_MINPS256
,
26065 IX86_BUILTIN_MULPD256
,
26066 IX86_BUILTIN_MULPS256
,
26067 IX86_BUILTIN_ORPD256
,
26068 IX86_BUILTIN_ORPS256
,
26069 IX86_BUILTIN_SHUFPD256
,
26070 IX86_BUILTIN_SHUFPS256
,
26071 IX86_BUILTIN_SUBPD256
,
26072 IX86_BUILTIN_SUBPS256
,
26073 IX86_BUILTIN_XORPD256
,
26074 IX86_BUILTIN_XORPS256
,
26075 IX86_BUILTIN_CMPSD
,
26076 IX86_BUILTIN_CMPSS
,
26077 IX86_BUILTIN_CMPPD
,
26078 IX86_BUILTIN_CMPPS
,
26079 IX86_BUILTIN_CMPPD256
,
26080 IX86_BUILTIN_CMPPS256
,
26081 IX86_BUILTIN_CVTDQ2PD256
,
26082 IX86_BUILTIN_CVTDQ2PS256
,
26083 IX86_BUILTIN_CVTPD2PS256
,
26084 IX86_BUILTIN_CVTPS2DQ256
,
26085 IX86_BUILTIN_CVTPS2PD256
,
26086 IX86_BUILTIN_CVTTPD2DQ256
,
26087 IX86_BUILTIN_CVTPD2DQ256
,
26088 IX86_BUILTIN_CVTTPS2DQ256
,
26089 IX86_BUILTIN_EXTRACTF128PD256
,
26090 IX86_BUILTIN_EXTRACTF128PS256
,
26091 IX86_BUILTIN_EXTRACTF128SI256
,
26092 IX86_BUILTIN_VZEROALL
,
26093 IX86_BUILTIN_VZEROUPPER
,
26094 IX86_BUILTIN_VPERMILVARPD
,
26095 IX86_BUILTIN_VPERMILVARPS
,
26096 IX86_BUILTIN_VPERMILVARPD256
,
26097 IX86_BUILTIN_VPERMILVARPS256
,
26098 IX86_BUILTIN_VPERMILPD
,
26099 IX86_BUILTIN_VPERMILPS
,
26100 IX86_BUILTIN_VPERMILPD256
,
26101 IX86_BUILTIN_VPERMILPS256
,
26102 IX86_BUILTIN_VPERMIL2PD
,
26103 IX86_BUILTIN_VPERMIL2PS
,
26104 IX86_BUILTIN_VPERMIL2PD256
,
26105 IX86_BUILTIN_VPERMIL2PS256
,
26106 IX86_BUILTIN_VPERM2F128PD256
,
26107 IX86_BUILTIN_VPERM2F128PS256
,
26108 IX86_BUILTIN_VPERM2F128SI256
,
26109 IX86_BUILTIN_VBROADCASTSS
,
26110 IX86_BUILTIN_VBROADCASTSD256
,
26111 IX86_BUILTIN_VBROADCASTSS256
,
26112 IX86_BUILTIN_VBROADCASTPD256
,
26113 IX86_BUILTIN_VBROADCASTPS256
,
26114 IX86_BUILTIN_VINSERTF128PD256
,
26115 IX86_BUILTIN_VINSERTF128PS256
,
26116 IX86_BUILTIN_VINSERTF128SI256
,
26117 IX86_BUILTIN_LOADUPD256
,
26118 IX86_BUILTIN_LOADUPS256
,
26119 IX86_BUILTIN_STOREUPD256
,
26120 IX86_BUILTIN_STOREUPS256
,
26121 IX86_BUILTIN_LDDQU256
,
26122 IX86_BUILTIN_MOVNTDQ256
,
26123 IX86_BUILTIN_MOVNTPD256
,
26124 IX86_BUILTIN_MOVNTPS256
,
26125 IX86_BUILTIN_LOADDQU256
,
26126 IX86_BUILTIN_STOREDQU256
,
26127 IX86_BUILTIN_MASKLOADPD
,
26128 IX86_BUILTIN_MASKLOADPS
,
26129 IX86_BUILTIN_MASKSTOREPD
,
26130 IX86_BUILTIN_MASKSTOREPS
,
26131 IX86_BUILTIN_MASKLOADPD256
,
26132 IX86_BUILTIN_MASKLOADPS256
,
26133 IX86_BUILTIN_MASKSTOREPD256
,
26134 IX86_BUILTIN_MASKSTOREPS256
,
26135 IX86_BUILTIN_MOVSHDUP256
,
26136 IX86_BUILTIN_MOVSLDUP256
,
26137 IX86_BUILTIN_MOVDDUP256
,
26139 IX86_BUILTIN_SQRTPD256
,
26140 IX86_BUILTIN_SQRTPS256
,
26141 IX86_BUILTIN_SQRTPS_NR256
,
26142 IX86_BUILTIN_RSQRTPS256
,
26143 IX86_BUILTIN_RSQRTPS_NR256
,
26145 IX86_BUILTIN_RCPPS256
,
26147 IX86_BUILTIN_ROUNDPD256
,
26148 IX86_BUILTIN_ROUNDPS256
,
26150 IX86_BUILTIN_FLOORPD256
,
26151 IX86_BUILTIN_CEILPD256
,
26152 IX86_BUILTIN_TRUNCPD256
,
26153 IX86_BUILTIN_RINTPD256
,
26154 IX86_BUILTIN_ROUNDPD_AZ256
,
26156 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
,
26157 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
,
26158 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
,
26160 IX86_BUILTIN_FLOORPS256
,
26161 IX86_BUILTIN_CEILPS256
,
26162 IX86_BUILTIN_TRUNCPS256
,
26163 IX86_BUILTIN_RINTPS256
,
26164 IX86_BUILTIN_ROUNDPS_AZ256
,
26166 IX86_BUILTIN_FLOORPS_SFIX256
,
26167 IX86_BUILTIN_CEILPS_SFIX256
,
26168 IX86_BUILTIN_ROUNDPS_AZ_SFIX256
,
26170 IX86_BUILTIN_UNPCKHPD256
,
26171 IX86_BUILTIN_UNPCKLPD256
,
26172 IX86_BUILTIN_UNPCKHPS256
,
26173 IX86_BUILTIN_UNPCKLPS256
,
26175 IX86_BUILTIN_SI256_SI
,
26176 IX86_BUILTIN_PS256_PS
,
26177 IX86_BUILTIN_PD256_PD
,
26178 IX86_BUILTIN_SI_SI256
,
26179 IX86_BUILTIN_PS_PS256
,
26180 IX86_BUILTIN_PD_PD256
,
26182 IX86_BUILTIN_VTESTZPD
,
26183 IX86_BUILTIN_VTESTCPD
,
26184 IX86_BUILTIN_VTESTNZCPD
,
26185 IX86_BUILTIN_VTESTZPS
,
26186 IX86_BUILTIN_VTESTCPS
,
26187 IX86_BUILTIN_VTESTNZCPS
,
26188 IX86_BUILTIN_VTESTZPD256
,
26189 IX86_BUILTIN_VTESTCPD256
,
26190 IX86_BUILTIN_VTESTNZCPD256
,
26191 IX86_BUILTIN_VTESTZPS256
,
26192 IX86_BUILTIN_VTESTCPS256
,
26193 IX86_BUILTIN_VTESTNZCPS256
,
26194 IX86_BUILTIN_PTESTZ256
,
26195 IX86_BUILTIN_PTESTC256
,
26196 IX86_BUILTIN_PTESTNZC256
,
26198 IX86_BUILTIN_MOVMSKPD256
,
26199 IX86_BUILTIN_MOVMSKPS256
,
26202 IX86_BUILTIN_MPSADBW256
,
26203 IX86_BUILTIN_PABSB256
,
26204 IX86_BUILTIN_PABSW256
,
26205 IX86_BUILTIN_PABSD256
,
26206 IX86_BUILTIN_PACKSSDW256
,
26207 IX86_BUILTIN_PACKSSWB256
,
26208 IX86_BUILTIN_PACKUSDW256
,
26209 IX86_BUILTIN_PACKUSWB256
,
26210 IX86_BUILTIN_PADDB256
,
26211 IX86_BUILTIN_PADDW256
,
26212 IX86_BUILTIN_PADDD256
,
26213 IX86_BUILTIN_PADDQ256
,
26214 IX86_BUILTIN_PADDSB256
,
26215 IX86_BUILTIN_PADDSW256
,
26216 IX86_BUILTIN_PADDUSB256
,
26217 IX86_BUILTIN_PADDUSW256
,
26218 IX86_BUILTIN_PALIGNR256
,
26219 IX86_BUILTIN_AND256I
,
26220 IX86_BUILTIN_ANDNOT256I
,
26221 IX86_BUILTIN_PAVGB256
,
26222 IX86_BUILTIN_PAVGW256
,
26223 IX86_BUILTIN_PBLENDVB256
,
26224 IX86_BUILTIN_PBLENDVW256
,
26225 IX86_BUILTIN_PCMPEQB256
,
26226 IX86_BUILTIN_PCMPEQW256
,
26227 IX86_BUILTIN_PCMPEQD256
,
26228 IX86_BUILTIN_PCMPEQQ256
,
26229 IX86_BUILTIN_PCMPGTB256
,
26230 IX86_BUILTIN_PCMPGTW256
,
26231 IX86_BUILTIN_PCMPGTD256
,
26232 IX86_BUILTIN_PCMPGTQ256
,
26233 IX86_BUILTIN_PHADDW256
,
26234 IX86_BUILTIN_PHADDD256
,
26235 IX86_BUILTIN_PHADDSW256
,
26236 IX86_BUILTIN_PHSUBW256
,
26237 IX86_BUILTIN_PHSUBD256
,
26238 IX86_BUILTIN_PHSUBSW256
,
26239 IX86_BUILTIN_PMADDUBSW256
,
26240 IX86_BUILTIN_PMADDWD256
,
26241 IX86_BUILTIN_PMAXSB256
,
26242 IX86_BUILTIN_PMAXSW256
,
26243 IX86_BUILTIN_PMAXSD256
,
26244 IX86_BUILTIN_PMAXUB256
,
26245 IX86_BUILTIN_PMAXUW256
,
26246 IX86_BUILTIN_PMAXUD256
,
26247 IX86_BUILTIN_PMINSB256
,
26248 IX86_BUILTIN_PMINSW256
,
26249 IX86_BUILTIN_PMINSD256
,
26250 IX86_BUILTIN_PMINUB256
,
26251 IX86_BUILTIN_PMINUW256
,
26252 IX86_BUILTIN_PMINUD256
,
26253 IX86_BUILTIN_PMOVMSKB256
,
26254 IX86_BUILTIN_PMOVSXBW256
,
26255 IX86_BUILTIN_PMOVSXBD256
,
26256 IX86_BUILTIN_PMOVSXBQ256
,
26257 IX86_BUILTIN_PMOVSXWD256
,
26258 IX86_BUILTIN_PMOVSXWQ256
,
26259 IX86_BUILTIN_PMOVSXDQ256
,
26260 IX86_BUILTIN_PMOVZXBW256
,
26261 IX86_BUILTIN_PMOVZXBD256
,
26262 IX86_BUILTIN_PMOVZXBQ256
,
26263 IX86_BUILTIN_PMOVZXWD256
,
26264 IX86_BUILTIN_PMOVZXWQ256
,
26265 IX86_BUILTIN_PMOVZXDQ256
,
26266 IX86_BUILTIN_PMULDQ256
,
26267 IX86_BUILTIN_PMULHRSW256
,
26268 IX86_BUILTIN_PMULHUW256
,
26269 IX86_BUILTIN_PMULHW256
,
26270 IX86_BUILTIN_PMULLW256
,
26271 IX86_BUILTIN_PMULLD256
,
26272 IX86_BUILTIN_PMULUDQ256
,
26273 IX86_BUILTIN_POR256
,
26274 IX86_BUILTIN_PSADBW256
,
26275 IX86_BUILTIN_PSHUFB256
,
26276 IX86_BUILTIN_PSHUFD256
,
26277 IX86_BUILTIN_PSHUFHW256
,
26278 IX86_BUILTIN_PSHUFLW256
,
26279 IX86_BUILTIN_PSIGNB256
,
26280 IX86_BUILTIN_PSIGNW256
,
26281 IX86_BUILTIN_PSIGND256
,
26282 IX86_BUILTIN_PSLLDQI256
,
26283 IX86_BUILTIN_PSLLWI256
,
26284 IX86_BUILTIN_PSLLW256
,
26285 IX86_BUILTIN_PSLLDI256
,
26286 IX86_BUILTIN_PSLLD256
,
26287 IX86_BUILTIN_PSLLQI256
,
26288 IX86_BUILTIN_PSLLQ256
,
26289 IX86_BUILTIN_PSRAWI256
,
26290 IX86_BUILTIN_PSRAW256
,
26291 IX86_BUILTIN_PSRADI256
,
26292 IX86_BUILTIN_PSRAD256
,
26293 IX86_BUILTIN_PSRLDQI256
,
26294 IX86_BUILTIN_PSRLWI256
,
26295 IX86_BUILTIN_PSRLW256
,
26296 IX86_BUILTIN_PSRLDI256
,
26297 IX86_BUILTIN_PSRLD256
,
26298 IX86_BUILTIN_PSRLQI256
,
26299 IX86_BUILTIN_PSRLQ256
,
26300 IX86_BUILTIN_PSUBB256
,
26301 IX86_BUILTIN_PSUBW256
,
26302 IX86_BUILTIN_PSUBD256
,
26303 IX86_BUILTIN_PSUBQ256
,
26304 IX86_BUILTIN_PSUBSB256
,
26305 IX86_BUILTIN_PSUBSW256
,
26306 IX86_BUILTIN_PSUBUSB256
,
26307 IX86_BUILTIN_PSUBUSW256
,
26308 IX86_BUILTIN_PUNPCKHBW256
,
26309 IX86_BUILTIN_PUNPCKHWD256
,
26310 IX86_BUILTIN_PUNPCKHDQ256
,
26311 IX86_BUILTIN_PUNPCKHQDQ256
,
26312 IX86_BUILTIN_PUNPCKLBW256
,
26313 IX86_BUILTIN_PUNPCKLWD256
,
26314 IX86_BUILTIN_PUNPCKLDQ256
,
26315 IX86_BUILTIN_PUNPCKLQDQ256
,
26316 IX86_BUILTIN_PXOR256
,
26317 IX86_BUILTIN_MOVNTDQA256
,
26318 IX86_BUILTIN_VBROADCASTSS_PS
,
26319 IX86_BUILTIN_VBROADCASTSS_PS256
,
26320 IX86_BUILTIN_VBROADCASTSD_PD256
,
26321 IX86_BUILTIN_VBROADCASTSI256
,
26322 IX86_BUILTIN_PBLENDD256
,
26323 IX86_BUILTIN_PBLENDD128
,
26324 IX86_BUILTIN_PBROADCASTB256
,
26325 IX86_BUILTIN_PBROADCASTW256
,
26326 IX86_BUILTIN_PBROADCASTD256
,
26327 IX86_BUILTIN_PBROADCASTQ256
,
26328 IX86_BUILTIN_PBROADCASTB128
,
26329 IX86_BUILTIN_PBROADCASTW128
,
26330 IX86_BUILTIN_PBROADCASTD128
,
26331 IX86_BUILTIN_PBROADCASTQ128
,
26332 IX86_BUILTIN_VPERMVARSI256
,
26333 IX86_BUILTIN_VPERMDF256
,
26334 IX86_BUILTIN_VPERMVARSF256
,
26335 IX86_BUILTIN_VPERMDI256
,
26336 IX86_BUILTIN_VPERMTI256
,
26337 IX86_BUILTIN_VEXTRACT128I256
,
26338 IX86_BUILTIN_VINSERT128I256
,
26339 IX86_BUILTIN_MASKLOADD
,
26340 IX86_BUILTIN_MASKLOADQ
,
26341 IX86_BUILTIN_MASKLOADD256
,
26342 IX86_BUILTIN_MASKLOADQ256
,
26343 IX86_BUILTIN_MASKSTORED
,
26344 IX86_BUILTIN_MASKSTOREQ
,
26345 IX86_BUILTIN_MASKSTORED256
,
26346 IX86_BUILTIN_MASKSTOREQ256
,
26347 IX86_BUILTIN_PSLLVV4DI
,
26348 IX86_BUILTIN_PSLLVV2DI
,
26349 IX86_BUILTIN_PSLLVV8SI
,
26350 IX86_BUILTIN_PSLLVV4SI
,
26351 IX86_BUILTIN_PSRAVV8SI
,
26352 IX86_BUILTIN_PSRAVV4SI
,
26353 IX86_BUILTIN_PSRLVV4DI
,
26354 IX86_BUILTIN_PSRLVV2DI
,
26355 IX86_BUILTIN_PSRLVV8SI
,
26356 IX86_BUILTIN_PSRLVV4SI
,
26358 IX86_BUILTIN_GATHERSIV2DF
,
26359 IX86_BUILTIN_GATHERSIV4DF
,
26360 IX86_BUILTIN_GATHERDIV2DF
,
26361 IX86_BUILTIN_GATHERDIV4DF
,
26362 IX86_BUILTIN_GATHERSIV4SF
,
26363 IX86_BUILTIN_GATHERSIV8SF
,
26364 IX86_BUILTIN_GATHERDIV4SF
,
26365 IX86_BUILTIN_GATHERDIV8SF
,
26366 IX86_BUILTIN_GATHERSIV2DI
,
26367 IX86_BUILTIN_GATHERSIV4DI
,
26368 IX86_BUILTIN_GATHERDIV2DI
,
26369 IX86_BUILTIN_GATHERDIV4DI
,
26370 IX86_BUILTIN_GATHERSIV4SI
,
26371 IX86_BUILTIN_GATHERSIV8SI
,
26372 IX86_BUILTIN_GATHERDIV4SI
,
26373 IX86_BUILTIN_GATHERDIV8SI
,
26375 /* Alternate 4 element gather for the vectorizer where
26376 all operands are 32-byte wide. */
26377 IX86_BUILTIN_GATHERALTSIV4DF
,
26378 IX86_BUILTIN_GATHERALTDIV8SF
,
26379 IX86_BUILTIN_GATHERALTSIV4DI
,
26380 IX86_BUILTIN_GATHERALTDIV8SI
,
26382 /* TFmode support builtins. */
26384 IX86_BUILTIN_HUGE_VALQ
,
26385 IX86_BUILTIN_FABSQ
,
26386 IX86_BUILTIN_COPYSIGNQ
,
26388 /* Vectorizer support builtins. */
26389 IX86_BUILTIN_CPYSGNPS
,
26390 IX86_BUILTIN_CPYSGNPD
,
26391 IX86_BUILTIN_CPYSGNPS256
,
26392 IX86_BUILTIN_CPYSGNPD256
,
26394 /* FMA4 instructions. */
26395 IX86_BUILTIN_VFMADDSS
,
26396 IX86_BUILTIN_VFMADDSD
,
26397 IX86_BUILTIN_VFMADDPS
,
26398 IX86_BUILTIN_VFMADDPD
,
26399 IX86_BUILTIN_VFMADDPS256
,
26400 IX86_BUILTIN_VFMADDPD256
,
26401 IX86_BUILTIN_VFMADDSUBPS
,
26402 IX86_BUILTIN_VFMADDSUBPD
,
26403 IX86_BUILTIN_VFMADDSUBPS256
,
26404 IX86_BUILTIN_VFMADDSUBPD256
,
26406 /* FMA3 instructions. */
26407 IX86_BUILTIN_VFMADDSS3
,
26408 IX86_BUILTIN_VFMADDSD3
,
26410 /* XOP instructions. */
26411 IX86_BUILTIN_VPCMOV
,
26412 IX86_BUILTIN_VPCMOV_V2DI
,
26413 IX86_BUILTIN_VPCMOV_V4SI
,
26414 IX86_BUILTIN_VPCMOV_V8HI
,
26415 IX86_BUILTIN_VPCMOV_V16QI
,
26416 IX86_BUILTIN_VPCMOV_V4SF
,
26417 IX86_BUILTIN_VPCMOV_V2DF
,
26418 IX86_BUILTIN_VPCMOV256
,
26419 IX86_BUILTIN_VPCMOV_V4DI256
,
26420 IX86_BUILTIN_VPCMOV_V8SI256
,
26421 IX86_BUILTIN_VPCMOV_V16HI256
,
26422 IX86_BUILTIN_VPCMOV_V32QI256
,
26423 IX86_BUILTIN_VPCMOV_V8SF256
,
26424 IX86_BUILTIN_VPCMOV_V4DF256
,
26426 IX86_BUILTIN_VPPERM
,
26428 IX86_BUILTIN_VPMACSSWW
,
26429 IX86_BUILTIN_VPMACSWW
,
26430 IX86_BUILTIN_VPMACSSWD
,
26431 IX86_BUILTIN_VPMACSWD
,
26432 IX86_BUILTIN_VPMACSSDD
,
26433 IX86_BUILTIN_VPMACSDD
,
26434 IX86_BUILTIN_VPMACSSDQL
,
26435 IX86_BUILTIN_VPMACSSDQH
,
26436 IX86_BUILTIN_VPMACSDQL
,
26437 IX86_BUILTIN_VPMACSDQH
,
26438 IX86_BUILTIN_VPMADCSSWD
,
26439 IX86_BUILTIN_VPMADCSWD
,
26441 IX86_BUILTIN_VPHADDBW
,
26442 IX86_BUILTIN_VPHADDBD
,
26443 IX86_BUILTIN_VPHADDBQ
,
26444 IX86_BUILTIN_VPHADDWD
,
26445 IX86_BUILTIN_VPHADDWQ
,
26446 IX86_BUILTIN_VPHADDDQ
,
26447 IX86_BUILTIN_VPHADDUBW
,
26448 IX86_BUILTIN_VPHADDUBD
,
26449 IX86_BUILTIN_VPHADDUBQ
,
26450 IX86_BUILTIN_VPHADDUWD
,
26451 IX86_BUILTIN_VPHADDUWQ
,
26452 IX86_BUILTIN_VPHADDUDQ
,
26453 IX86_BUILTIN_VPHSUBBW
,
26454 IX86_BUILTIN_VPHSUBWD
,
26455 IX86_BUILTIN_VPHSUBDQ
,
26457 IX86_BUILTIN_VPROTB
,
26458 IX86_BUILTIN_VPROTW
,
26459 IX86_BUILTIN_VPROTD
,
26460 IX86_BUILTIN_VPROTQ
,
26461 IX86_BUILTIN_VPROTB_IMM
,
26462 IX86_BUILTIN_VPROTW_IMM
,
26463 IX86_BUILTIN_VPROTD_IMM
,
26464 IX86_BUILTIN_VPROTQ_IMM
,
26466 IX86_BUILTIN_VPSHLB
,
26467 IX86_BUILTIN_VPSHLW
,
26468 IX86_BUILTIN_VPSHLD
,
26469 IX86_BUILTIN_VPSHLQ
,
26470 IX86_BUILTIN_VPSHAB
,
26471 IX86_BUILTIN_VPSHAW
,
26472 IX86_BUILTIN_VPSHAD
,
26473 IX86_BUILTIN_VPSHAQ
,
26475 IX86_BUILTIN_VFRCZSS
,
26476 IX86_BUILTIN_VFRCZSD
,
26477 IX86_BUILTIN_VFRCZPS
,
26478 IX86_BUILTIN_VFRCZPD
,
26479 IX86_BUILTIN_VFRCZPS256
,
26480 IX86_BUILTIN_VFRCZPD256
,
26482 IX86_BUILTIN_VPCOMEQUB
,
26483 IX86_BUILTIN_VPCOMNEUB
,
26484 IX86_BUILTIN_VPCOMLTUB
,
26485 IX86_BUILTIN_VPCOMLEUB
,
26486 IX86_BUILTIN_VPCOMGTUB
,
26487 IX86_BUILTIN_VPCOMGEUB
,
26488 IX86_BUILTIN_VPCOMFALSEUB
,
26489 IX86_BUILTIN_VPCOMTRUEUB
,
26491 IX86_BUILTIN_VPCOMEQUW
,
26492 IX86_BUILTIN_VPCOMNEUW
,
26493 IX86_BUILTIN_VPCOMLTUW
,
26494 IX86_BUILTIN_VPCOMLEUW
,
26495 IX86_BUILTIN_VPCOMGTUW
,
26496 IX86_BUILTIN_VPCOMGEUW
,
26497 IX86_BUILTIN_VPCOMFALSEUW
,
26498 IX86_BUILTIN_VPCOMTRUEUW
,
26500 IX86_BUILTIN_VPCOMEQUD
,
26501 IX86_BUILTIN_VPCOMNEUD
,
26502 IX86_BUILTIN_VPCOMLTUD
,
26503 IX86_BUILTIN_VPCOMLEUD
,
26504 IX86_BUILTIN_VPCOMGTUD
,
26505 IX86_BUILTIN_VPCOMGEUD
,
26506 IX86_BUILTIN_VPCOMFALSEUD
,
26507 IX86_BUILTIN_VPCOMTRUEUD
,
26509 IX86_BUILTIN_VPCOMEQUQ
,
26510 IX86_BUILTIN_VPCOMNEUQ
,
26511 IX86_BUILTIN_VPCOMLTUQ
,
26512 IX86_BUILTIN_VPCOMLEUQ
,
26513 IX86_BUILTIN_VPCOMGTUQ
,
26514 IX86_BUILTIN_VPCOMGEUQ
,
26515 IX86_BUILTIN_VPCOMFALSEUQ
,
26516 IX86_BUILTIN_VPCOMTRUEUQ
,
26518 IX86_BUILTIN_VPCOMEQB
,
26519 IX86_BUILTIN_VPCOMNEB
,
26520 IX86_BUILTIN_VPCOMLTB
,
26521 IX86_BUILTIN_VPCOMLEB
,
26522 IX86_BUILTIN_VPCOMGTB
,
26523 IX86_BUILTIN_VPCOMGEB
,
26524 IX86_BUILTIN_VPCOMFALSEB
,
26525 IX86_BUILTIN_VPCOMTRUEB
,
26527 IX86_BUILTIN_VPCOMEQW
,
26528 IX86_BUILTIN_VPCOMNEW
,
26529 IX86_BUILTIN_VPCOMLTW
,
26530 IX86_BUILTIN_VPCOMLEW
,
26531 IX86_BUILTIN_VPCOMGTW
,
26532 IX86_BUILTIN_VPCOMGEW
,
26533 IX86_BUILTIN_VPCOMFALSEW
,
26534 IX86_BUILTIN_VPCOMTRUEW
,
26536 IX86_BUILTIN_VPCOMEQD
,
26537 IX86_BUILTIN_VPCOMNED
,
26538 IX86_BUILTIN_VPCOMLTD
,
26539 IX86_BUILTIN_VPCOMLED
,
26540 IX86_BUILTIN_VPCOMGTD
,
26541 IX86_BUILTIN_VPCOMGED
,
26542 IX86_BUILTIN_VPCOMFALSED
,
26543 IX86_BUILTIN_VPCOMTRUED
,
26545 IX86_BUILTIN_VPCOMEQQ
,
26546 IX86_BUILTIN_VPCOMNEQ
,
26547 IX86_BUILTIN_VPCOMLTQ
,
26548 IX86_BUILTIN_VPCOMLEQ
,
26549 IX86_BUILTIN_VPCOMGTQ
,
26550 IX86_BUILTIN_VPCOMGEQ
,
26551 IX86_BUILTIN_VPCOMFALSEQ
,
26552 IX86_BUILTIN_VPCOMTRUEQ
,
26554 /* LWP instructions. */
26555 IX86_BUILTIN_LLWPCB
,
26556 IX86_BUILTIN_SLWPCB
,
26557 IX86_BUILTIN_LWPVAL32
,
26558 IX86_BUILTIN_LWPVAL64
,
26559 IX86_BUILTIN_LWPINS32
,
26560 IX86_BUILTIN_LWPINS64
,
26565 IX86_BUILTIN_XBEGIN
,
26567 IX86_BUILTIN_XABORT
,
26568 IX86_BUILTIN_XTEST
,
26570 /* BMI instructions. */
26571 IX86_BUILTIN_BEXTR32
,
26572 IX86_BUILTIN_BEXTR64
,
26575 /* TBM instructions. */
26576 IX86_BUILTIN_BEXTRI32
,
26577 IX86_BUILTIN_BEXTRI64
,
26579 /* BMI2 instructions. */
26580 IX86_BUILTIN_BZHI32
,
26581 IX86_BUILTIN_BZHI64
,
26582 IX86_BUILTIN_PDEP32
,
26583 IX86_BUILTIN_PDEP64
,
26584 IX86_BUILTIN_PEXT32
,
26585 IX86_BUILTIN_PEXT64
,
26587 /* ADX instructions. */
26588 IX86_BUILTIN_ADDCARRYX32
,
26589 IX86_BUILTIN_ADDCARRYX64
,
26591 /* FSGSBASE instructions. */
26592 IX86_BUILTIN_RDFSBASE32
,
26593 IX86_BUILTIN_RDFSBASE64
,
26594 IX86_BUILTIN_RDGSBASE32
,
26595 IX86_BUILTIN_RDGSBASE64
,
26596 IX86_BUILTIN_WRFSBASE32
,
26597 IX86_BUILTIN_WRFSBASE64
,
26598 IX86_BUILTIN_WRGSBASE32
,
26599 IX86_BUILTIN_WRGSBASE64
,
26601 /* RDRND instructions. */
26602 IX86_BUILTIN_RDRAND16_STEP
,
26603 IX86_BUILTIN_RDRAND32_STEP
,
26604 IX86_BUILTIN_RDRAND64_STEP
,
26606 /* RDSEED instructions. */
26607 IX86_BUILTIN_RDSEED16_STEP
,
26608 IX86_BUILTIN_RDSEED32_STEP
,
26609 IX86_BUILTIN_RDSEED64_STEP
,
26611 /* F16C instructions. */
26612 IX86_BUILTIN_CVTPH2PS
,
26613 IX86_BUILTIN_CVTPH2PS256
,
26614 IX86_BUILTIN_CVTPS2PH
,
26615 IX86_BUILTIN_CVTPS2PH256
,
26617 /* CFString built-in for darwin */
26618 IX86_BUILTIN_CFSTRING
,
26620 /* Builtins to get CPU type and supported features. */
26621 IX86_BUILTIN_CPU_INIT
,
26622 IX86_BUILTIN_CPU_IS
,
26623 IX86_BUILTIN_CPU_SUPPORTS
,
26628 /* Table for the ix86 builtin decls. */
/* One decl slot per IX86_BUILTIN_* enumerator; NULL_TREE until the builtin
   is actually built (see def_builtin / ix86_add_new_builtins below).
   GTY(()) marks the table as a garbage-collector root for the tree decls.  */
26629 static GTY(()) tree ix86_builtins
[(int) IX86_BUILTIN_MAX
];
26631 /* Table of all of the builtin functions that are possible with different ISA's
26632 but are waiting to be built until a function is declared to use that
/* NOTE(review): this extraction is missing physical lines (the embedded
   original line numbers jump, e.g. 26632 -> 26634), so the end of the comment
   above and the struct's closing brace (orig line 26640) are not visible.  */
26634 struct builtin_isa
{
26635 const char *name
; /* function name */
26636 enum ix86_builtin_func_type tcode
; /* type to use in the declaration */
26637 HOST_WIDE_INT isa
; /* isa_flags this builtin is defined for */
26638 bool const_p
; /* true if the declaration is constant */
26639 bool set_and_not_built_p
;
/* Per-builtin deferred-construction records, indexed like ix86_builtins.
   Filled in by def_builtin when a builtin is deferred; consumed by
   ix86_add_new_builtins once the matching ISA is enabled.  */
26642 static struct builtin_isa ix86_builtins_isa
[(int) IX86_BUILTIN_MAX
];
26645 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
26646 of which isa_flags to use in the ix86_builtins_isa array. Stores the
26647 function decl in the ix86_builtins array. Returns the function decl or
26648 NULL_TREE, if the builtin was not added.
26650 If the front end has a special hook for builtin functions, delay adding
26651 builtin functions that aren't in the current ISA until the ISA is changed
26652 with function specific optimization. Doing so, can save about 300K for the
26653 default compiler. When the builtin is expanded, check at that time whether
26656 If the front end doesn't have a special hook, record all builtins, even if
26657 it isn't an instruction set in the current ISA in case the user uses
26658 function specific options for a different ISA, so that we don't get scope
26659 errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): the extraction dropped several physical lines of this
   function (original line numbers jump 26664->26666, 26670->26672,
   26676->26679, etc.), so the return type, braces, the first operand of the
   condition at 26674 (presumably "if (mask == 0"), and the final return are
   not visible here.  The comments below describe only the visible fragments;
   consult the unmangled i386.c before editing.  */
26662 def_builtin (HOST_WIDE_INT mask
, const char *name
,
26663 enum ix86_builtin_func_type tcode
,
26664 enum ix86_builtins code
)
/* Result decl; stays NULL_TREE when the builtin is deferred or skipped.  */
26666 tree decl
= NULL_TREE
;
/* Skip builtins flagged 64-bit-only unless compiling for 64-bit.  */
26668 if (!(mask
& OPTION_MASK_ISA_64BIT
) || TARGET_64BIT
)
/* Remember the full ISA mask for later checks at expansion time.  */
26670 ix86_builtins_isa
[(int) code
].isa
= mask
;
/* The 64-bit marker is not an ISA feature; drop it before comparing
   against the currently-enabled ISA flags.  */
26672 mask
&= ~OPTION_MASK_ISA_64BIT
;
/* Build immediately when the builtin's ISA is currently enabled, or the
   front end has no separate extended-scope hook (so deferring would not
   work).  */
26674 || (mask
& ix86_isa_flags
) != 0
26675 || (lang_hooks
.builtin_function
26676 == lang_hooks
.builtin_function_ext_scope
))
26679 tree type
= ix86_get_builtin_func_type (tcode
);
26680 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
/* Record the built decl and mark it as no longer pending.  */
26682 ix86_builtins
[(int) code
] = decl
;
26683 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= false;
/* Deferred path: record everything needed so ix86_add_new_builtins can
   construct the decl later, once the matching ISA is enabled.  */
26687 ix86_builtins
[(int) code
] = NULL_TREE
;
26688 ix86_builtins_isa
[(int) code
].tcode
= tcode
;
26689 ix86_builtins_isa
[(int) code
].name
= name
;
26690 ix86_builtins_isa
[(int) code
].const_p
= false;
26691 ix86_builtins_isa
[(int) code
].set_and_not_built_p
= true;
26698 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): extraction dropped lines here too (26698->26701,
   26704->26706, 26706->26708): the return type, braces, and presumably a
   null check guarding the TREE_READONLY store and the deferred-path branch
   are missing from this view.  */
26701 def_builtin_const (HOST_WIDE_INT mask
, const char *name
,
26702 enum ix86_builtin_func_type tcode
, enum ix86_builtins code
)
/* Delegate the actual registration/deferral to def_builtin.  */
26704 tree decl
= def_builtin (mask
, name
, tcode
, code
);
/* Built immediately: mark the decl itself as const (no side effects).  */
26706 TREE_READONLY (decl
) = 1;
/* Deferred: remember the constness so ix86_add_new_builtins applies it.  */
26708 ix86_builtins_isa
[(int) code
].const_p
= true;
26713 /* Add any new builtin functions for a given ISA that may not have been
26714 declared. This saves a bit of space compared to adding all of the
26715 declarations to the tree, even if we didn't use them. */
/* NOTE(review): the extraction dropped physical lines (original numbers jump
   26718->26722, 26725->26729, 26734->26737): the return type, the local
   declarations of `type' and `decl', braces, and the trailing argument(s) of
   the add_builtin_function_ext_scope call are not visible here.  */
26718 ix86_add_new_builtins (HOST_WIDE_INT isa
)
/* Walk every builtin slot looking for entries deferred by def_builtin.  */
26722 for (i
= 0; i
< (int)IX86_BUILTIN_MAX
; i
++)
/* Only build entries whose recorded ISA overlaps the newly-enabled ISA
   and that are still pending.  */
26724 if ((ix86_builtins_isa
[i
].isa
& isa
) != 0
26725 && ix86_builtins_isa
[i
].set_and_not_built_p
)
26729 /* Don't define the builtin again. */
26730 ix86_builtins_isa
[i
].set_and_not_built_p
= false;
/* Reconstruct the function type from the deferred record and create the
   decl at extended (file) scope.  */
26732 type
= ix86_get_builtin_func_type (ix86_builtins_isa
[i
].tcode
);
26733 decl
= add_builtin_function_ext_scope (ix86_builtins_isa
[i
].name
,
26734 type
, i
, BUILT_IN_MD
, NULL
,
26737 ix86_builtins
[i
] = decl
;
/* Apply the constness that def_builtin_const recorded for later.  */
26738 if (ix86_builtins_isa
[i
].const_p
)
26739 TREE_READONLY (decl
) = 1;
26744 /* Bits for builtin_description.flag. */
26746 /* Set when we don't support the comparison natively, and should
26747 swap_comparison in order to support it. */
26748 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row per builtin in the bdesc_* tables below: ISA mask, insn pattern,
   user-visible name (0 = skip), builtin code, and an rtx comparison code.
   NOTE(review): extraction dropped lines here (26750->26752, 26756->26760);
   the opening brace, the trailing member referenced by the comment at 26744
   (presumably `const int flag;'), and the closing brace are not visible.  */
26750 struct builtin_description
26752 const HOST_WIDE_INT mask
;
26753 const enum insn_code icode
;
26754 const char *const name
;
26755 const enum ix86_builtins code
;
26756 const enum rtx_code comparison
;
26760 static const struct builtin_description bdesc_comi
[] =
26762 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS
, UNEQ
, 0 },
26763 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS
, UNLT
, 0 },
26764 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS
, UNLE
, 0 },
26765 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS
, GT
, 0 },
26766 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS
, GE
, 0 },
26767 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_comi
, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS
, LTGT
, 0 },
26768 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS
, UNEQ
, 0 },
26769 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS
, UNLT
, 0 },
26770 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS
, UNLE
, 0 },
26771 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS
, GT
, 0 },
26772 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS
, GE
, 0 },
26773 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_ucomi
, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS
, LTGT
, 0 },
26774 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD
, UNEQ
, 0 },
26775 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD
, UNLT
, 0 },
26776 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD
, UNLE
, 0 },
26777 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD
, GT
, 0 },
26778 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD
, GE
, 0 },
26779 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_comi
, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD
, LTGT
, 0 },
26780 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD
, UNEQ
, 0 },
26781 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD
, UNLT
, 0 },
26782 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD
, UNLE
, 0 },
26783 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD
, GT
, 0 },
26784 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD
, GE
, 0 },
26785 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ucomi
, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD
, LTGT
, 0 },
26788 static const struct builtin_description bdesc_pcmpestr
[] =
26791 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128
, UNKNOWN
, 0 },
26792 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128
, UNKNOWN
, 0 },
26793 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128
, UNKNOWN
, (int) CCAmode
},
26794 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128
, UNKNOWN
, (int) CCCmode
},
26795 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128
, UNKNOWN
, (int) CCOmode
},
26796 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128
, UNKNOWN
, (int) CCSmode
},
26797 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpestr
, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128
, UNKNOWN
, (int) CCZmode
},
26800 static const struct builtin_description bdesc_pcmpistr
[] =
26803 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128
, UNKNOWN
, 0 },
26804 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128
, UNKNOWN
, 0 },
26805 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128
, UNKNOWN
, (int) CCAmode
},
26806 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128
, UNKNOWN
, (int) CCCmode
},
26807 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128
, UNKNOWN
, (int) CCOmode
},
26808 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128
, UNKNOWN
, (int) CCSmode
},
26809 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_pcmpistr
, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128
, UNKNOWN
, (int) CCZmode
},
26812 /* Special builtins with variable number of arguments. */
26813 static const struct builtin_description bdesc_special_args
[] =
26815 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26816 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP
, UNKNOWN
, (int) UINT64_FTYPE_PUNSIGNED
},
26817 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_pause
, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26820 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_emms
, "__builtin_ia32_emms", IX86_BUILTIN_EMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26823 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_femms
, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26825 /* FXSR, XSAVE and XSAVEOPT */
26826 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26827 { OPTION_MASK_ISA_FXSR
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26828 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26829 { OPTION_MASK_ISA_XSAVE
, CODE_FOR_nothing
, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26830 { OPTION_MASK_ISA_XSAVEOPT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26832 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26833 { OPTION_MASK_ISA_FXSR
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26834 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26835 { OPTION_MASK_ISA_XSAVE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26836 { OPTION_MASK_ISA_XSAVEOPT
| OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64
, UNKNOWN
, (int) VOID_FTYPE_PVOID_INT64
},
26839 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storeups
, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26840 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movntv4sf
, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26841 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadups
, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26843 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadhps_exp
, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26844 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_loadlps_exp
, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_PCV2SF
},
26845 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storehps
, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26846 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_storelps
, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS
, UNKNOWN
, (int) VOID_FTYPE_PV2SF_V4SF
},
26848 /* SSE or 3DNow!A */
26849 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_sfence
, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26850 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_sse_movntq
, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ
, UNKNOWN
, (int) VOID_FTYPE_PULONGLONG_ULONGLONG
},
26853 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lfence
, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26854 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_mfence
, 0, IX86_BUILTIN_MFENCE
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26855 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storeupd
, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26856 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_storedqu
, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V16QI
},
26857 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2df
, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26858 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntv2di
, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI
},
26859 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movntisi
, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI
, UNKNOWN
, (int) VOID_FTYPE_PINT_INT
},
26860 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_movntidi
, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64
, UNKNOWN
, (int) VOID_FTYPE_PLONGLONG_LONGLONG
},
26861 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadupd
, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD
, UNKNOWN
, (int) V2DF_FTYPE_PCDOUBLE
},
26862 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loaddqu
, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26864 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadhpd_exp
, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26865 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_loadlpd_exp
, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_PCDOUBLE
},
26868 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_lddqu
, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU
, UNKNOWN
, (int) V16QI_FTYPE_PCCHAR
},
26871 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_movntdqa
, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA
, UNKNOWN
, (int) V2DI_FTYPE_PV2DI
},
26874 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv2df
, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V2DF
},
26875 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_vmmovntv4sf
, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V4SF
},
26878 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroall
, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26879 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vzeroupper
, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26881 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4sf
, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS
, UNKNOWN
, (int) V4SF_FTYPE_PCFLOAT
},
26882 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv4df
, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26883 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_dupv8sf
, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26884 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v4df
, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV2DF
},
26885 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vbroadcastf128_v8sf
, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV4SF
},
26887 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadupd256
, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCDOUBLE
},
26888 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loadups256
, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCFLOAT
},
26889 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeupd256
, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26890 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storeups256
, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26891 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_loaddqu256
, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26892 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_storedqu256
, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256
, UNKNOWN
, (int) VOID_FTYPE_PCHAR_V32QI
},
26893 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_lddqu256
, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256
, UNKNOWN
, (int) V32QI_FTYPE_PCCHAR
},
26895 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4di
, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI
},
26896 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv4df
, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256
, UNKNOWN
, (int) VOID_FTYPE_PDOUBLE_V4DF
},
26897 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movntv8sf
, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256
, UNKNOWN
, (int) VOID_FTYPE_PFLOAT_V8SF
},
26899 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd
, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD
, UNKNOWN
, (int) V2DF_FTYPE_PCV2DF_V2DI
},
26900 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps
, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS
, UNKNOWN
, (int) V4SF_FTYPE_PCV4SF_V4SI
},
26901 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadpd256
, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256
, UNKNOWN
, (int) V4DF_FTYPE_PCV4DF_V4DI
},
26902 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskloadps256
, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256
, UNKNOWN
, (int) V8SF_FTYPE_PCV8SF_V8SI
},
26903 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd
, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD
, UNKNOWN
, (int) VOID_FTYPE_PV2DF_V2DI_V2DF
},
26904 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps
, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS
, UNKNOWN
, (int) VOID_FTYPE_PV4SF_V4SI_V4SF
},
26905 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstorepd256
, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256
, UNKNOWN
, (int) VOID_FTYPE_PV4DF_V4DI_V4DF
},
26906 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_maskstoreps256
, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256
, UNKNOWN
, (int) VOID_FTYPE_PV8SF_V8SI_V8SF
},
26909 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_movntdqa
, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256
, UNKNOWN
, (int) V4DI_FTYPE_PV4DI
},
26910 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd
, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD
, UNKNOWN
, (int) V4SI_FTYPE_PCV4SI_V4SI
},
26911 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq
, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ
, UNKNOWN
, (int) V2DI_FTYPE_PCV2DI_V2DI
},
26912 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadd256
, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256
, UNKNOWN
, (int) V8SI_FTYPE_PCV8SI_V8SI
},
26913 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskloadq256
, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256
, UNKNOWN
, (int) V4DI_FTYPE_PCV4DI_V4DI
},
26914 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored
, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED
, UNKNOWN
, (int) VOID_FTYPE_PV4SI_V4SI_V4SI
},
26915 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq
, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ
, UNKNOWN
, (int) VOID_FTYPE_PV2DI_V2DI_V2DI
},
26916 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstored256
, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256
, UNKNOWN
, (int) VOID_FTYPE_PV8SI_V8SI_V8SI
},
26917 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_maskstoreq256
, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256
, UNKNOWN
, (int) VOID_FTYPE_PV4DI_V4DI_V4DI
},
26919 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_llwpcb
, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB
, UNKNOWN
, (int) VOID_FTYPE_PVOID
},
26920 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_slwpcb
, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB
, UNKNOWN
, (int) PVOID_FTYPE_VOID
},
26921 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvalsi3
, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32
, UNKNOWN
, (int) VOID_FTYPE_UINT_UINT_UINT
},
26922 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpvaldi3
, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64
, UNKNOWN
, (int) VOID_FTYPE_UINT64_UINT_UINT
},
26923 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinssi3
, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32
, UNKNOWN
, (int) UCHAR_FTYPE_UINT_UINT_UINT
},
26924 { OPTION_MASK_ISA_LWP
, CODE_FOR_lwp_lwpinsdi3
, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64
, UNKNOWN
, (int) UCHAR_FTYPE_UINT64_UINT_UINT
},
26927 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasesi
, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26928 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdfsbasedi
, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26929 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasesi
, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26930 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_rdgsbasedi
, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64
, UNKNOWN
, (int) UINT64_FTYPE_VOID
},
26931 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasesi
, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26932 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrfsbasedi
, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26933 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasesi
, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32
, UNKNOWN
, (int) VOID_FTYPE_UNSIGNED
},
26934 { OPTION_MASK_ISA_FSGSBASE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_wrgsbasedi
, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64
, UNKNOWN
, (int) VOID_FTYPE_UINT64
},
26937 { OPTION_MASK_ISA_RTM
, CODE_FOR_xbegin
, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN
, UNKNOWN
, (int) UNSIGNED_FTYPE_VOID
},
26938 { OPTION_MASK_ISA_RTM
, CODE_FOR_xend
, "__builtin_ia32_xend", IX86_BUILTIN_XEND
, UNKNOWN
, (int) VOID_FTYPE_VOID
},
26939 { OPTION_MASK_ISA_RTM
, CODE_FOR_xtest
, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST
, UNKNOWN
, (int) INT_FTYPE_VOID
},
26942 /* Builtins with variable number of arguments. */
26943 static const struct builtin_description bdesc_args
[] =
26945 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26946 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26947 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_nothing
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26948 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26949 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26950 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26951 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26954 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26955 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26956 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26957 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26958 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26959 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26961 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26962 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26963 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26964 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26965 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26966 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26967 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26968 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26970 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26971 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26973 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26974 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26975 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26976 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26978 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26979 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26980 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26981 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26982 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26983 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26985 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26986 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26987 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26988 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26989 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26990 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26992 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26993 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
26994 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26996 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
26998 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26999 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27000 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27001 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27002 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27003 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27005 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27006 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27007 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
27008 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27009 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27010 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
27012 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
27013 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
27014 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
27015 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
27018 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27019 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27020 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27021 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27023 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27024 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27025 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27026 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27027 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27028 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
27029 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27030 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27031 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27032 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27033 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27034 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27035 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27036 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27037 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27040 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
27041 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
27042 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27043 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
27044 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27045 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
27048 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27049 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27050 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27051 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27052 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27053 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27054 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27055 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27056 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27057 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
27058 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
27059 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
27061 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27063 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27064 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27065 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27066 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27067 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27068 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27069 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27070 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27072 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27073 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27074 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27075 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27076 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27077 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27078 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27079 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27080 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27081 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27082 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27083 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27084 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
27085 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27086 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27087 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27088 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27089 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
27090 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
27091 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27092 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
27093 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
27095 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27096 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27097 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27098 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27100 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27101 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27102 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27103 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27105 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27107 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27108 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27109 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27110 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27111 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27113 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
27114 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
27115 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
27117 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
27119 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27120 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27121 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
27123 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
27124 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
27126 /* SSE MMX or 3Dnow!A */
27127 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27128 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27129 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27131 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27132 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27133 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27134 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27136 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
27137 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
27139 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
27142 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27144 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27145 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
27146 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27147 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
27148 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
27150 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27151 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27152 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
27153 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
27154 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
27156 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
27158 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27159 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
27160 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27161 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
27163 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27164 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
27165 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27167 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27168 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27169 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27170 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27171 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27172 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27173 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27174 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27176 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27177 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27178 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27179 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27180 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27181 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27182 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27183 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27184 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27185 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27186 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
27187 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27188 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
27189 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27190 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27191 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27192 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27193 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
27194 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
27195 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
27197 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27198 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27199 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27200 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27202 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27203 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27204 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27205 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27207 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27209 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27210 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27211 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27213 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27215 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27216 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27217 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27218 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27219 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27220 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27221 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27222 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27224 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27225 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27226 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27227 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27228 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27229 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27230 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27231 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27233 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27234 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
27236 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27237 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27238 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27239 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27241 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27242 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27244 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27245 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27246 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27247 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27248 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27249 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27251 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27252 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27253 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27254 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27256 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27257 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27258 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27259 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27260 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27261 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27262 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27263 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2di
, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27265 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packsswb
, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27266 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packssdw
, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27267 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_packuswb
, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128
, UNKNOWN
, (int) V16QI_FTYPE_V8HI_V8HI
},
27269 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umulv8hi3_highpart
, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27270 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_psadbw
, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI_V16QI
},
27272 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_umulv1siv1di3
, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ
, UNKNOWN
, (int) V1DI_FTYPE_V2SI_V2SI
},
27273 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_widen_umult_even_v4si
, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27275 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmaddwd
, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI_V8HI
},
27277 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsi2sd
, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_SI
},
27278 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsi2sdq
, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_DI
},
27279 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2ss
, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2DF
},
27280 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtss2sd
, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V4SF
},
27282 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ashlv1ti3
, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27283 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27284 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27285 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27286 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv8hi3
, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27287 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv4si3
, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27288 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashlv2di3
, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27290 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_lshrv1ti3
, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT_CONVERT
},
27291 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27292 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27293 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_SI_COUNT
},
27294 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv8hi3
, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27295 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv4si3
, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27296 { OPTION_MASK_ISA_SSE2
, CODE_FOR_lshrv2di3
, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_COUNT
},
27298 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_SI_COUNT
},
27299 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_SI_COUNT
},
27300 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv8hi3
, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_COUNT
},
27301 { OPTION_MASK_ISA_SSE2
, CODE_FOR_ashrv4si3
, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_COUNT
},
27303 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufd
, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_INT
},
27304 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshuflw
, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27305 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pshufhw
, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_INT
},
27307 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsqrtv2df2
, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_VEC_MERGE
},
27309 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse2_movq128
, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27312 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_addv1di3
, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27313 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mmx_subv1di3
, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI
},
27316 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movshdup
, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27317 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_movsldup
, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27319 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv4sf3
, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27320 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_addsubv2df3
, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27321 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv4sf3
, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27322 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_haddv2df3
, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27323 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv4sf3
, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
27324 { OPTION_MASK_ISA_SSE3
, CODE_FOR_sse3_hsubv2df3
, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
27327 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv16qi2
, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27328 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8qi2
, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI
},
27329 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv8hi2
, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27330 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4hi2
, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI
},
27331 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv4si2
, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27332 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_absv2si2
, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
27334 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv8hi3
, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27335 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddwv4hi3
, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27336 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv4si3
, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27337 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phadddv2si3
, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27338 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv8hi3
, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27339 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phaddswv4hi3
, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27340 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv8hi3
, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27341 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubwv4hi3
, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27342 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv4si3
, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27343 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubdv2si3
, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27344 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv8hi3
, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27345 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_phsubswv4hi3
, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27346 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw128
, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI_V16QI
},
27347 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmaddubsw
, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V8QI_V8QI
},
27348 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv8hi3
, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27349 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pmulhrswv4hi3
, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27350 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv16qi3
, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27351 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_pshufbv8qi3
, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27352 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv16qi3
, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27353 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8qi3
, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
27354 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv8hi3
, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27355 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4hi3
, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
27356 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv4si3
, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27357 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_psignv2si3
, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
27360 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrti
, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
},
27361 { OPTION_MASK_ISA_SSSE3
, CODE_FOR_ssse3_palignrdi
, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
},
27364 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendpd
, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27365 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendps
, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27366 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvpd
, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_V2DF
},
27367 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_blendvps
, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_V4SF
},
27368 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dppd
, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27369 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_dpps
, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27370 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_insertps
, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27371 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mpsadbw
, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_INT
},
27372 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendvb
, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI_V16QI
},
27373 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_pblendw
, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI_INT
},
27375 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv8qiv8hi2
, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27376 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4qiv4si2
, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27377 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2qiv2di2
, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27378 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv4hiv4si2
, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27379 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2hiv2di2
, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27380 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_sign_extendv2siv2di2
, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27381 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv8qiv8hi2
, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128
, UNKNOWN
, (int) V8HI_FTYPE_V16QI
},
27382 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4qiv4si2
, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128
, UNKNOWN
, (int) V4SI_FTYPE_V16QI
},
27383 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2qiv2di2
, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V16QI
},
27384 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv4hiv4si2
, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128
, UNKNOWN
, (int) V4SI_FTYPE_V8HI
},
27385 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2hiv2di2
, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128
, UNKNOWN
, (int) V2DI_FTYPE_V8HI
},
27386 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_zero_extendv2siv2di2
, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI
},
27387 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_phminposuw
, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27389 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_packusdw
, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128
, UNKNOWN
, (int) V8HI_FTYPE_V4SI_V4SI
},
27390 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_eqv2di3
, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27391 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv16qi3
, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27392 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_smaxv4si3
, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27393 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv4si3
, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27394 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_umaxv8hi3
, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27395 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv16qi3
, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
27396 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sminv4si3
, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27397 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv4si3
, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27398 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_uminv8hi3
, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
27399 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_sse4_1_mulv2siv2di3
, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V4SI_V4SI
},
27400 { OPTION_MASK_ISA_SSE4_1
, CODE_FOR_mulv4si3
, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27403 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27404 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27405 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundsd
, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27406 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundss
, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27408 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD
, (enum rtx_code
) ROUND_FLOOR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27409 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD
, (enum rtx_code
) ROUND_CEIL
, (int) V2DF_FTYPE_V2DF_ROUND
},
27410 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD
, (enum rtx_code
) ROUND_TRUNC
, (int) V2DF_FTYPE_V2DF_ROUND
},
27411 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd
, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD
, (enum rtx_code
) ROUND_MXCSR
, (int) V2DF_FTYPE_V2DF_ROUND
},
27413 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27414 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundpd_vec_pack_sfix
, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V2DF_V2DF_ROUND
},
27416 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2
, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
27417 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv2df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
27419 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27420 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS
, (enum rtx_code
) ROUND_CEIL
, (int) V4SF_FTYPE_V4SF_ROUND
},
27421 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS
, (enum rtx_code
) ROUND_TRUNC
, (int) V4SF_FTYPE_V4SF_ROUND
},
27422 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps
, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS
, (enum rtx_code
) ROUND_MXCSR
, (int) V4SF_FTYPE_V4SF_ROUND
},
27424 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX
, (enum rtx_code
) ROUND_FLOOR
, (int) V4SI_FTYPE_V4SF_ROUND
},
27425 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_roundps_sfix
, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX
, (enum rtx_code
) ROUND_CEIL
, (int) V4SI_FTYPE_V4SF_ROUND
},
27427 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2
, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27428 { OPTION_MASK_ISA_ROUND
, CODE_FOR_roundv4sf2_sfix
, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
27430 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ
, EQ
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27431 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC
, LTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27432 { OPTION_MASK_ISA_ROUND
, CODE_FOR_sse4_1_ptest
, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC
, GTU
, (int) INT_FTYPE_V2DI_V2DI_PTEST
},
27435 { OPTION_MASK_ISA_SSE4_2
, CODE_FOR_sse4_2_gtv2di3
, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27436 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32qi
, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UCHAR
},
27437 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32hi
, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI
, UNKNOWN
, (int) UINT_FTYPE_UINT_USHORT
},
27438 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
, CODE_FOR_sse4_2_crc32si
, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27439 { OPTION_MASK_ISA_SSE4_2
| OPTION_MASK_ISA_CRC32
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse4_2_crc32di
, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27442 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrqi
, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_UINT_UINT
},
27443 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_extrq
, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V16QI
},
27444 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertqi
, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT
},
27445 { OPTION_MASK_ISA_SSE4A
, CODE_FOR_sse4a_insertq
, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27448 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aeskeygenassist
, 0, IX86_BUILTIN_AESKEYGENASSIST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_INT
},
27449 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesimc
, 0, IX86_BUILTIN_AESIMC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27451 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenc
, 0, IX86_BUILTIN_AESENC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27452 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesenclast
, 0, IX86_BUILTIN_AESENCLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27453 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdec
, 0, IX86_BUILTIN_AESDEC128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27454 { OPTION_MASK_ISA_SSE2
, CODE_FOR_aesdeclast
, 0, IX86_BUILTIN_AESDECLAST128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27457 { OPTION_MASK_ISA_SSE2
, CODE_FOR_pclmulqdq
, 0, IX86_BUILTIN_PCLMULQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI_INT
},
27460 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv4df3
, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27461 { OPTION_MASK_ISA_AVX
, CODE_FOR_addv8sf3
, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27462 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv4df3
, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27463 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_addsubv8sf3
, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27464 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv4df3
, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27465 { OPTION_MASK_ISA_AVX
, CODE_FOR_andv8sf3
, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27466 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv4df3
, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27467 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_andnotv8sf3
, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27468 { OPTION_MASK_ISA_AVX
, CODE_FOR_divv4df3
, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27469 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_divv8sf3
, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27470 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv4df3
, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27471 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv8sf3
, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27472 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_hsubv4df3
, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27473 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_haddv8sf3
, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27474 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv4df3
, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27475 { OPTION_MASK_ISA_AVX
, CODE_FOR_smaxv8sf3
, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27476 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv4df3
, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27477 { OPTION_MASK_ISA_AVX
, CODE_FOR_sminv8sf3
, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27478 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv4df3
, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27479 { OPTION_MASK_ISA_AVX
, CODE_FOR_mulv8sf3
, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27480 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv4df3
, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27481 { OPTION_MASK_ISA_AVX
, CODE_FOR_iorv8sf3
, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27482 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv4df3
, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27483 { OPTION_MASK_ISA_AVX
, CODE_FOR_subv8sf3
, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27484 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv4df3
, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27485 { OPTION_MASK_ISA_AVX
, CODE_FOR_xorv8sf3
, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27487 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv2df3
, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DI
},
27488 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4sf3
, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SI
},
27489 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv4df3
, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DI
},
27490 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilvarv8sf3
, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27492 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendpd256
, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27493 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendps256
, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27494 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvpd256
, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_V4DF
},
27495 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_blendvps256
, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_V8SF
},
27496 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_dpps256
, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27497 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufpd256
, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27498 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_shufps256
, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27499 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv2df3
, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27500 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vmcmpv4sf3
, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27501 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv2df3
, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
27502 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4sf3
, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
27503 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv4df3
, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27504 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cmpv8sf3
, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27505 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v4df
, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF_INT
},
27506 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8sf
, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF_INT
},
27507 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vextractf128v8si
, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI_INT
},
27508 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv4siv4df2
, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SI
},
27509 { OPTION_MASK_ISA_AVX
, CODE_FOR_floatv8siv8sf2
, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SI
},
27510 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2ps256
, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256
, UNKNOWN
, (int) V4SF_FTYPE_V4DF
},
27511 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2dq256
, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27512 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtps2pd256
, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4SF
},
27513 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv4dfv4si2
, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27514 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_cvtpd2dq256
, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256
, UNKNOWN
, (int) V4SI_FTYPE_V4DF
},
27515 { OPTION_MASK_ISA_AVX
, CODE_FOR_fix_truncv8sfv8si2
, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27516 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v4df3
, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF_INT
},
27517 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8sf3
, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF_INT
},
27518 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vperm2f128v8si3
, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27519 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv2df
, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_INT
},
27520 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4sf
, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_INT
},
27521 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv4df
, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27522 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vpermilv8sf
, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27523 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v4df
, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V2DF_INT
},
27524 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8sf
, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V4SF_INT
},
27525 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vinsertf128v8si
, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_INT
},
27527 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movshdup256
, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27528 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movsldup256
, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27529 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movddup256
, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27531 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv4df2
, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27532 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_sqrtv8sf2
, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27533 { OPTION_MASK_ISA_AVX
, CODE_FOR_sqrtv8sf2
, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27534 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rsqrtv8sf2
, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27535 { OPTION_MASK_ISA_AVX
, CODE_FOR_rsqrtv8sf2
, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27537 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_rcpv8sf2
, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27539 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27540 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_INT
},
27542 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256
, (enum rtx_code
) ROUND_FLOOR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27543 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256
, (enum rtx_code
) ROUND_CEIL
, (int) V4DF_FTYPE_V4DF_ROUND
},
27544 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256
, (enum rtx_code
) ROUND_TRUNC
, (int) V4DF_FTYPE_V4DF_ROUND
},
27545 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd256
, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256
, (enum rtx_code
) ROUND_MXCSR
, (int) V4DF_FTYPE_V4DF_ROUND
},
27547 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2
, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF
},
27548 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv4df2_vec_pack_sfix
, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27550 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27551 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundpd_vec_pack_sfix256
, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V4DF_V4DF_ROUND
},
27553 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27554 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SF_FTYPE_V8SF_ROUND
},
27555 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256
, (enum rtx_code
) ROUND_TRUNC
, (int) V8SF_FTYPE_V8SF_ROUND
},
27556 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps256
, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256
, (enum rtx_code
) ROUND_MXCSR
, (int) V8SF_FTYPE_V8SF_ROUND
},
27558 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256
, (enum rtx_code
) ROUND_FLOOR
, (int) V8SI_FTYPE_V8SF_ROUND
},
27559 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_roundps_sfix256
, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256
, (enum rtx_code
) ROUND_CEIL
, (int) V8SI_FTYPE_V8SF_ROUND
},
27561 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2
, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF
},
27562 { OPTION_MASK_ISA_AVX
, CODE_FOR_roundv8sf2_sfix
, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V8SF
},
27564 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhpd256
, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27565 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklpd256
, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27566 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpckhps256
, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27567 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_unpcklps256
, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27569 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_si256_si
, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27570 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ps256_ps
, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27571 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_pd256_pd
, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27572 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8si
, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256
, UNKNOWN
, (int) V4SI_FTYPE_V8SI
},
27573 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v8sf
, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256
, UNKNOWN
, (int) V4SF_FTYPE_V8SF
},
27574 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_extract_lo_v4df
, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256
, UNKNOWN
, (int) V2DF_FTYPE_V4DF
},
27576 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD
, EQ
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27577 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD
, LTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27578 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd
, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD
, GTU
, (int) INT_FTYPE_V2DF_V2DF_PTEST
},
27579 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS
, EQ
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27580 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS
, LTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27581 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps
, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS
, GTU
, (int) INT_FTYPE_V4SF_V4SF_PTEST
},
27582 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256
, EQ
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27583 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256
, LTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27584 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestpd256
, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256
, GTU
, (int) INT_FTYPE_V4DF_V4DF_PTEST
},
27585 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256
, EQ
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27586 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256
, LTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27587 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_vtestps256
, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256
, GTU
, (int) INT_FTYPE_V8SF_V8SF_PTEST
},
27588 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256
, EQ
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27589 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256
, LTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27590 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_ptest256
, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256
, GTU
, (int) INT_FTYPE_V4DI_V4DI_PTEST
},
27592 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskpd256
, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256
, UNKNOWN
, (int) INT_FTYPE_V4DF
},
27593 { OPTION_MASK_ISA_AVX
, CODE_FOR_avx_movmskps256
, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256
, UNKNOWN
, (int) INT_FTYPE_V8SF
},
27595 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv8sf3
, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SF
},
27596 { OPTION_MASK_ISA_AVX
, CODE_FOR_copysignv4df3
, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_V4DF
},
27598 { OPTION_MASK_ISA_AVX
, CODE_FOR_vec_pack_sfix_v4df
, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256
, UNKNOWN
, (int) V8SI_FTYPE_V4DF_V4DF
},
27601 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_mpsadbw
, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_INT
},
27602 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv32qi2
, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI
},
27603 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv16hi2
, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI
},
27604 { OPTION_MASK_ISA_AVX2
, CODE_FOR_absv8si2
, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI
},
27605 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packssdw
, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27606 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packsswb
, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27607 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packusdw
, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256
, UNKNOWN
, (int) V16HI_FTYPE_V8SI_V8SI
},
27608 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_packuswb
, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256
, UNKNOWN
, (int) V32QI_FTYPE_V16HI_V16HI
},
27609 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv32qi3
, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27610 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv16hi3
, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27611 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv8si3
, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27612 { OPTION_MASK_ISA_AVX2
, CODE_FOR_addv4di3
, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27613 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv32qi3
, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27614 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ssaddv16hi3
, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27615 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv32qi3
, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27616 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_usaddv16hi3
, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27617 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_palignrv2ti
, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
},
27618 { OPTION_MASK_ISA_AVX2
, CODE_FOR_andv4di3
, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27619 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_andnotv4di3
, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27620 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv32qi3
, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27621 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_uavgv16hi3
, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27622 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendvb
, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI_V32QI
},
27623 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblendw
, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI_INT
},
27624 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv32qi3
, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27625 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv16hi3
, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27626 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv8si3
, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27627 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_eqv4di3
, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27628 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv32qi3
, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27629 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv16hi3
, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27630 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv8si3
, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27631 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_gtv4di3
, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27632 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddwv16hi3
, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27633 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phadddv8si3
, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27634 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phaddswv16hi3
, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27635 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubwv16hi3
, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27636 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubdv8si3
, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27637 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_phsubswv16hi3
, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27638 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddubsw256
, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27639 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmaddwd
, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256
, UNKNOWN
, (int) V8SI_FTYPE_V16HI_V16HI
},
27640 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv32qi3
, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27641 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv16hi3
, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27642 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smaxv8si3
, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27643 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv32qi3
, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27644 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv16hi3
, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27645 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umaxv8si3
, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27646 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv32qi3
, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27647 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv16hi3
, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27648 { OPTION_MASK_ISA_AVX2
, CODE_FOR_sminv8si3
, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27649 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv32qi3
, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27650 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv16hi3
, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27651 { OPTION_MASK_ISA_AVX2
, CODE_FOR_uminv8si3
, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27652 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pmovmskb
, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256
, UNKNOWN
, (int) INT_FTYPE_V32QI
},
27653 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv16qiv16hi2
, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27654 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8qiv8si2
, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27655 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4qiv4di2
, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27656 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv8hiv8si2
, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27657 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4hiv4di2
, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27658 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sign_extendv4siv4di2
, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27659 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv16qiv16hi2
, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16QI
},
27660 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8qiv8si2
, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256
, UNKNOWN
, (int) V8SI_FTYPE_V16QI
},
27661 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4qiv4di2
, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V16QI
},
27662 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv8hiv8si2
, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256
, UNKNOWN
, (int) V8SI_FTYPE_V8HI
},
27663 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4hiv4di2
, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8HI
},
27664 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_zero_extendv4siv4di2
, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4SI
},
27665 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_smult_even_v8si
, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27666 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_umulhrswv16hi3
, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27667 { OPTION_MASK_ISA_AVX2
, CODE_FOR_umulv16hi3_highpart
, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27668 { OPTION_MASK_ISA_AVX2
, CODE_FOR_smulv16hi3_highpart
, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27669 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv16hi3
, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27670 { OPTION_MASK_ISA_AVX2
, CODE_FOR_mulv8si3
, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27671 { OPTION_MASK_ISA_AVX2
, CODE_FOR_vec_widen_umult_even_v8si
, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V8SI_V8SI
},
27672 { OPTION_MASK_ISA_AVX2
, CODE_FOR_iorv4di3
, "__builtin_ia32_por256", IX86_BUILTIN_POR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27673 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psadbw
, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256
, UNKNOWN
, (int) V16HI_FTYPE_V32QI_V32QI
},
27674 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufbv32qi3
, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27675 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufdv3
, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_INT
},
27676 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshufhwv3
, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27677 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pshuflwv3
, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_INT
},
27678 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv32qi3
, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27679 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv16hi3
, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27680 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_psignv8si3
, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27681 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlv2ti3
, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27682 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27683 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv16hi3
, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27684 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27685 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv8si3
, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27686 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27687 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashlv4di3
, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27688 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27689 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv16hi3
, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27690 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27691 { OPTION_MASK_ISA_AVX2
, CODE_FOR_ashrv8si3
, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27692 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrv2ti3
, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_CONVERT
},
27693 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_SI_COUNT
},
27694 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv16hi3
, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V8HI_COUNT
},
27695 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_SI_COUNT
},
27696 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv8si3
, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V4SI_COUNT
},
27697 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT_COUNT
},
27698 { OPTION_MASK_ISA_AVX2
, CODE_FOR_lshrv4di3
, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_COUNT
},
27699 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv32qi3
, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27700 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv16hi3
, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27701 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv8si3
, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27702 { OPTION_MASK_ISA_AVX2
, CODE_FOR_subv4di3
, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27703 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv32qi3
, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27704 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_sssubv16hi3
, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27705 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv32qi3
, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27706 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ussubv16hi3
, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27707 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv32qi
, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27708 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv16hi
, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27709 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv8si
, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27710 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_highv4di
, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27711 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv32qi
, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256
, UNKNOWN
, (int) V32QI_FTYPE_V32QI_V32QI
},
27712 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv16hi
, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256
, UNKNOWN
, (int) V16HI_FTYPE_V16HI_V16HI
},
27713 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv8si
, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27714 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_interleave_lowv4di
, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27715 { OPTION_MASK_ISA_AVX2
, CODE_FOR_xorv4di3
, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27716 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4sf
, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
27717 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv8sf
, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256
, UNKNOWN
, (int) V8SF_FTYPE_V4SF
},
27718 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vec_dupv4df
, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256
, UNKNOWN
, (int) V4DF_FTYPE_V2DF
},
27719 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_vbroadcasti128_v4di
, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27720 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv4si
, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI_INT
},
27721 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pblenddv8si
, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI_INT
},
27722 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv32qi
, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256
, UNKNOWN
, (int) V32QI_FTYPE_V16QI
},
27723 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16hi
, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256
, UNKNOWN
, (int) V16HI_FTYPE_V8HI
},
27724 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8si
, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256
, UNKNOWN
, (int) V8SI_FTYPE_V4SI
},
27725 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4di
, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256
, UNKNOWN
, (int) V4DI_FTYPE_V2DI
},
27726 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv16qi
, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI
},
27727 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv8hi
, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI
},
27728 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv4si
, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI
},
27729 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_pbroadcastv2di
, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI
},
27730 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8si
, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27731 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permvarv8sf
, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256
, UNKNOWN
, (int) V8SF_FTYPE_V8SF_V8SI
},
27732 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4df
, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256
, UNKNOWN
, (int) V4DF_FTYPE_V4DF_INT
},
27733 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv4di
, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_INT
},
27734 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_permv2ti
, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI_INT
},
27735 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_extracti128
, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256
, UNKNOWN
, (int) V2DI_FTYPE_V4DI_INT
},
27736 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_inserti128
, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V2DI_INT
},
27737 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4di
, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27738 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv2di
, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27739 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv8si
, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27740 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashlvv4si
, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27741 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv8si
, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27742 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_ashrvv4si
, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
27743 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4di
, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI
, UNKNOWN
, (int) V4DI_FTYPE_V4DI_V4DI
},
27744 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv2di
, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
27745 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv8si
, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI
, UNKNOWN
, (int) V8SI_FTYPE_V8SI_V8SI
},
27746 { OPTION_MASK_ISA_AVX2
, CODE_FOR_avx2_lshrvv4si
, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
/* LZCNT: leading-zero count on a 16-bit operand.  */
27748 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
/* BMI: bit-field extract and 16-bit trailing-zero count.  */
27751 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27752 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27753 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
/* TBM: bit-field extract with immediate control.  */
27756 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27757 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* F16C: half-precision <-> single-precision conversions.  */
27760 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
27761 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
27762 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
27763 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
/* BMI2: zero-high-bits, parallel bit deposit/extract.  */
27766 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27767 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27768 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27769 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
27770 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
27771 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
/* FMA4 and XOP.  */

/* Shorthand aliases mapping the MULTI_ARG_* names used by
   bdesc_multi_arg below onto the canonical *_FTYPE_* function-type
   enumerators.  Naming scheme: MULTI_ARG_<nargs>_<element kind>,
   with optional suffixes: "2" = 256-bit (doubled) vector width,
   "_IMM" = integer immediate operand, "_CMP" = comparison (rtx code
   stored in the flag field), "_TF" = always-true/false comparison,
   "_I"/"_I1" = permute selector immediate (128-/256-bit forms).  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
27828 static const struct builtin_description bdesc_multi_arg
[] =
27830 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v4sf
,
27831 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS
,
27832 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27833 { OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_vmfmadd_v2df
,
27834 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD
,
27835 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27837 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v4sf
,
27838 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3
,
27839 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27840 { OPTION_MASK_ISA_FMA
, CODE_FOR_fmai_vmfmadd_v2df
,
27841 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3
,
27842 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27844 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4sf
,
27845 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS
,
27846 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27847 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v2df
,
27848 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD
,
27849 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27850 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v8sf
,
27851 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256
,
27852 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27853 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fma4i_fmadd_v4df
,
27854 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256
,
27855 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27857 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4sf
,
27858 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS
,
27859 UNKNOWN
, (int)MULTI_ARG_3_SF
},
27860 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v2df
,
27861 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD
,
27862 UNKNOWN
, (int)MULTI_ARG_3_DF
},
27863 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v8sf
,
27864 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256
,
27865 UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27866 { OPTION_MASK_ISA_FMA
| OPTION_MASK_ISA_FMA4
, CODE_FOR_fmaddsub_v4df
,
27867 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256
,
27868 UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27870 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27871 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2di
, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI
, UNKNOWN
, (int)MULTI_ARG_3_DI
},
27872 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4si
, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27873 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8hi
, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27874 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16qi
, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI
,UNKNOWN
, (int)MULTI_ARG_3_QI
},
27875 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v2df
, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF
, UNKNOWN
, (int)MULTI_ARG_3_DF
},
27876 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4sf
, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF
, UNKNOWN
, (int)MULTI_ARG_3_SF
},
27878 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27879 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4di256
, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256
, UNKNOWN
, (int)MULTI_ARG_3_DI2
},
27880 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8si256
, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256
, UNKNOWN
, (int)MULTI_ARG_3_SI2
},
27881 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v16hi256
, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256
, UNKNOWN
, (int)MULTI_ARG_3_HI2
},
27882 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v32qi256
, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256
, UNKNOWN
, (int)MULTI_ARG_3_QI2
},
27883 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v4df256
, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256
, UNKNOWN
, (int)MULTI_ARG_3_DF2
},
27884 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcmov_v8sf256
, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256
, UNKNOWN
, (int)MULTI_ARG_3_SF2
},
27886 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pperm
, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM
, UNKNOWN
, (int)MULTI_ARG_3_QI
},
27888 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssww
, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27889 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsww
, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW
, UNKNOWN
, (int)MULTI_ARG_3_HI
},
27890 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsswd
, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27891 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacswd
, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27892 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdd
, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27893 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdd
, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD
, UNKNOWN
, (int)MULTI_ARG_3_SI
},
27894 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdql
, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27895 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacssdqh
, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27896 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdql
, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27897 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmacsdqh
, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH
, UNKNOWN
, (int)MULTI_ARG_3_SI_DI
},
27898 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcsswd
, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27899 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pmadcswd
, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD
, UNKNOWN
, (int)MULTI_ARG_3_HI_SI
},
27901 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv2di3
, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27902 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv4si3
, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27903 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv8hi3
, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27904 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vrotlv16qi3
, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27905 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv2di3
, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM
, UNKNOWN
, (int)MULTI_ARG_2_DI_IMM
},
27906 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv4si3
, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM
, UNKNOWN
, (int)MULTI_ARG_2_SI_IMM
},
27907 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv8hi3
, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM
, UNKNOWN
, (int)MULTI_ARG_2_HI_IMM
},
27908 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_rotlv16qi3
, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM
, UNKNOWN
, (int)MULTI_ARG_2_QI_IMM
},
27909 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav2di3
, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27910 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav4si3
, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27911 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav8hi3
, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27912 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shav16qi3
, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27913 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv2di3
, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ
, UNKNOWN
, (int)MULTI_ARG_2_DI
},
27914 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv4si3
, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD
, UNKNOWN
, (int)MULTI_ARG_2_SI
},
27915 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv8hi3
, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW
, UNKNOWN
, (int)MULTI_ARG_2_HI
},
27916 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_shlv16qi3
, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB
, UNKNOWN
, (int)MULTI_ARG_2_QI
},
27918 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv4sf2
, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS
, UNKNOWN
, (int)MULTI_ARG_2_SF
},
27919 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vmfrczv2df2
, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD
, UNKNOWN
, (int)MULTI_ARG_2_DF
},
27920 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4sf2
, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS
, UNKNOWN
, (int)MULTI_ARG_1_SF
},
27921 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv2df2
, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD
, UNKNOWN
, (int)MULTI_ARG_1_DF
},
27922 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv8sf2
, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256
, UNKNOWN
, (int)MULTI_ARG_1_SF2
},
27923 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_frczv4df2
, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256
, UNKNOWN
, (int)MULTI_ARG_1_DF2
},
27925 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbw
, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27926 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbd
, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27927 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddbq
, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27928 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwd
, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27929 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddwq
, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27930 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadddq
, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27931 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubw
, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27932 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubd
, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD
, UNKNOWN
, (int)MULTI_ARG_1_QI_SI
},
27933 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddubq
, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ
, UNKNOWN
, (int)MULTI_ARG_1_QI_DI
},
27934 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwd
, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27935 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phadduwq
, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ
, UNKNOWN
, (int)MULTI_ARG_1_HI_DI
},
27936 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phaddudq
, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27937 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubbw
, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW
, UNKNOWN
, (int)MULTI_ARG_1_QI_HI
},
27938 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubwd
, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD
, UNKNOWN
, (int)MULTI_ARG_1_HI_SI
},
27939 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_phsubdq
, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ
, UNKNOWN
, (int)MULTI_ARG_1_SI_DI
},
27941 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27942 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27943 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27944 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB
, LT
, (int)MULTI_ARG_2_QI_CMP
},
27945 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB
, LE
, (int)MULTI_ARG_2_QI_CMP
},
27946 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB
, GT
, (int)MULTI_ARG_2_QI_CMP
},
27947 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv16qi3
, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB
, GE
, (int)MULTI_ARG_2_QI_CMP
},
27949 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27950 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27951 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27952 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW
, LT
, (int)MULTI_ARG_2_HI_CMP
},
27953 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW
, LE
, (int)MULTI_ARG_2_HI_CMP
},
27954 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW
, GT
, (int)MULTI_ARG_2_HI_CMP
},
27955 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv8hi3
, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW
, GE
, (int)MULTI_ARG_2_HI_CMP
},
27957 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27958 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27959 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27960 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD
, LT
, (int)MULTI_ARG_2_SI_CMP
},
27961 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED
, LE
, (int)MULTI_ARG_2_SI_CMP
},
27962 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD
, GT
, (int)MULTI_ARG_2_SI_CMP
},
27963 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv4si3
, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED
, GE
, (int)MULTI_ARG_2_SI_CMP
},
27965 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27966 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27967 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27968 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ
, LT
, (int)MULTI_ARG_2_DI_CMP
},
27969 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ
, LE
, (int)MULTI_ARG_2_DI_CMP
},
27970 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ
, GT
, (int)MULTI_ARG_2_DI_CMP
},
27971 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmpv2di3
, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ
, GE
, (int)MULTI_ARG_2_DI_CMP
},
27973 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB
, EQ
, (int)MULTI_ARG_2_QI_CMP
},
27974 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27975 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v16qi3
,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB
, NE
, (int)MULTI_ARG_2_QI_CMP
},
27976 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB
, LTU
, (int)MULTI_ARG_2_QI_CMP
},
27977 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB
, LEU
, (int)MULTI_ARG_2_QI_CMP
},
27978 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB
, GTU
, (int)MULTI_ARG_2_QI_CMP
},
27979 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv16qi3
, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB
, GEU
, (int)MULTI_ARG_2_QI_CMP
},
27981 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW
, EQ
, (int)MULTI_ARG_2_HI_CMP
},
27982 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27983 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v8hi3
, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW
, NE
, (int)MULTI_ARG_2_HI_CMP
},
27984 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW
, LTU
, (int)MULTI_ARG_2_HI_CMP
},
27985 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW
, LEU
, (int)MULTI_ARG_2_HI_CMP
},
27986 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW
, GTU
, (int)MULTI_ARG_2_HI_CMP
},
27987 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv8hi3
, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW
, GEU
, (int)MULTI_ARG_2_HI_CMP
},
27989 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD
, EQ
, (int)MULTI_ARG_2_SI_CMP
},
27990 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27991 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v4si3
, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD
, NE
, (int)MULTI_ARG_2_SI_CMP
},
27992 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD
, LTU
, (int)MULTI_ARG_2_SI_CMP
},
27993 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD
, LEU
, (int)MULTI_ARG_2_SI_CMP
},
27994 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD
, GTU
, (int)MULTI_ARG_2_SI_CMP
},
27995 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv4si3
, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD
, GEU
, (int)MULTI_ARG_2_SI_CMP
},
27997 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ
, EQ
, (int)MULTI_ARG_2_DI_CMP
},
27998 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
27999 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_uns2v2di3
, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ
, NE
, (int)MULTI_ARG_2_DI_CMP
},
28000 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ
, LTU
, (int)MULTI_ARG_2_DI_CMP
},
28001 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ
, LEU
, (int)MULTI_ARG_2_DI_CMP
},
28002 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ
, GTU
, (int)MULTI_ARG_2_DI_CMP
},
28003 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_maskcmp_unsv2di3
, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ
, GEU
, (int)MULTI_ARG_2_DI_CMP
},
28005 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28006 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28007 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28008 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ
, (enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28009 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_QI_TF
},
28010 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_HI_TF
},
28011 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_SI_TF
},
28012 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ
,(enum rtx_code
) PCOM_FALSE
, (int)MULTI_ARG_2_DI_TF
},
28014 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28015 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28016 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28017 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28018 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv16qi3
, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_QI_TF
},
28019 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv8hi3
, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_HI_TF
},
28020 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv4si3
, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_SI_TF
},
28021 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_pcom_tfv2di3
, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ
, (enum rtx_code
) PCOM_TRUE
, (int)MULTI_ARG_2_DI_TF
},
28023 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v2df3
, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I
},
28024 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4sf3
, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I
},
28025 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v4df3
, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256
, UNKNOWN
, (int)MULTI_ARG_4_DF2_DI_I1
},
28026 { OPTION_MASK_ISA_XOP
, CODE_FOR_xop_vpermil2v8sf3
, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256
, UNKNOWN
, (int)MULTI_ARG_4_SF2_SI_I1
},
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
28034 static const struct builtin_description bdesc_tm
[] =
28036 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28037 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28038 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM64", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M64
, UNKNOWN
, VOID_FTYPE_PV2SI_V2SI
},
28039 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28040 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28041 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28042 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM64", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M64
, UNKNOWN
, V2SI_FTYPE_PCV2SI
},
28044 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28045 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaRM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28046 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_WaWM128", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M128
, UNKNOWN
, VOID_FTYPE_PV4SF_V4SF
},
28047 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28048 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaRM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28049 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RaWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28050 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_RfWM128", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M128
, UNKNOWN
, V4SF_FTYPE_PCV4SF
},
28052 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28053 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaRM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAR_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28054 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_WaWM256", (enum ix86_builtins
) BUILT_IN_TM_STORE_WAW_M256
, UNKNOWN
, VOID_FTYPE_PV8SF_V8SF
},
28055 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28056 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaRM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAR_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28057 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RaWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RAW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28058 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_RfWM256", (enum ix86_builtins
) BUILT_IN_TM_LOAD_RFW_M256
, UNKNOWN
, V8SF_FTYPE_PCV8SF
},
28060 { OPTION_MASK_ISA_MMX
, CODE_FOR_nothing
, "__builtin__ITM_LM64", (enum ix86_builtins
) BUILT_IN_TM_LOG_M64
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28061 { OPTION_MASK_ISA_SSE
, CODE_FOR_nothing
, "__builtin__ITM_LM128", (enum ix86_builtins
) BUILT_IN_TM_LOG_M128
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28062 { OPTION_MASK_ISA_AVX
, CODE_FOR_nothing
, "__builtin__ITM_LM256", (enum ix86_builtins
) BUILT_IN_TM_LOG_M256
, UNKNOWN
, VOID_FTYPE_PCVOID
},
28065 /* TM callbacks. */
28067 /* Return the builtin decl needed to load a vector of TYPE. */
28070 ix86_builtin_tm_load (tree type
)
28072 if (TREE_CODE (type
) == VECTOR_TYPE
)
28074 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28077 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64
);
28079 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128
);
28081 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256
);
28087 /* Return the builtin decl needed to store a vector of TYPE. */
28090 ix86_builtin_tm_store (tree type
)
28092 if (TREE_CODE (type
) == VECTOR_TYPE
)
28094 switch (tree_low_cst (TYPE_SIZE (type
), 1))
28097 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64
);
28099 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128
);
28101 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256
);
28107 /* Initialize the transactional memory vector load/store builtins. */
28110 ix86_init_tm_builtins (void)
28112 enum ix86_builtin_func_type ftype
;
28113 const struct builtin_description
*d
;
28116 tree attrs_load
, attrs_type_load
, attrs_store
, attrs_type_store
;
28117 tree attrs_log
, attrs_type_log
;
28122 /* If there are no builtins defined, we must be compiling in a
28123 language without trans-mem support. */
28124 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1
))
28127 /* Use whatever attributes a normal TM load has. */
28128 decl
= builtin_decl_explicit (BUILT_IN_TM_LOAD_1
);
28129 attrs_load
= DECL_ATTRIBUTES (decl
);
28130 attrs_type_load
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28131 /* Use whatever attributes a normal TM store has. */
28132 decl
= builtin_decl_explicit (BUILT_IN_TM_STORE_1
);
28133 attrs_store
= DECL_ATTRIBUTES (decl
);
28134 attrs_type_store
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28135 /* Use whatever attributes a normal TM log has. */
28136 decl
= builtin_decl_explicit (BUILT_IN_TM_LOG
);
28137 attrs_log
= DECL_ATTRIBUTES (decl
);
28138 attrs_type_log
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
28140 for (i
= 0, d
= bdesc_tm
;
28141 i
< ARRAY_SIZE (bdesc_tm
);
28144 if ((d
->mask
& ix86_isa_flags
) != 0
28145 || (lang_hooks
.builtin_function
28146 == lang_hooks
.builtin_function_ext_scope
))
28148 tree type
, attrs
, attrs_type
;
28149 enum built_in_function code
= (enum built_in_function
) d
->code
;
28151 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28152 type
= ix86_get_builtin_func_type (ftype
);
28154 if (BUILTIN_TM_LOAD_P (code
))
28156 attrs
= attrs_load
;
28157 attrs_type
= attrs_type_load
;
28159 else if (BUILTIN_TM_STORE_P (code
))
28161 attrs
= attrs_store
;
28162 attrs_type
= attrs_type_store
;
28167 attrs_type
= attrs_type_log
;
28169 decl
= add_builtin_function (d
->name
, type
, code
, BUILT_IN_NORMAL
,
28170 /* The builtin without the prefix for
28171 calling it directly. */
28172 d
->name
+ strlen ("__builtin_"),
28174 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
28175 set the TYPE_ATTRIBUTES. */
28176 decl_attributes (&TREE_TYPE (decl
), attrs_type
, ATTR_FLAG_BUILT_IN
);
28178 set_builtin_decl (code
, decl
, false);
28183 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
28184 in the current target ISA to allow the user to compile particular modules
28185 with different target specific options that differ from the command line
28188 ix86_init_mmx_sse_builtins (void)
28190 const struct builtin_description
* d
;
28191 enum ix86_builtin_func_type ftype
;
28194 /* Add all special builtins with variable number of operands. */
28195 for (i
= 0, d
= bdesc_special_args
;
28196 i
< ARRAY_SIZE (bdesc_special_args
);
28202 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28203 def_builtin (d
->mask
, d
->name
, ftype
, d
->code
);
28206 /* Add all builtins with variable number of operands. */
28207 for (i
= 0, d
= bdesc_args
;
28208 i
< ARRAY_SIZE (bdesc_args
);
28214 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28215 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28218 /* pcmpestr[im] insns. */
28219 for (i
= 0, d
= bdesc_pcmpestr
;
28220 i
< ARRAY_SIZE (bdesc_pcmpestr
);
28223 if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
28224 ftype
= V16QI_FTYPE_V16QI_INT_V16QI_INT_INT
;
28226 ftype
= INT_FTYPE_V16QI_INT_V16QI_INT_INT
;
28227 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28230 /* pcmpistr[im] insns. */
28231 for (i
= 0, d
= bdesc_pcmpistr
;
28232 i
< ARRAY_SIZE (bdesc_pcmpistr
);
28235 if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
28236 ftype
= V16QI_FTYPE_V16QI_V16QI_INT
;
28238 ftype
= INT_FTYPE_V16QI_V16QI_INT
;
28239 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28242 /* comi/ucomi insns. */
28243 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
28245 if (d
->mask
== OPTION_MASK_ISA_SSE2
)
28246 ftype
= INT_FTYPE_V2DF_V2DF
;
28248 ftype
= INT_FTYPE_V4SF_V4SF
;
28249 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28253 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_ldmxcsr",
28254 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_LDMXCSR
);
28255 def_builtin (OPTION_MASK_ISA_SSE
, "__builtin_ia32_stmxcsr",
28256 UNSIGNED_FTYPE_VOID
, IX86_BUILTIN_STMXCSR
);
28258 /* SSE or 3DNow!A */
28259 def_builtin (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28260 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR
,
28261 IX86_BUILTIN_MASKMOVQ
);
28264 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_maskmovdqu",
28265 VOID_FTYPE_V16QI_V16QI_PCHAR
, IX86_BUILTIN_MASKMOVDQU
);
28267 def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_clflush",
28268 VOID_FTYPE_PCVOID
, IX86_BUILTIN_CLFLUSH
);
28269 x86_mfence
= def_builtin (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_mfence",
28270 VOID_FTYPE_VOID
, IX86_BUILTIN_MFENCE
);
28273 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
28274 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
28275 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
28276 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
28279 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
28280 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
28281 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
28282 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
28283 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
28284 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
28285 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
28286 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
28287 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
28288 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
28289 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
28290 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
28293 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
28294 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
28297 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
28298 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
28299 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
28300 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
28301 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
28302 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
28303 IX86_BUILTIN_RDRAND64_STEP
);
28306 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2df",
28307 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT
,
28308 IX86_BUILTIN_GATHERSIV2DF
);
28310 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4df",
28311 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT
,
28312 IX86_BUILTIN_GATHERSIV4DF
);
28314 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2df",
28315 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT
,
28316 IX86_BUILTIN_GATHERDIV2DF
);
28318 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4df",
28319 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT
,
28320 IX86_BUILTIN_GATHERDIV4DF
);
28322 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4sf",
28323 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT
,
28324 IX86_BUILTIN_GATHERSIV4SF
);
28326 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8sf",
28327 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT
,
28328 IX86_BUILTIN_GATHERSIV8SF
);
28330 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf",
28331 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT
,
28332 IX86_BUILTIN_GATHERDIV4SF
);
28334 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4sf256",
28335 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT
,
28336 IX86_BUILTIN_GATHERDIV8SF
);
28338 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv2di",
28339 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT
,
28340 IX86_BUILTIN_GATHERSIV2DI
);
28342 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4di",
28343 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT
,
28344 IX86_BUILTIN_GATHERSIV4DI
);
28346 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv2di",
28347 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT
,
28348 IX86_BUILTIN_GATHERDIV2DI
);
28350 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4di",
28351 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT
,
28352 IX86_BUILTIN_GATHERDIV4DI
);
28354 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv4si",
28355 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT
,
28356 IX86_BUILTIN_GATHERSIV4SI
);
28358 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gathersiv8si",
28359 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT
,
28360 IX86_BUILTIN_GATHERSIV8SI
);
28362 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si",
28363 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT
,
28364 IX86_BUILTIN_GATHERDIV4SI
);
28366 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatherdiv4si256",
28367 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT
,
28368 IX86_BUILTIN_GATHERDIV8SI
);
28370 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4df ",
28371 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT
,
28372 IX86_BUILTIN_GATHERALTSIV4DF
);
28374 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4sf256 ",
28375 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT
,
28376 IX86_BUILTIN_GATHERALTDIV8SF
);
28378 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltsiv4di ",
28379 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT
,
28380 IX86_BUILTIN_GATHERALTSIV4DI
);
28382 def_builtin (OPTION_MASK_ISA_AVX2
, "__builtin_ia32_gatheraltdiv4si256 ",
28383 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT
,
28384 IX86_BUILTIN_GATHERALTDIV8SI
);
28387 def_builtin (OPTION_MASK_ISA_RTM
, "__builtin_ia32_xabort",
28388 VOID_FTYPE_UNSIGNED
, IX86_BUILTIN_XABORT
);
28390 /* MMX access to the vec_init patterns. */
28391 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v2si",
28392 V2SI_FTYPE_INT_INT
, IX86_BUILTIN_VEC_INIT_V2SI
);
28394 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v4hi",
28395 V4HI_FTYPE_HI_HI_HI_HI
,
28396 IX86_BUILTIN_VEC_INIT_V4HI
);
28398 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_init_v8qi",
28399 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI
,
28400 IX86_BUILTIN_VEC_INIT_V8QI
);
28402 /* Access to the vec_extract patterns. */
28403 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2df",
28404 DOUBLE_FTYPE_V2DF_INT
, IX86_BUILTIN_VEC_EXT_V2DF
);
28405 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v2di",
28406 DI_FTYPE_V2DI_INT
, IX86_BUILTIN_VEC_EXT_V2DI
);
28407 def_builtin_const (OPTION_MASK_ISA_SSE
, "__builtin_ia32_vec_ext_v4sf",
28408 FLOAT_FTYPE_V4SF_INT
, IX86_BUILTIN_VEC_EXT_V4SF
);
28409 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v4si",
28410 SI_FTYPE_V4SI_INT
, IX86_BUILTIN_VEC_EXT_V4SI
);
28411 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v8hi",
28412 HI_FTYPE_V8HI_INT
, IX86_BUILTIN_VEC_EXT_V8HI
);
28414 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28415 "__builtin_ia32_vec_ext_v4hi",
28416 HI_FTYPE_V4HI_INT
, IX86_BUILTIN_VEC_EXT_V4HI
);
28418 def_builtin_const (OPTION_MASK_ISA_MMX
, "__builtin_ia32_vec_ext_v2si",
28419 SI_FTYPE_V2SI_INT
, IX86_BUILTIN_VEC_EXT_V2SI
);
28421 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_ext_v16qi",
28422 QI_FTYPE_V16QI_INT
, IX86_BUILTIN_VEC_EXT_V16QI
);
28424 /* Access to the vec_set patterns. */
28425 def_builtin_const (OPTION_MASK_ISA_SSE4_1
| OPTION_MASK_ISA_64BIT
,
28426 "__builtin_ia32_vec_set_v2di",
28427 V2DI_FTYPE_V2DI_DI_INT
, IX86_BUILTIN_VEC_SET_V2DI
);
28429 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4sf",
28430 V4SF_FTYPE_V4SF_FLOAT_INT
, IX86_BUILTIN_VEC_SET_V4SF
);
28432 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v4si",
28433 V4SI_FTYPE_V4SI_SI_INT
, IX86_BUILTIN_VEC_SET_V4SI
);
28435 def_builtin_const (OPTION_MASK_ISA_SSE2
, "__builtin_ia32_vec_set_v8hi",
28436 V8HI_FTYPE_V8HI_HI_INT
, IX86_BUILTIN_VEC_SET_V8HI
);
28438 def_builtin_const (OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
,
28439 "__builtin_ia32_vec_set_v4hi",
28440 V4HI_FTYPE_V4HI_HI_INT
, IX86_BUILTIN_VEC_SET_V4HI
);
28442 def_builtin_const (OPTION_MASK_ISA_SSE4_1
, "__builtin_ia32_vec_set_v16qi",
28443 V16QI_FTYPE_V16QI_QI_INT
, IX86_BUILTIN_VEC_SET_V16QI
);
28446 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_hi_step",
28447 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDSEED16_STEP
);
28448 def_builtin (OPTION_MASK_ISA_RDSEED
, "__builtin_ia32_rdseed_si_step",
28449 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDSEED32_STEP
);
28450 def_builtin (OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_64BIT
,
28451 "__builtin_ia32_rdseed_di_step",
28452 INT_FTYPE_PULONGLONG
, IX86_BUILTIN_RDSEED64_STEP
);
28455 def_builtin (0, "__builtin_ia32_addcarryx_u32",
28456 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
, IX86_BUILTIN_ADDCARRYX32
);
28457 def_builtin (OPTION_MASK_ISA_64BIT
,
28458 "__builtin_ia32_addcarryx_u64",
28459 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
,
28460 IX86_BUILTIN_ADDCARRYX64
);
28462 /* Add FMA4 multi-arg argument instructions */
28463 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
28468 ftype
= (enum ix86_builtin_func_type
) d
->flag
;
28469 def_builtin_const (d
->mask
, d
->name
, ftype
, d
->code
);
28473 /* This builds the processor_model struct type defined in
28474 libgcc/config/i386/cpuinfo.c */
28477 build_processor_model_struct (void)
28479 const char *field_name
[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
28481 tree field
= NULL_TREE
, field_chain
= NULL_TREE
;
28483 tree type
= make_node (RECORD_TYPE
);
28485 /* The first 3 fields are unsigned int. */
28486 for (i
= 0; i
< 3; ++i
)
28488 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
28489 get_identifier (field_name
[i
]), unsigned_type_node
);
28490 if (field_chain
!= NULL_TREE
)
28491 DECL_CHAIN (field
) = field_chain
;
28492 field_chain
= field
;
28495 /* The last field is an array of unsigned integers of size one. */
28496 field
= build_decl (UNKNOWN_LOCATION
, FIELD_DECL
,
28497 get_identifier (field_name
[3]),
28498 build_array_type (unsigned_type_node
,
28499 build_index_type (size_one_node
)));
28500 if (field_chain
!= NULL_TREE
)
28501 DECL_CHAIN (field
) = field_chain
;
28502 field_chain
= field
;
28504 finish_builtin_struct (type
, "__processor_model", field_chain
, NULL_TREE
);
28508 /* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */
28511 make_var_decl (tree type
, const char *name
)
28515 new_decl
= build_decl (UNKNOWN_LOCATION
,
28517 get_identifier(name
),
28520 DECL_EXTERNAL (new_decl
) = 1;
28521 TREE_STATIC (new_decl
) = 1;
28522 TREE_PUBLIC (new_decl
) = 1;
28523 DECL_INITIAL (new_decl
) = 0;
28524 DECL_ARTIFICIAL (new_decl
) = 0;
28525 DECL_PRESERVE_P (new_decl
) = 1;
28527 make_decl_one_only (new_decl
, DECL_ASSEMBLER_NAME (new_decl
));
28528 assemble_variable (new_decl
, 0, 0, 0);
28533 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
28534 into an integer defined in libgcc/config/i386/cpuinfo.c */
28537 fold_builtin_cpu (tree fndecl
, tree
*args
)
28540 enum ix86_builtins fn_code
= (enum ix86_builtins
)
28541 DECL_FUNCTION_CODE (fndecl
);
28542 tree param_string_cst
= NULL
;
28544 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
28545 enum processor_features
28561 /* These are the values for vendor types and cpu types and subtypes
28562 in cpuinfo.c. Cpu types and subtypes should be subtracted by
28563 the corresponding start value. */
28564 enum processor_model
28574 M_CPU_SUBTYPE_START
,
28575 M_INTEL_COREI7_NEHALEM
,
28576 M_INTEL_COREI7_WESTMERE
,
28577 M_INTEL_COREI7_SANDYBRIDGE
,
28578 M_AMDFAM10H_BARCELONA
,
28579 M_AMDFAM10H_SHANGHAI
,
28580 M_AMDFAM10H_ISTANBUL
,
28581 M_AMDFAM15H_BDVER1
,
28585 static struct _arch_names_table
28587 const char *const name
;
28588 const enum processor_model model
;
28590 const arch_names_table
[] =
28593 {"intel", M_INTEL
},
28594 {"atom", M_INTEL_ATOM
},
28595 {"core2", M_INTEL_CORE2
},
28596 {"corei7", M_INTEL_COREI7
},
28597 {"nehalem", M_INTEL_COREI7_NEHALEM
},
28598 {"westmere", M_INTEL_COREI7_WESTMERE
},
28599 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE
},
28600 {"amdfam10h", M_AMDFAM10H
},
28601 {"barcelona", M_AMDFAM10H_BARCELONA
},
28602 {"shanghai", M_AMDFAM10H_SHANGHAI
},
28603 {"istanbul", M_AMDFAM10H_ISTANBUL
},
28604 {"amdfam15h", M_AMDFAM15H
},
28605 {"bdver1", M_AMDFAM15H_BDVER1
},
28606 {"bdver2", M_AMDFAM15H_BDVER2
},
28609 static struct _isa_names_table
28611 const char *const name
;
28612 const enum processor_features feature
;
28614 const isa_names_table
[] =
28618 {"popcnt", F_POPCNT
},
28622 {"ssse3", F_SSSE3
},
28623 {"sse4.1", F_SSE4_1
},
28624 {"sse4.2", F_SSE4_2
},
28629 static tree __processor_model_type
= NULL_TREE
;
28630 static tree __cpu_model_var
= NULL_TREE
;
28632 if (__processor_model_type
== NULL_TREE
)
28633 __processor_model_type
= build_processor_model_struct ();
28635 if (__cpu_model_var
== NULL_TREE
)
28636 __cpu_model_var
= make_var_decl (__processor_model_type
,
28639 gcc_assert ((args
!= NULL
) && (*args
!= NULL
));
28641 param_string_cst
= *args
;
28642 while (param_string_cst
28643 && TREE_CODE (param_string_cst
) != STRING_CST
)
28645 /* *args must be a expr that can contain other EXPRS leading to a
28647 if (!EXPR_P (param_string_cst
))
28649 error ("Parameter to builtin must be a string constant or literal");
28650 return integer_zero_node
;
28652 param_string_cst
= TREE_OPERAND (EXPR_CHECK (param_string_cst
), 0);
28655 gcc_assert (param_string_cst
);
28657 if (fn_code
== IX86_BUILTIN_CPU_IS
)
28661 unsigned int field_val
= 0;
28662 unsigned int NUM_ARCH_NAMES
28663 = sizeof (arch_names_table
) / sizeof (struct _arch_names_table
);
28665 for (i
= 0; i
< NUM_ARCH_NAMES
; i
++)
28666 if (strcmp (arch_names_table
[i
].name
,
28667 TREE_STRING_POINTER (param_string_cst
)) == 0)
28670 if (i
== NUM_ARCH_NAMES
)
28672 error ("Parameter to builtin not valid: %s",
28673 TREE_STRING_POINTER (param_string_cst
));
28674 return integer_zero_node
;
28677 field
= TYPE_FIELDS (__processor_model_type
);
28678 field_val
= arch_names_table
[i
].model
;
28680 /* CPU types are stored in the next field. */
28681 if (field_val
> M_CPU_TYPE_START
28682 && field_val
< M_CPU_SUBTYPE_START
)
28684 field
= DECL_CHAIN (field
);
28685 field_val
-= M_CPU_TYPE_START
;
28688 /* CPU subtypes are stored in the next field. */
28689 if (field_val
> M_CPU_SUBTYPE_START
)
28691 field
= DECL_CHAIN ( DECL_CHAIN (field
));
28692 field_val
-= M_CPU_SUBTYPE_START
;
28695 /* Get the appropriate field in __cpu_model. */
28696 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
28699 /* Check the value. */
28700 return build2 (EQ_EXPR
, unsigned_type_node
, ref
,
28701 build_int_cstu (unsigned_type_node
, field_val
));
28703 else if (fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
28708 unsigned int field_val
= 0;
28709 unsigned int NUM_ISA_NAMES
28710 = sizeof (isa_names_table
) / sizeof (struct _isa_names_table
);
28712 for (i
= 0; i
< NUM_ISA_NAMES
; i
++)
28713 if (strcmp (isa_names_table
[i
].name
,
28714 TREE_STRING_POINTER (param_string_cst
)) == 0)
28717 if (i
== NUM_ISA_NAMES
)
28719 error ("Parameter to builtin not valid: %s",
28720 TREE_STRING_POINTER (param_string_cst
));
28721 return integer_zero_node
;
28724 field
= TYPE_FIELDS (__processor_model_type
);
28725 /* Get the last field, which is __cpu_features. */
28726 while (DECL_CHAIN (field
))
28727 field
= DECL_CHAIN (field
);
28729 /* Get the appropriate field: __cpu_model.__cpu_features */
28730 ref
= build3 (COMPONENT_REF
, TREE_TYPE (field
), __cpu_model_var
,
28733 /* Access the 0th element of __cpu_features array. */
28734 array_elt
= build4 (ARRAY_REF
, unsigned_type_node
, ref
,
28735 integer_zero_node
, NULL_TREE
, NULL_TREE
);
28737 field_val
= (1 << isa_names_table
[i
].feature
);
28738 /* Return __cpu_model.__cpu_features[0] & field_val */
28739 return build2 (BIT_AND_EXPR
, unsigned_type_node
, array_elt
,
28740 build_int_cstu (unsigned_type_node
, field_val
));
28742 gcc_unreachable ();
28746 ix86_fold_builtin (tree fndecl
, int n_args
,
28747 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
28749 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
28751 enum ix86_builtins fn_code
= (enum ix86_builtins
)
28752 DECL_FUNCTION_CODE (fndecl
);
28753 if (fn_code
== IX86_BUILTIN_CPU_IS
28754 || fn_code
== IX86_BUILTIN_CPU_SUPPORTS
)
28756 gcc_assert (n_args
== 1);
28757 return fold_builtin_cpu (fndecl
, args
);
28761 #ifdef SUBTARGET_FOLD_BUILTIN
28762 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
28768 /* Make builtins to detect cpu type and features supported. NAME is
28769 the builtin name, CODE is the builtin code, and FTYPE is the function
28770 type of the builtin. */
28773 make_cpu_type_builtin (const char* name
, int code
,
28774 enum ix86_builtin_func_type ftype
, bool is_const
)
28779 type
= ix86_get_builtin_func_type (ftype
);
28780 decl
= add_builtin_function (name
, type
, code
, BUILT_IN_MD
,
28782 gcc_assert (decl
!= NULL_TREE
);
28783 ix86_builtins
[(int) code
] = decl
;
28784 TREE_READONLY (decl
) = is_const
;
28787 /* Make builtins to get CPU type and features supported. The created
28790 __builtin_cpu_init (), to detect cpu type and features,
28791 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
28792 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
28796 ix86_init_platform_type_builtins (void)
28798 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT
,
28799 INT_FTYPE_VOID
, false);
28800 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS
,
28801 INT_FTYPE_PCCHAR
, true);
28802 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS
,
28803 INT_FTYPE_PCCHAR
, true);
28806 /* Internal method for ix86_init_builtins. */
28809 ix86_init_builtins_va_builtins_abi (void)
28811 tree ms_va_ref
, sysv_va_ref
;
28812 tree fnvoid_va_end_ms
, fnvoid_va_end_sysv
;
28813 tree fnvoid_va_start_ms
, fnvoid_va_start_sysv
;
28814 tree fnvoid_va_copy_ms
, fnvoid_va_copy_sysv
;
28815 tree fnattr_ms
= NULL_TREE
, fnattr_sysv
= NULL_TREE
;
28819 fnattr_ms
= build_tree_list (get_identifier ("ms_abi"), NULL_TREE
);
28820 fnattr_sysv
= build_tree_list (get_identifier ("sysv_abi"), NULL_TREE
);
28821 ms_va_ref
= build_reference_type (ms_va_list_type_node
);
28823 build_pointer_type (TREE_TYPE (sysv_va_list_type_node
));
28826 build_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
28827 fnvoid_va_start_ms
=
28828 build_varargs_function_type_list (void_type_node
, ms_va_ref
, NULL_TREE
);
28829 fnvoid_va_end_sysv
=
28830 build_function_type_list (void_type_node
, sysv_va_ref
, NULL_TREE
);
28831 fnvoid_va_start_sysv
=
28832 build_varargs_function_type_list (void_type_node
, sysv_va_ref
,
28834 fnvoid_va_copy_ms
=
28835 build_function_type_list (void_type_node
, ms_va_ref
, ms_va_list_type_node
,
28837 fnvoid_va_copy_sysv
=
28838 build_function_type_list (void_type_node
, sysv_va_ref
,
28839 sysv_va_ref
, NULL_TREE
);
28841 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms
,
28842 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28843 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms
,
28844 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28845 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms
,
28846 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_ms
);
28847 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv
,
28848 BUILT_IN_VA_START
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28849 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv
,
28850 BUILT_IN_VA_END
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28851 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv
,
28852 BUILT_IN_VA_COPY
, BUILT_IN_NORMAL
, NULL
, fnattr_sysv
);
28856 ix86_init_builtin_types (void)
28858 tree float128_type_node
, float80_type_node
;
28860 /* The __float80 type. */
28861 float80_type_node
= long_double_type_node
;
28862 if (TYPE_MODE (float80_type_node
) != XFmode
)
28864 /* The __float80 type. */
28865 float80_type_node
= make_node (REAL_TYPE
);
28867 TYPE_PRECISION (float80_type_node
) = 80;
28868 layout_type (float80_type_node
);
28870 lang_hooks
.types
.register_builtin_type (float80_type_node
, "__float80");
28872 /* The __float128 type. */
28873 float128_type_node
= make_node (REAL_TYPE
);
28874 TYPE_PRECISION (float128_type_node
) = 128;
28875 layout_type (float128_type_node
);
28876 lang_hooks
.types
.register_builtin_type (float128_type_node
, "__float128");
28878 /* This macro is built by i386-builtin-types.awk. */
28879 DEFINE_BUILTIN_PRIMITIVE_TYPES
;
28883 ix86_init_builtins (void)
28887 ix86_init_builtin_types ();
28889 /* Builtins to get CPU type and features. */
28890 ix86_init_platform_type_builtins ();
28892 /* TFmode support builtins. */
28893 def_builtin_const (0, "__builtin_infq",
28894 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_INFQ
);
28895 def_builtin_const (0, "__builtin_huge_valq",
28896 FLOAT128_FTYPE_VOID
, IX86_BUILTIN_HUGE_VALQ
);
28898 /* We will expand them to normal call if SSE isn't available since
28899 they are used by libgcc. */
28900 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128
);
28901 t
= add_builtin_function ("__builtin_fabsq", t
, IX86_BUILTIN_FABSQ
,
28902 BUILT_IN_MD
, "__fabstf2", NULL_TREE
);
28903 TREE_READONLY (t
) = 1;
28904 ix86_builtins
[(int) IX86_BUILTIN_FABSQ
] = t
;
28906 t
= ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128
);
28907 t
= add_builtin_function ("__builtin_copysignq", t
, IX86_BUILTIN_COPYSIGNQ
,
28908 BUILT_IN_MD
, "__copysigntf3", NULL_TREE
);
28909 TREE_READONLY (t
) = 1;
28910 ix86_builtins
[(int) IX86_BUILTIN_COPYSIGNQ
] = t
;
28912 ix86_init_tm_builtins ();
28913 ix86_init_mmx_sse_builtins ();
28916 ix86_init_builtins_va_builtins_abi ();
28918 #ifdef SUBTARGET_INIT_BUILTINS
28919 SUBTARGET_INIT_BUILTINS
;
28923 /* Return the ix86 builtin for CODE. */
28926 ix86_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
28928 if (code
>= IX86_BUILTIN_MAX
)
28929 return error_mark_node
;
28931 return ix86_builtins
[code
];
28934 /* Errors in the source file can cause expand_expr to return const0_rtx
28935 where we expect a vector. To avoid crashing, use one of the vector
28936 clear instructions. */
28938 safe_vector_operand (rtx x
, enum machine_mode mode
)
28940 if (x
== const0_rtx
)
28941 x
= CONST0_RTX (mode
);
28945 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
28948 ix86_expand_binop_builtin (enum insn_code icode
, tree exp
, rtx target
)
28951 tree arg0
= CALL_EXPR_ARG (exp
, 0);
28952 tree arg1
= CALL_EXPR_ARG (exp
, 1);
28953 rtx op0
= expand_normal (arg0
);
28954 rtx op1
= expand_normal (arg1
);
28955 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
28956 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
28957 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
28959 if (VECTOR_MODE_P (mode0
))
28960 op0
= safe_vector_operand (op0
, mode0
);
28961 if (VECTOR_MODE_P (mode1
))
28962 op1
= safe_vector_operand (op1
, mode1
);
28964 if (optimize
|| !target
28965 || GET_MODE (target
) != tmode
28966 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
28967 target
= gen_reg_rtx (tmode
);
28969 if (GET_MODE (op1
) == SImode
&& mode1
== TImode
)
28971 rtx x
= gen_reg_rtx (V4SImode
);
28972 emit_insn (gen_sse2_loadd (x
, op1
));
28973 op1
= gen_lowpart (TImode
, x
);
28976 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
28977 op0
= copy_to_mode_reg (mode0
, op0
);
28978 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode1
))
28979 op1
= copy_to_mode_reg (mode1
, op1
);
28981 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
28990 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
28993 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
28994 enum ix86_builtin_func_type m_type
,
28995 enum rtx_code sub_code
)
29000 bool comparison_p
= false;
29002 bool last_arg_constant
= false;
29003 int num_memory
= 0;
29006 enum machine_mode mode
;
29009 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
29013 case MULTI_ARG_4_DF2_DI_I
:
29014 case MULTI_ARG_4_DF2_DI_I1
:
29015 case MULTI_ARG_4_SF2_SI_I
:
29016 case MULTI_ARG_4_SF2_SI_I1
:
29018 last_arg_constant
= true;
29021 case MULTI_ARG_3_SF
:
29022 case MULTI_ARG_3_DF
:
29023 case MULTI_ARG_3_SF2
:
29024 case MULTI_ARG_3_DF2
:
29025 case MULTI_ARG_3_DI
:
29026 case MULTI_ARG_3_SI
:
29027 case MULTI_ARG_3_SI_DI
:
29028 case MULTI_ARG_3_HI
:
29029 case MULTI_ARG_3_HI_SI
:
29030 case MULTI_ARG_3_QI
:
29031 case MULTI_ARG_3_DI2
:
29032 case MULTI_ARG_3_SI2
:
29033 case MULTI_ARG_3_HI2
:
29034 case MULTI_ARG_3_QI2
:
29038 case MULTI_ARG_2_SF
:
29039 case MULTI_ARG_2_DF
:
29040 case MULTI_ARG_2_DI
:
29041 case MULTI_ARG_2_SI
:
29042 case MULTI_ARG_2_HI
:
29043 case MULTI_ARG_2_QI
:
29047 case MULTI_ARG_2_DI_IMM
:
29048 case MULTI_ARG_2_SI_IMM
:
29049 case MULTI_ARG_2_HI_IMM
:
29050 case MULTI_ARG_2_QI_IMM
:
29052 last_arg_constant
= true;
29055 case MULTI_ARG_1_SF
:
29056 case MULTI_ARG_1_DF
:
29057 case MULTI_ARG_1_SF2
:
29058 case MULTI_ARG_1_DF2
:
29059 case MULTI_ARG_1_DI
:
29060 case MULTI_ARG_1_SI
:
29061 case MULTI_ARG_1_HI
:
29062 case MULTI_ARG_1_QI
:
29063 case MULTI_ARG_1_SI_DI
:
29064 case MULTI_ARG_1_HI_DI
:
29065 case MULTI_ARG_1_HI_SI
:
29066 case MULTI_ARG_1_QI_DI
:
29067 case MULTI_ARG_1_QI_SI
:
29068 case MULTI_ARG_1_QI_HI
:
29072 case MULTI_ARG_2_DI_CMP
:
29073 case MULTI_ARG_2_SI_CMP
:
29074 case MULTI_ARG_2_HI_CMP
:
29075 case MULTI_ARG_2_QI_CMP
:
29077 comparison_p
= true;
29080 case MULTI_ARG_2_SF_TF
:
29081 case MULTI_ARG_2_DF_TF
:
29082 case MULTI_ARG_2_DI_TF
:
29083 case MULTI_ARG_2_SI_TF
:
29084 case MULTI_ARG_2_HI_TF
:
29085 case MULTI_ARG_2_QI_TF
:
29091 gcc_unreachable ();
29094 if (optimize
|| !target
29095 || GET_MODE (target
) != tmode
29096 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
29097 target
= gen_reg_rtx (tmode
);
29099 gcc_assert (nargs
<= 4);
29101 for (i
= 0; i
< nargs
; i
++)
29103 tree arg
= CALL_EXPR_ARG (exp
, i
);
29104 rtx op
= expand_normal (arg
);
29105 int adjust
= (comparison_p
) ? 1 : 0;
29106 enum machine_mode mode
= insn_data
[icode
].operand
[i
+adjust
+1].mode
;
29108 if (last_arg_constant
&& i
== nargs
- 1)
29110 if (!insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
))
29112 enum insn_code new_icode
= icode
;
29115 case CODE_FOR_xop_vpermil2v2df3
:
29116 case CODE_FOR_xop_vpermil2v4sf3
:
29117 case CODE_FOR_xop_vpermil2v4df3
:
29118 case CODE_FOR_xop_vpermil2v8sf3
:
29119 error ("the last argument must be a 2-bit immediate");
29120 return gen_reg_rtx (tmode
);
29121 case CODE_FOR_xop_rotlv2di3
:
29122 new_icode
= CODE_FOR_rotlv2di3
;
29124 case CODE_FOR_xop_rotlv4si3
:
29125 new_icode
= CODE_FOR_rotlv4si3
;
29127 case CODE_FOR_xop_rotlv8hi3
:
29128 new_icode
= CODE_FOR_rotlv8hi3
;
29130 case CODE_FOR_xop_rotlv16qi3
:
29131 new_icode
= CODE_FOR_rotlv16qi3
;
29133 if (CONST_INT_P (op
))
29135 int mask
= GET_MODE_BITSIZE (GET_MODE_INNER (tmode
)) - 1;
29136 op
= GEN_INT (INTVAL (op
) & mask
);
29137 gcc_checking_assert
29138 (insn_data
[icode
].operand
[i
+ 1].predicate (op
, mode
));
29142 gcc_checking_assert
29144 && insn_data
[new_icode
].operand
[0].mode
== tmode
29145 && insn_data
[new_icode
].operand
[1].mode
== tmode
29146 && insn_data
[new_icode
].operand
[2].mode
== mode
29147 && insn_data
[new_icode
].operand
[0].predicate
29148 == insn_data
[icode
].operand
[0].predicate
29149 && insn_data
[new_icode
].operand
[1].predicate
29150 == insn_data
[icode
].operand
[1].predicate
);
29156 gcc_unreachable ();
29163 if (VECTOR_MODE_P (mode
))
29164 op
= safe_vector_operand (op
, mode
);
29166 /* If we aren't optimizing, only allow one memory operand to be
29168 if (memory_operand (op
, mode
))
29171 gcc_assert (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
);
29174 || !insn_data
[icode
].operand
[i
+adjust
+1].predicate (op
, mode
)
29176 op
= force_reg (mode
, op
);
29180 args
[i
].mode
= mode
;
29186 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
29191 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
,
29192 GEN_INT ((int)sub_code
));
29193 else if (! comparison_p
)
29194 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
29197 rtx cmp_op
= gen_rtx_fmt_ee (sub_code
, GET_MODE (target
),
29201 pat
= GEN_FCN (icode
) (target
, cmp_op
, args
[0].op
, args
[1].op
);
29206 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
29210 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
, args
[3].op
);
29214 gcc_unreachable ();
29224 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
29225 insns with vec_merge. */
29228 ix86_expand_unop_vec_merge_builtin (enum insn_code icode
, tree exp
,
29232 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29233 rtx op1
, op0
= expand_normal (arg0
);
29234 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
29235 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
29237 if (optimize
|| !target
29238 || GET_MODE (target
) != tmode
29239 || !insn_data
[icode
].operand
[0].predicate (target
, tmode
))
29240 target
= gen_reg_rtx (tmode
);
29242 if (VECTOR_MODE_P (mode0
))
29243 op0
= safe_vector_operand (op0
, mode0
);
29245 if ((optimize
&& !register_operand (op0
, mode0
))
29246 || !insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
29247 op0
= copy_to_mode_reg (mode0
, op0
);
29250 if (!insn_data
[icode
].operand
[2].predicate (op1
, mode0
))
29251 op1
= copy_to_mode_reg (mode0
, op1
);
29253 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
29260 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
29263 ix86_expand_sse_compare (const struct builtin_description
*d
,
29264 tree exp
, rtx target
, bool swap
)
29267 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29268 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29269 rtx op0
= expand_normal (arg0
);
29270 rtx op1
= expand_normal (arg1
);
29272 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
29273 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
29274 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
29275 enum rtx_code comparison
= d
->comparison
;
29277 if (VECTOR_MODE_P (mode0
))
29278 op0
= safe_vector_operand (op0
, mode0
);
29279 if (VECTOR_MODE_P (mode1
))
29280 op1
= safe_vector_operand (op1
, mode1
);
29282 /* Swap operands if we have a comparison that isn't available in
29286 rtx tmp
= gen_reg_rtx (mode1
);
29287 emit_move_insn (tmp
, op1
);
29292 if (optimize
|| !target
29293 || GET_MODE (target
) != tmode
29294 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
29295 target
= gen_reg_rtx (tmode
);
29297 if ((optimize
&& !register_operand (op0
, mode0
))
29298 || !insn_data
[d
->icode
].operand
[1].predicate (op0
, mode0
))
29299 op0
= copy_to_mode_reg (mode0
, op0
);
29300 if ((optimize
&& !register_operand (op1
, mode1
))
29301 || !insn_data
[d
->icode
].operand
[2].predicate (op1
, mode1
))
29302 op1
= copy_to_mode_reg (mode1
, op1
);
29304 op2
= gen_rtx_fmt_ee (comparison
, mode0
, op0
, op1
);
29305 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
29312 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
29315 ix86_expand_sse_comi (const struct builtin_description
*d
, tree exp
,
29319 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29320 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29321 rtx op0
= expand_normal (arg0
);
29322 rtx op1
= expand_normal (arg1
);
29323 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
29324 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
29325 enum rtx_code comparison
= d
->comparison
;
29327 if (VECTOR_MODE_P (mode0
))
29328 op0
= safe_vector_operand (op0
, mode0
);
29329 if (VECTOR_MODE_P (mode1
))
29330 op1
= safe_vector_operand (op1
, mode1
);
29332 /* Swap operands if we have a comparison that isn't available in
29334 if (d
->flag
& BUILTIN_DESC_SWAP_OPERANDS
)
29341 target
= gen_reg_rtx (SImode
);
29342 emit_move_insn (target
, const0_rtx
);
29343 target
= gen_rtx_SUBREG (QImode
, target
, 0);
29345 if ((optimize
&& !register_operand (op0
, mode0
))
29346 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
29347 op0
= copy_to_mode_reg (mode0
, op0
);
29348 if ((optimize
&& !register_operand (op1
, mode1
))
29349 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
29350 op1
= copy_to_mode_reg (mode1
, op1
);
29352 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
29356 emit_insn (gen_rtx_SET (VOIDmode
,
29357 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
29358 gen_rtx_fmt_ee (comparison
, QImode
,
29362 return SUBREG_REG (target
);
29365 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
29368 ix86_expand_sse_round (const struct builtin_description
*d
, tree exp
,
29372 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29373 rtx op1
, op0
= expand_normal (arg0
);
29374 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
29375 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
29377 if (optimize
|| target
== 0
29378 || GET_MODE (target
) != tmode
29379 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
29380 target
= gen_reg_rtx (tmode
);
29382 if (VECTOR_MODE_P (mode0
))
29383 op0
= safe_vector_operand (op0
, mode0
);
29385 if ((optimize
&& !register_operand (op0
, mode0
))
29386 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
29387 op0
= copy_to_mode_reg (mode0
, op0
);
29389 op1
= GEN_INT (d
->comparison
);
29391 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
);
29399 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description
*d
,
29400 tree exp
, rtx target
)
29403 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29404 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29405 rtx op0
= expand_normal (arg0
);
29406 rtx op1
= expand_normal (arg1
);
29408 enum machine_mode tmode
= insn_data
[d
->icode
].operand
[0].mode
;
29409 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[1].mode
;
29410 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[2].mode
;
29412 if (optimize
|| target
== 0
29413 || GET_MODE (target
) != tmode
29414 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode
))
29415 target
= gen_reg_rtx (tmode
);
29417 op0
= safe_vector_operand (op0
, mode0
);
29418 op1
= safe_vector_operand (op1
, mode1
);
29420 if ((optimize
&& !register_operand (op0
, mode0
))
29421 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
29422 op0
= copy_to_mode_reg (mode0
, op0
);
29423 if ((optimize
&& !register_operand (op1
, mode1
))
29424 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
29425 op1
= copy_to_mode_reg (mode1
, op1
);
29427 op2
= GEN_INT (d
->comparison
);
29429 pat
= GEN_FCN (d
->icode
) (target
, op0
, op1
, op2
);
29436 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
29439 ix86_expand_sse_ptest (const struct builtin_description
*d
, tree exp
,
29443 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29444 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29445 rtx op0
= expand_normal (arg0
);
29446 rtx op1
= expand_normal (arg1
);
29447 enum machine_mode mode0
= insn_data
[d
->icode
].operand
[0].mode
;
29448 enum machine_mode mode1
= insn_data
[d
->icode
].operand
[1].mode
;
29449 enum rtx_code comparison
= d
->comparison
;
29451 if (VECTOR_MODE_P (mode0
))
29452 op0
= safe_vector_operand (op0
, mode0
);
29453 if (VECTOR_MODE_P (mode1
))
29454 op1
= safe_vector_operand (op1
, mode1
);
29456 target
= gen_reg_rtx (SImode
);
29457 emit_move_insn (target
, const0_rtx
);
29458 target
= gen_rtx_SUBREG (QImode
, target
, 0);
29460 if ((optimize
&& !register_operand (op0
, mode0
))
29461 || !insn_data
[d
->icode
].operand
[0].predicate (op0
, mode0
))
29462 op0
= copy_to_mode_reg (mode0
, op0
);
29463 if ((optimize
&& !register_operand (op1
, mode1
))
29464 || !insn_data
[d
->icode
].operand
[1].predicate (op1
, mode1
))
29465 op1
= copy_to_mode_reg (mode1
, op1
);
29467 pat
= GEN_FCN (d
->icode
) (op0
, op1
);
29471 emit_insn (gen_rtx_SET (VOIDmode
,
29472 gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
29473 gen_rtx_fmt_ee (comparison
, QImode
,
29477 return SUBREG_REG (target
);
29480 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
29483 ix86_expand_sse_pcmpestr (const struct builtin_description
*d
,
29484 tree exp
, rtx target
)
29487 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29488 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29489 tree arg2
= CALL_EXPR_ARG (exp
, 2);
29490 tree arg3
= CALL_EXPR_ARG (exp
, 3);
29491 tree arg4
= CALL_EXPR_ARG (exp
, 4);
29492 rtx scratch0
, scratch1
;
29493 rtx op0
= expand_normal (arg0
);
29494 rtx op1
= expand_normal (arg1
);
29495 rtx op2
= expand_normal (arg2
);
29496 rtx op3
= expand_normal (arg3
);
29497 rtx op4
= expand_normal (arg4
);
29498 enum machine_mode tmode0
, tmode1
, modev2
, modei3
, modev4
, modei5
, modeimm
;
29500 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
29501 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
29502 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
29503 modei3
= insn_data
[d
->icode
].operand
[3].mode
;
29504 modev4
= insn_data
[d
->icode
].operand
[4].mode
;
29505 modei5
= insn_data
[d
->icode
].operand
[5].mode
;
29506 modeimm
= insn_data
[d
->icode
].operand
[6].mode
;
29508 if (VECTOR_MODE_P (modev2
))
29509 op0
= safe_vector_operand (op0
, modev2
);
29510 if (VECTOR_MODE_P (modev4
))
29511 op2
= safe_vector_operand (op2
, modev4
);
29513 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
29514 op0
= copy_to_mode_reg (modev2
, op0
);
29515 if (!insn_data
[d
->icode
].operand
[3].predicate (op1
, modei3
))
29516 op1
= copy_to_mode_reg (modei3
, op1
);
29517 if ((optimize
&& !register_operand (op2
, modev4
))
29518 || !insn_data
[d
->icode
].operand
[4].predicate (op2
, modev4
))
29519 op2
= copy_to_mode_reg (modev4
, op2
);
29520 if (!insn_data
[d
->icode
].operand
[5].predicate (op3
, modei5
))
29521 op3
= copy_to_mode_reg (modei5
, op3
);
29523 if (!insn_data
[d
->icode
].operand
[6].predicate (op4
, modeimm
))
29525 error ("the fifth argument must be an 8-bit immediate");
29529 if (d
->code
== IX86_BUILTIN_PCMPESTRI128
)
29531 if (optimize
|| !target
29532 || GET_MODE (target
) != tmode0
29533 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
29534 target
= gen_reg_rtx (tmode0
);
29536 scratch1
= gen_reg_rtx (tmode1
);
29538 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
, op3
, op4
);
29540 else if (d
->code
== IX86_BUILTIN_PCMPESTRM128
)
29542 if (optimize
|| !target
29543 || GET_MODE (target
) != tmode1
29544 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
29545 target
= gen_reg_rtx (tmode1
);
29547 scratch0
= gen_reg_rtx (tmode0
);
29549 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
, op3
, op4
);
29553 gcc_assert (d
->flag
);
29555 scratch0
= gen_reg_rtx (tmode0
);
29556 scratch1
= gen_reg_rtx (tmode1
);
29558 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
, op3
, op4
);
29568 target
= gen_reg_rtx (SImode
);
29569 emit_move_insn (target
, const0_rtx
);
29570 target
= gen_rtx_SUBREG (QImode
, target
, 0);
29573 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
29574 gen_rtx_fmt_ee (EQ
, QImode
,
29575 gen_rtx_REG ((enum machine_mode
) d
->flag
,
29578 return SUBREG_REG (target
);
29585 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
29588 ix86_expand_sse_pcmpistr (const struct builtin_description
*d
,
29589 tree exp
, rtx target
)
29592 tree arg0
= CALL_EXPR_ARG (exp
, 0);
29593 tree arg1
= CALL_EXPR_ARG (exp
, 1);
29594 tree arg2
= CALL_EXPR_ARG (exp
, 2);
29595 rtx scratch0
, scratch1
;
29596 rtx op0
= expand_normal (arg0
);
29597 rtx op1
= expand_normal (arg1
);
29598 rtx op2
= expand_normal (arg2
);
29599 enum machine_mode tmode0
, tmode1
, modev2
, modev3
, modeimm
;
29601 tmode0
= insn_data
[d
->icode
].operand
[0].mode
;
29602 tmode1
= insn_data
[d
->icode
].operand
[1].mode
;
29603 modev2
= insn_data
[d
->icode
].operand
[2].mode
;
29604 modev3
= insn_data
[d
->icode
].operand
[3].mode
;
29605 modeimm
= insn_data
[d
->icode
].operand
[4].mode
;
29607 if (VECTOR_MODE_P (modev2
))
29608 op0
= safe_vector_operand (op0
, modev2
);
29609 if (VECTOR_MODE_P (modev3
))
29610 op1
= safe_vector_operand (op1
, modev3
);
29612 if (!insn_data
[d
->icode
].operand
[2].predicate (op0
, modev2
))
29613 op0
= copy_to_mode_reg (modev2
, op0
);
29614 if ((optimize
&& !register_operand (op1
, modev3
))
29615 || !insn_data
[d
->icode
].operand
[3].predicate (op1
, modev3
))
29616 op1
= copy_to_mode_reg (modev3
, op1
);
29618 if (!insn_data
[d
->icode
].operand
[4].predicate (op2
, modeimm
))
29620 error ("the third argument must be an 8-bit immediate");
29624 if (d
->code
== IX86_BUILTIN_PCMPISTRI128
)
29626 if (optimize
|| !target
29627 || GET_MODE (target
) != tmode0
29628 || !insn_data
[d
->icode
].operand
[0].predicate (target
, tmode0
))
29629 target
= gen_reg_rtx (tmode0
);
29631 scratch1
= gen_reg_rtx (tmode1
);
29633 pat
= GEN_FCN (d
->icode
) (target
, scratch1
, op0
, op1
, op2
);
29635 else if (d
->code
== IX86_BUILTIN_PCMPISTRM128
)
29637 if (optimize
|| !target
29638 || GET_MODE (target
) != tmode1
29639 || !insn_data
[d
->icode
].operand
[1].predicate (target
, tmode1
))
29640 target
= gen_reg_rtx (tmode1
);
29642 scratch0
= gen_reg_rtx (tmode0
);
29644 pat
= GEN_FCN (d
->icode
) (scratch0
, target
, op0
, op1
, op2
);
29648 gcc_assert (d
->flag
);
29650 scratch0
= gen_reg_rtx (tmode0
);
29651 scratch1
= gen_reg_rtx (tmode1
);
29653 pat
= GEN_FCN (d
->icode
) (scratch0
, scratch1
, op0
, op1
, op2
);
29663 target
= gen_reg_rtx (SImode
);
29664 emit_move_insn (target
, const0_rtx
);
29665 target
= gen_rtx_SUBREG (QImode
, target
, 0);
29668 (gen_rtx_SET (VOIDmode
, gen_rtx_STRICT_LOW_PART (VOIDmode
, target
),
29669 gen_rtx_fmt_ee (EQ
, QImode
,
29670 gen_rtx_REG ((enum machine_mode
) d
->flag
,
29673 return SUBREG_REG (target
);
29679 /* Subroutine of ix86_expand_builtin to take care of insns with
29680 variable number of operands. */
29683 ix86_expand_args_builtin (const struct builtin_description
*d
,
29684 tree exp
, rtx target
)
29686 rtx pat
, real_target
;
29687 unsigned int i
, nargs
;
29688 unsigned int nargs_constant
= 0;
29689 int num_memory
= 0;
29693 enum machine_mode mode
;
29695 bool last_arg_count
= false;
29696 enum insn_code icode
= d
->icode
;
29697 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
29698 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
29699 enum machine_mode rmode
= VOIDmode
;
29701 enum rtx_code comparison
= d
->comparison
;
29703 switch ((enum ix86_builtin_func_type
) d
->flag
)
29705 case V2DF_FTYPE_V2DF_ROUND
:
29706 case V4DF_FTYPE_V4DF_ROUND
:
29707 case V4SF_FTYPE_V4SF_ROUND
:
29708 case V8SF_FTYPE_V8SF_ROUND
:
29709 case V4SI_FTYPE_V4SF_ROUND
:
29710 case V8SI_FTYPE_V8SF_ROUND
:
29711 return ix86_expand_sse_round (d
, exp
, target
);
29712 case V4SI_FTYPE_V2DF_V2DF_ROUND
:
29713 case V8SI_FTYPE_V4DF_V4DF_ROUND
:
29714 return ix86_expand_sse_round_vec_pack_sfix (d
, exp
, target
);
29715 case INT_FTYPE_V8SF_V8SF_PTEST
:
29716 case INT_FTYPE_V4DI_V4DI_PTEST
:
29717 case INT_FTYPE_V4DF_V4DF_PTEST
:
29718 case INT_FTYPE_V4SF_V4SF_PTEST
:
29719 case INT_FTYPE_V2DI_V2DI_PTEST
:
29720 case INT_FTYPE_V2DF_V2DF_PTEST
:
29721 return ix86_expand_sse_ptest (d
, exp
, target
);
29722 case FLOAT128_FTYPE_FLOAT128
:
29723 case FLOAT_FTYPE_FLOAT
:
29724 case INT_FTYPE_INT
:
29725 case UINT64_FTYPE_INT
:
29726 case UINT16_FTYPE_UINT16
:
29727 case INT64_FTYPE_INT64
:
29728 case INT64_FTYPE_V4SF
:
29729 case INT64_FTYPE_V2DF
:
29730 case INT_FTYPE_V16QI
:
29731 case INT_FTYPE_V8QI
:
29732 case INT_FTYPE_V8SF
:
29733 case INT_FTYPE_V4DF
:
29734 case INT_FTYPE_V4SF
:
29735 case INT_FTYPE_V2DF
:
29736 case INT_FTYPE_V32QI
:
29737 case V16QI_FTYPE_V16QI
:
29738 case V8SI_FTYPE_V8SF
:
29739 case V8SI_FTYPE_V4SI
:
29740 case V8HI_FTYPE_V8HI
:
29741 case V8HI_FTYPE_V16QI
:
29742 case V8QI_FTYPE_V8QI
:
29743 case V8SF_FTYPE_V8SF
:
29744 case V8SF_FTYPE_V8SI
:
29745 case V8SF_FTYPE_V4SF
:
29746 case V8SF_FTYPE_V8HI
:
29747 case V4SI_FTYPE_V4SI
:
29748 case V4SI_FTYPE_V16QI
:
29749 case V4SI_FTYPE_V4SF
:
29750 case V4SI_FTYPE_V8SI
:
29751 case V4SI_FTYPE_V8HI
:
29752 case V4SI_FTYPE_V4DF
:
29753 case V4SI_FTYPE_V2DF
:
29754 case V4HI_FTYPE_V4HI
:
29755 case V4DF_FTYPE_V4DF
:
29756 case V4DF_FTYPE_V4SI
:
29757 case V4DF_FTYPE_V4SF
:
29758 case V4DF_FTYPE_V2DF
:
29759 case V4SF_FTYPE_V4SF
:
29760 case V4SF_FTYPE_V4SI
:
29761 case V4SF_FTYPE_V8SF
:
29762 case V4SF_FTYPE_V4DF
:
29763 case V4SF_FTYPE_V8HI
:
29764 case V4SF_FTYPE_V2DF
:
29765 case V2DI_FTYPE_V2DI
:
29766 case V2DI_FTYPE_V16QI
:
29767 case V2DI_FTYPE_V8HI
:
29768 case V2DI_FTYPE_V4SI
:
29769 case V2DF_FTYPE_V2DF
:
29770 case V2DF_FTYPE_V4SI
:
29771 case V2DF_FTYPE_V4DF
:
29772 case V2DF_FTYPE_V4SF
:
29773 case V2DF_FTYPE_V2SI
:
29774 case V2SI_FTYPE_V2SI
:
29775 case V2SI_FTYPE_V4SF
:
29776 case V2SI_FTYPE_V2SF
:
29777 case V2SI_FTYPE_V2DF
:
29778 case V2SF_FTYPE_V2SF
:
29779 case V2SF_FTYPE_V2SI
:
29780 case V32QI_FTYPE_V32QI
:
29781 case V32QI_FTYPE_V16QI
:
29782 case V16HI_FTYPE_V16HI
:
29783 case V16HI_FTYPE_V8HI
:
29784 case V8SI_FTYPE_V8SI
:
29785 case V16HI_FTYPE_V16QI
:
29786 case V8SI_FTYPE_V16QI
:
29787 case V4DI_FTYPE_V16QI
:
29788 case V8SI_FTYPE_V8HI
:
29789 case V4DI_FTYPE_V8HI
:
29790 case V4DI_FTYPE_V4SI
:
29791 case V4DI_FTYPE_V2DI
:
29794 case V4SF_FTYPE_V4SF_VEC_MERGE
:
29795 case V2DF_FTYPE_V2DF_VEC_MERGE
:
29796 return ix86_expand_unop_vec_merge_builtin (icode
, exp
, target
);
29797 case FLOAT128_FTYPE_FLOAT128_FLOAT128
:
29798 case V16QI_FTYPE_V16QI_V16QI
:
29799 case V16QI_FTYPE_V8HI_V8HI
:
29800 case V8QI_FTYPE_V8QI_V8QI
:
29801 case V8QI_FTYPE_V4HI_V4HI
:
29802 case V8HI_FTYPE_V8HI_V8HI
:
29803 case V8HI_FTYPE_V16QI_V16QI
:
29804 case V8HI_FTYPE_V4SI_V4SI
:
29805 case V8SF_FTYPE_V8SF_V8SF
:
29806 case V8SF_FTYPE_V8SF_V8SI
:
29807 case V4SI_FTYPE_V4SI_V4SI
:
29808 case V4SI_FTYPE_V8HI_V8HI
:
29809 case V4SI_FTYPE_V4SF_V4SF
:
29810 case V4SI_FTYPE_V2DF_V2DF
:
29811 case V4HI_FTYPE_V4HI_V4HI
:
29812 case V4HI_FTYPE_V8QI_V8QI
:
29813 case V4HI_FTYPE_V2SI_V2SI
:
29814 case V4DF_FTYPE_V4DF_V4DF
:
29815 case V4DF_FTYPE_V4DF_V4DI
:
29816 case V4SF_FTYPE_V4SF_V4SF
:
29817 case V4SF_FTYPE_V4SF_V4SI
:
29818 case V4SF_FTYPE_V4SF_V2SI
:
29819 case V4SF_FTYPE_V4SF_V2DF
:
29820 case V4SF_FTYPE_V4SF_DI
:
29821 case V4SF_FTYPE_V4SF_SI
:
29822 case V2DI_FTYPE_V2DI_V2DI
:
29823 case V2DI_FTYPE_V16QI_V16QI
:
29824 case V2DI_FTYPE_V4SI_V4SI
:
29825 case V2UDI_FTYPE_V4USI_V4USI
:
29826 case V2DI_FTYPE_V2DI_V16QI
:
29827 case V2DI_FTYPE_V2DF_V2DF
:
29828 case V2SI_FTYPE_V2SI_V2SI
:
29829 case V2SI_FTYPE_V4HI_V4HI
:
29830 case V2SI_FTYPE_V2SF_V2SF
:
29831 case V2DF_FTYPE_V2DF_V2DF
:
29832 case V2DF_FTYPE_V2DF_V4SF
:
29833 case V2DF_FTYPE_V2DF_V2DI
:
29834 case V2DF_FTYPE_V2DF_DI
:
29835 case V2DF_FTYPE_V2DF_SI
:
29836 case V2SF_FTYPE_V2SF_V2SF
:
29837 case V1DI_FTYPE_V1DI_V1DI
:
29838 case V1DI_FTYPE_V8QI_V8QI
:
29839 case V1DI_FTYPE_V2SI_V2SI
:
29840 case V32QI_FTYPE_V16HI_V16HI
:
29841 case V16HI_FTYPE_V8SI_V8SI
:
29842 case V32QI_FTYPE_V32QI_V32QI
:
29843 case V16HI_FTYPE_V32QI_V32QI
:
29844 case V16HI_FTYPE_V16HI_V16HI
:
29845 case V8SI_FTYPE_V4DF_V4DF
:
29846 case V8SI_FTYPE_V8SI_V8SI
:
29847 case V8SI_FTYPE_V16HI_V16HI
:
29848 case V4DI_FTYPE_V4DI_V4DI
:
29849 case V4DI_FTYPE_V8SI_V8SI
:
29850 case V4UDI_FTYPE_V8USI_V8USI
:
29851 if (comparison
== UNKNOWN
)
29852 return ix86_expand_binop_builtin (icode
, exp
, target
);
29855 case V4SF_FTYPE_V4SF_V4SF_SWAP
:
29856 case V2DF_FTYPE_V2DF_V2DF_SWAP
:
29857 gcc_assert (comparison
!= UNKNOWN
);
29861 case V16HI_FTYPE_V16HI_V8HI_COUNT
:
29862 case V16HI_FTYPE_V16HI_SI_COUNT
:
29863 case V8SI_FTYPE_V8SI_V4SI_COUNT
:
29864 case V8SI_FTYPE_V8SI_SI_COUNT
:
29865 case V4DI_FTYPE_V4DI_V2DI_COUNT
:
29866 case V4DI_FTYPE_V4DI_INT_COUNT
:
29867 case V8HI_FTYPE_V8HI_V8HI_COUNT
:
29868 case V8HI_FTYPE_V8HI_SI_COUNT
:
29869 case V4SI_FTYPE_V4SI_V4SI_COUNT
:
29870 case V4SI_FTYPE_V4SI_SI_COUNT
:
29871 case V4HI_FTYPE_V4HI_V4HI_COUNT
:
29872 case V4HI_FTYPE_V4HI_SI_COUNT
:
29873 case V2DI_FTYPE_V2DI_V2DI_COUNT
:
29874 case V2DI_FTYPE_V2DI_SI_COUNT
:
29875 case V2SI_FTYPE_V2SI_V2SI_COUNT
:
29876 case V2SI_FTYPE_V2SI_SI_COUNT
:
29877 case V1DI_FTYPE_V1DI_V1DI_COUNT
:
29878 case V1DI_FTYPE_V1DI_SI_COUNT
:
29880 last_arg_count
= true;
29882 case UINT64_FTYPE_UINT64_UINT64
:
29883 case UINT_FTYPE_UINT_UINT
:
29884 case UINT_FTYPE_UINT_USHORT
:
29885 case UINT_FTYPE_UINT_UCHAR
:
29886 case UINT16_FTYPE_UINT16_INT
:
29887 case UINT8_FTYPE_UINT8_INT
:
29890 case V2DI_FTYPE_V2DI_INT_CONVERT
:
29893 nargs_constant
= 1;
29895 case V4DI_FTYPE_V4DI_INT_CONVERT
:
29898 nargs_constant
= 1;
29900 case V8HI_FTYPE_V8HI_INT
:
29901 case V8HI_FTYPE_V8SF_INT
:
29902 case V8HI_FTYPE_V4SF_INT
:
29903 case V8SF_FTYPE_V8SF_INT
:
29904 case V4SI_FTYPE_V4SI_INT
:
29905 case V4SI_FTYPE_V8SI_INT
:
29906 case V4HI_FTYPE_V4HI_INT
:
29907 case V4DF_FTYPE_V4DF_INT
:
29908 case V4SF_FTYPE_V4SF_INT
:
29909 case V4SF_FTYPE_V8SF_INT
:
29910 case V2DI_FTYPE_V2DI_INT
:
29911 case V2DF_FTYPE_V2DF_INT
:
29912 case V2DF_FTYPE_V4DF_INT
:
29913 case V16HI_FTYPE_V16HI_INT
:
29914 case V8SI_FTYPE_V8SI_INT
:
29915 case V4DI_FTYPE_V4DI_INT
:
29916 case V2DI_FTYPE_V4DI_INT
:
29918 nargs_constant
= 1;
29920 case V16QI_FTYPE_V16QI_V16QI_V16QI
:
29921 case V8SF_FTYPE_V8SF_V8SF_V8SF
:
29922 case V4DF_FTYPE_V4DF_V4DF_V4DF
:
29923 case V4SF_FTYPE_V4SF_V4SF_V4SF
:
29924 case V2DF_FTYPE_V2DF_V2DF_V2DF
:
29925 case V32QI_FTYPE_V32QI_V32QI_V32QI
:
29928 case V32QI_FTYPE_V32QI_V32QI_INT
:
29929 case V16HI_FTYPE_V16HI_V16HI_INT
:
29930 case V16QI_FTYPE_V16QI_V16QI_INT
:
29931 case V4DI_FTYPE_V4DI_V4DI_INT
:
29932 case V8HI_FTYPE_V8HI_V8HI_INT
:
29933 case V8SI_FTYPE_V8SI_V8SI_INT
:
29934 case V8SI_FTYPE_V8SI_V4SI_INT
:
29935 case V8SF_FTYPE_V8SF_V8SF_INT
:
29936 case V8SF_FTYPE_V8SF_V4SF_INT
:
29937 case V4SI_FTYPE_V4SI_V4SI_INT
:
29938 case V4DF_FTYPE_V4DF_V4DF_INT
:
29939 case V4DF_FTYPE_V4DF_V2DF_INT
:
29940 case V4SF_FTYPE_V4SF_V4SF_INT
:
29941 case V2DI_FTYPE_V2DI_V2DI_INT
:
29942 case V4DI_FTYPE_V4DI_V2DI_INT
:
29943 case V2DF_FTYPE_V2DF_V2DF_INT
:
29945 nargs_constant
= 1;
29947 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT
:
29950 nargs_constant
= 1;
29952 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT
:
29955 nargs_constant
= 1;
29957 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT
:
29960 nargs_constant
= 1;
29962 case V2DI_FTYPE_V2DI_UINT_UINT
:
29964 nargs_constant
= 2;
29966 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT
:
29967 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT
:
29968 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT
:
29969 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT
:
29971 nargs_constant
= 1;
29973 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT
:
29975 nargs_constant
= 2;
29977 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED
:
29978 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG
:
29982 gcc_unreachable ();
29985 gcc_assert (nargs
<= ARRAY_SIZE (args
));
29987 if (comparison
!= UNKNOWN
)
29989 gcc_assert (nargs
== 2);
29990 return ix86_expand_sse_compare (d
, exp
, target
, swap
);
29993 if (rmode
== VOIDmode
|| rmode
== tmode
)
29997 || GET_MODE (target
) != tmode
29998 || !insn_p
->operand
[0].predicate (target
, tmode
))
29999 target
= gen_reg_rtx (tmode
);
30000 real_target
= target
;
30004 target
= gen_reg_rtx (rmode
);
30005 real_target
= simplify_gen_subreg (tmode
, target
, rmode
, 0);
30008 for (i
= 0; i
< nargs
; i
++)
30010 tree arg
= CALL_EXPR_ARG (exp
, i
);
30011 rtx op
= expand_normal (arg
);
30012 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
30013 bool match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
30015 if (last_arg_count
&& (i
+ 1) == nargs
)
30017 /* SIMD shift insns take either an 8-bit immediate or
30018 register as count. But builtin functions take int as
30019 count. If count doesn't match, we put it in register. */
30022 op
= simplify_gen_subreg (SImode
, op
, GET_MODE (op
), 0);
30023 if (!insn_p
->operand
[i
+ 1].predicate (op
, mode
))
30024 op
= copy_to_reg (op
);
30027 else if ((nargs
- i
) <= nargs_constant
)
30032 case CODE_FOR_avx2_inserti128
:
30033 case CODE_FOR_avx2_extracti128
:
30034 error ("the last argument must be an 1-bit immediate");
30037 case CODE_FOR_sse4_1_roundsd
:
30038 case CODE_FOR_sse4_1_roundss
:
30040 case CODE_FOR_sse4_1_roundpd
:
30041 case CODE_FOR_sse4_1_roundps
:
30042 case CODE_FOR_avx_roundpd256
:
30043 case CODE_FOR_avx_roundps256
:
30045 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix
:
30046 case CODE_FOR_sse4_1_roundps_sfix
:
30047 case CODE_FOR_avx_roundpd_vec_pack_sfix256
:
30048 case CODE_FOR_avx_roundps_sfix256
:
30050 case CODE_FOR_sse4_1_blendps
:
30051 case CODE_FOR_avx_blendpd256
:
30052 case CODE_FOR_avx_vpermilv4df
:
30053 error ("the last argument must be a 4-bit immediate");
30056 case CODE_FOR_sse4_1_blendpd
:
30057 case CODE_FOR_avx_vpermilv2df
:
30058 case CODE_FOR_xop_vpermil2v2df3
:
30059 case CODE_FOR_xop_vpermil2v4sf3
:
30060 case CODE_FOR_xop_vpermil2v4df3
:
30061 case CODE_FOR_xop_vpermil2v8sf3
:
30062 error ("the last argument must be a 2-bit immediate");
30065 case CODE_FOR_avx_vextractf128v4df
:
30066 case CODE_FOR_avx_vextractf128v8sf
:
30067 case CODE_FOR_avx_vextractf128v8si
:
30068 case CODE_FOR_avx_vinsertf128v4df
:
30069 case CODE_FOR_avx_vinsertf128v8sf
:
30070 case CODE_FOR_avx_vinsertf128v8si
:
30071 error ("the last argument must be a 1-bit immediate");
30074 case CODE_FOR_avx_vmcmpv2df3
:
30075 case CODE_FOR_avx_vmcmpv4sf3
:
30076 case CODE_FOR_avx_cmpv2df3
:
30077 case CODE_FOR_avx_cmpv4sf3
:
30078 case CODE_FOR_avx_cmpv4df3
:
30079 case CODE_FOR_avx_cmpv8sf3
:
30080 error ("the last argument must be a 5-bit immediate");
30084 switch (nargs_constant
)
30087 if ((nargs
- i
) == nargs_constant
)
30089 error ("the next to last argument must be an 8-bit immediate");
30093 error ("the last argument must be an 8-bit immediate");
30096 gcc_unreachable ();
30103 if (VECTOR_MODE_P (mode
))
30104 op
= safe_vector_operand (op
, mode
);
30106 /* If we aren't optimizing, only allow one memory operand to
30108 if (memory_operand (op
, mode
))
30111 if (GET_MODE (op
) == mode
|| GET_MODE (op
) == VOIDmode
)
30113 if (optimize
|| !match
|| num_memory
> 1)
30114 op
= copy_to_mode_reg (mode
, op
);
30118 op
= copy_to_reg (op
);
30119 op
= simplify_gen_subreg (mode
, op
, GET_MODE (op
), 0);
30124 args
[i
].mode
= mode
;
30130 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
);
30133 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
);
30136 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
30140 pat
= GEN_FCN (icode
) (real_target
, args
[0].op
, args
[1].op
,
30141 args
[2].op
, args
[3].op
);
30144 gcc_unreachable ();
30154 /* Subroutine of ix86_expand_builtin to take care of special insns
30155 with variable number of operands. */
30158 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
30159 tree exp
, rtx target
)
30163 unsigned int i
, nargs
, arg_adjust
, memory
;
30167 enum machine_mode mode
;
30169 enum insn_code icode
= d
->icode
;
30170 bool last_arg_constant
= false;
30171 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
30172 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
30173 enum { load
, store
} klass
;
30175 switch ((enum ix86_builtin_func_type
) d
->flag
)
30177 case VOID_FTYPE_VOID
:
30178 if (icode
== CODE_FOR_avx_vzeroupper
)
30179 target
= GEN_INT (vzeroupper_intrinsic
);
30180 emit_insn (GEN_FCN (icode
) (target
));
30182 case VOID_FTYPE_UINT64
:
30183 case VOID_FTYPE_UNSIGNED
:
30189 case INT_FTYPE_VOID
:
30190 case UINT64_FTYPE_VOID
:
30191 case UNSIGNED_FTYPE_VOID
:
30196 case UINT64_FTYPE_PUNSIGNED
:
30197 case V2DI_FTYPE_PV2DI
:
30198 case V4DI_FTYPE_PV4DI
:
30199 case V32QI_FTYPE_PCCHAR
:
30200 case V16QI_FTYPE_PCCHAR
:
30201 case V8SF_FTYPE_PCV4SF
:
30202 case V8SF_FTYPE_PCFLOAT
:
30203 case V4SF_FTYPE_PCFLOAT
:
30204 case V4DF_FTYPE_PCV2DF
:
30205 case V4DF_FTYPE_PCDOUBLE
:
30206 case V2DF_FTYPE_PCDOUBLE
:
30207 case VOID_FTYPE_PVOID
:
30212 case VOID_FTYPE_PV2SF_V4SF
:
30213 case VOID_FTYPE_PV4DI_V4DI
:
30214 case VOID_FTYPE_PV2DI_V2DI
:
30215 case VOID_FTYPE_PCHAR_V32QI
:
30216 case VOID_FTYPE_PCHAR_V16QI
:
30217 case VOID_FTYPE_PFLOAT_V8SF
:
30218 case VOID_FTYPE_PFLOAT_V4SF
:
30219 case VOID_FTYPE_PDOUBLE_V4DF
:
30220 case VOID_FTYPE_PDOUBLE_V2DF
:
30221 case VOID_FTYPE_PLONGLONG_LONGLONG
:
30222 case VOID_FTYPE_PULONGLONG_ULONGLONG
:
30223 case VOID_FTYPE_PINT_INT
:
30226 /* Reserve memory operand for target. */
30227 memory
= ARRAY_SIZE (args
);
30229 case V4SF_FTYPE_V4SF_PCV2SF
:
30230 case V2DF_FTYPE_V2DF_PCDOUBLE
:
30235 case V8SF_FTYPE_PCV8SF_V8SI
:
30236 case V4DF_FTYPE_PCV4DF_V4DI
:
30237 case V4SF_FTYPE_PCV4SF_V4SI
:
30238 case V2DF_FTYPE_PCV2DF_V2DI
:
30239 case V8SI_FTYPE_PCV8SI_V8SI
:
30240 case V4DI_FTYPE_PCV4DI_V4DI
:
30241 case V4SI_FTYPE_PCV4SI_V4SI
:
30242 case V2DI_FTYPE_PCV2DI_V2DI
:
30247 case VOID_FTYPE_PV8SF_V8SI_V8SF
:
30248 case VOID_FTYPE_PV4DF_V4DI_V4DF
:
30249 case VOID_FTYPE_PV4SF_V4SI_V4SF
:
30250 case VOID_FTYPE_PV2DF_V2DI_V2DF
:
30251 case VOID_FTYPE_PV8SI_V8SI_V8SI
:
30252 case VOID_FTYPE_PV4DI_V4DI_V4DI
:
30253 case VOID_FTYPE_PV4SI_V4SI_V4SI
:
30254 case VOID_FTYPE_PV2DI_V2DI_V2DI
:
30257 /* Reserve memory operand for target. */
30258 memory
= ARRAY_SIZE (args
);
30260 case VOID_FTYPE_UINT_UINT_UINT
:
30261 case VOID_FTYPE_UINT64_UINT_UINT
:
30262 case UCHAR_FTYPE_UINT_UINT_UINT
:
30263 case UCHAR_FTYPE_UINT64_UINT_UINT
:
30266 memory
= ARRAY_SIZE (args
);
30267 last_arg_constant
= true;
30270 gcc_unreachable ();
30273 gcc_assert (nargs
<= ARRAY_SIZE (args
));
30275 if (klass
== store
)
30277 arg
= CALL_EXPR_ARG (exp
, 0);
30278 op
= expand_normal (arg
);
30279 gcc_assert (target
== 0);
30282 if (GET_MODE (op
) != Pmode
)
30283 op
= convert_to_mode (Pmode
, op
, 1);
30284 target
= gen_rtx_MEM (tmode
, force_reg (Pmode
, op
));
30287 target
= force_reg (tmode
, op
);
30295 || !register_operand (target
, tmode
)
30296 || GET_MODE (target
) != tmode
)
30297 target
= gen_reg_rtx (tmode
);
30300 for (i
= 0; i
< nargs
; i
++)
30302 enum machine_mode mode
= insn_p
->operand
[i
+ 1].mode
;
30305 arg
= CALL_EXPR_ARG (exp
, i
+ arg_adjust
);
30306 op
= expand_normal (arg
);
30307 match
= insn_p
->operand
[i
+ 1].predicate (op
, mode
);
30309 if (last_arg_constant
&& (i
+ 1) == nargs
)
30313 if (icode
== CODE_FOR_lwp_lwpvalsi3
30314 || icode
== CODE_FOR_lwp_lwpinssi3
30315 || icode
== CODE_FOR_lwp_lwpvaldi3
30316 || icode
== CODE_FOR_lwp_lwpinsdi3
)
30317 error ("the last argument must be a 32-bit immediate");
30319 error ("the last argument must be an 8-bit immediate");
30327 /* This must be the memory operand. */
30328 if (GET_MODE (op
) != Pmode
)
30329 op
= convert_to_mode (Pmode
, op
, 1);
30330 op
= gen_rtx_MEM (mode
, force_reg (Pmode
, op
));
30331 gcc_assert (GET_MODE (op
) == mode
30332 || GET_MODE (op
) == VOIDmode
);
30336 /* This must be register. */
30337 if (VECTOR_MODE_P (mode
))
30338 op
= safe_vector_operand (op
, mode
);
30340 gcc_assert (GET_MODE (op
) == mode
30341 || GET_MODE (op
) == VOIDmode
);
30342 op
= copy_to_mode_reg (mode
, op
);
30347 args
[i
].mode
= mode
;
30353 pat
= GEN_FCN (icode
) (target
);
30356 pat
= GEN_FCN (icode
) (target
, args
[0].op
);
30359 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
);
30362 pat
= GEN_FCN (icode
) (target
, args
[0].op
, args
[1].op
, args
[2].op
);
30365 gcc_unreachable ();
30371 return klass
== store
? 0 : target
;
30374 /* Return the integer constant in ARG. Constrain it to be in the range
30375 of the subparts of VEC_TYPE; issue an error if not. */
30378 get_element_number (tree vec_type
, tree arg
)
30380 unsigned HOST_WIDE_INT elt
, max
= TYPE_VECTOR_SUBPARTS (vec_type
) - 1;
30382 if (!host_integerp (arg
, 1)
30383 || (elt
= tree_low_cst (arg
, 1), elt
> max
))
30385 error ("selector must be an integer constant in the range 0..%wi", max
);
30392 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
30393 ix86_expand_vector_init. We DO have language-level syntax for this, in
30394 the form of (type){ init-list }. Except that since we can't place emms
30395 instructions from inside the compiler, we can't allow the use of MMX
30396 registers unless the user explicitly asks for it. So we do *not* define
30397 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
30398 we have builtins invoked by mmintrin.h that gives us license to emit
30399 these sorts of instructions. */
30402 ix86_expand_vec_init_builtin (tree type
, tree exp
, rtx target
)
30404 enum machine_mode tmode
= TYPE_MODE (type
);
30405 enum machine_mode inner_mode
= GET_MODE_INNER (tmode
);
30406 int i
, n_elt
= GET_MODE_NUNITS (tmode
);
30407 rtvec v
= rtvec_alloc (n_elt
);
30409 gcc_assert (VECTOR_MODE_P (tmode
));
30410 gcc_assert (call_expr_nargs (exp
) == n_elt
);
30412 for (i
= 0; i
< n_elt
; ++i
)
30414 rtx x
= expand_normal (CALL_EXPR_ARG (exp
, i
));
30415 RTVEC_ELT (v
, i
) = gen_lowpart (inner_mode
, x
);
30418 if (!target
|| !register_operand (target
, tmode
))
30419 target
= gen_reg_rtx (tmode
);
30421 ix86_expand_vector_init (true, target
, gen_rtx_PARALLEL (tmode
, v
));
30425 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
30426 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
30427 had a language-level syntax for referencing vector elements. */
30430 ix86_expand_vec_ext_builtin (tree exp
, rtx target
)
30432 enum machine_mode tmode
, mode0
;
30437 arg0
= CALL_EXPR_ARG (exp
, 0);
30438 arg1
= CALL_EXPR_ARG (exp
, 1);
30440 op0
= expand_normal (arg0
);
30441 elt
= get_element_number (TREE_TYPE (arg0
), arg1
);
30443 tmode
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
30444 mode0
= TYPE_MODE (TREE_TYPE (arg0
));
30445 gcc_assert (VECTOR_MODE_P (mode0
));
30447 op0
= force_reg (mode0
, op0
);
30449 if (optimize
|| !target
|| !register_operand (target
, tmode
))
30450 target
= gen_reg_rtx (tmode
);
30452 ix86_expand_vector_extract (true, target
, op0
, elt
);
30457 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
30458 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
30459 a language-level syntax for referencing vector elements. */
30462 ix86_expand_vec_set_builtin (tree exp
)
30464 enum machine_mode tmode
, mode1
;
30465 tree arg0
, arg1
, arg2
;
30467 rtx op0
, op1
, target
;
30469 arg0
= CALL_EXPR_ARG (exp
, 0);
30470 arg1
= CALL_EXPR_ARG (exp
, 1);
30471 arg2
= CALL_EXPR_ARG (exp
, 2);
30473 tmode
= TYPE_MODE (TREE_TYPE (arg0
));
30474 mode1
= TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0
)));
30475 gcc_assert (VECTOR_MODE_P (tmode
));
30477 op0
= expand_expr (arg0
, NULL_RTX
, tmode
, EXPAND_NORMAL
);
30478 op1
= expand_expr (arg1
, NULL_RTX
, mode1
, EXPAND_NORMAL
);
30479 elt
= get_element_number (TREE_TYPE (arg0
), arg2
);
30481 if (GET_MODE (op1
) != mode1
&& GET_MODE (op1
) != VOIDmode
)
30482 op1
= convert_modes (mode1
, GET_MODE (op1
), op1
, true);
30484 op0
= force_reg (tmode
, op0
);
30485 op1
= force_reg (mode1
, op1
);
30487 /* OP0 is the source of these builtin functions and shouldn't be
30488 modified. Create a copy, use it and return it as target. */
30489 target
= gen_reg_rtx (tmode
);
30490 emit_move_insn (target
, op0
);
30491 ix86_expand_vector_set (true, target
, op1
, elt
);
30496 /* Expand an expression EXP that calls a built-in function,
30497 with result going to TARGET if that's convenient
30498 (and in mode MODE if that's convenient).
30499 SUBTARGET may be used as the target for computing one of EXP's operands.
30500 IGNORE is nonzero if the value is to be ignored. */
30503 ix86_expand_builtin (tree exp
, rtx target
, rtx subtarget ATTRIBUTE_UNUSED
,
30504 enum machine_mode mode ATTRIBUTE_UNUSED
,
30505 int ignore ATTRIBUTE_UNUSED
)
30507 const struct builtin_description
*d
;
30509 enum insn_code icode
;
30510 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
30511 tree arg0
, arg1
, arg2
, arg3
, arg4
;
30512 rtx op0
, op1
, op2
, op3
, op4
, pat
, insn
;
30513 enum machine_mode mode0
, mode1
, mode2
, mode3
, mode4
;
30514 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
30516 /* For CPU builtins that can be folded, fold first and expand the fold. */
30519 case IX86_BUILTIN_CPU_INIT
:
30521 /* Make it call __cpu_indicator_init in libgcc. */
30522 tree call_expr
, fndecl
, type
;
30523 type
= build_function_type_list (integer_type_node
, NULL_TREE
);
30524 fndecl
= build_fn_decl ("__cpu_indicator_init", type
);
30525 call_expr
= build_call_expr (fndecl
, 0);
30526 return expand_expr (call_expr
, target
, mode
, EXPAND_NORMAL
);
30528 case IX86_BUILTIN_CPU_IS
:
30529 case IX86_BUILTIN_CPU_SUPPORTS
:
30531 tree arg0
= CALL_EXPR_ARG (exp
, 0);
30532 tree fold_expr
= fold_builtin_cpu (fndecl
, &arg0
);
30533 gcc_assert (fold_expr
!= NULL_TREE
);
30534 return expand_expr (fold_expr
, target
, mode
, EXPAND_NORMAL
);
30538 /* Determine whether the builtin function is available under the current ISA.
30539 Originally the builtin was not created if it wasn't applicable to the
30540 current ISA based on the command line switches. With function specific
30541 options, we need to check in the context of the function making the call
30542 whether it is supported. */
30543 if (ix86_builtins_isa
[fcode
].isa
30544 && !(ix86_builtins_isa
[fcode
].isa
& ix86_isa_flags
))
30546 char *opts
= ix86_target_string (ix86_builtins_isa
[fcode
].isa
, 0, NULL
,
30547 NULL
, (enum fpmath_unit
) 0, false);
30550 error ("%qE needs unknown isa option", fndecl
);
30553 gcc_assert (opts
!= NULL
);
30554 error ("%qE needs isa option %s", fndecl
, opts
);
30562 case IX86_BUILTIN_MASKMOVQ
:
30563 case IX86_BUILTIN_MASKMOVDQU
:
30564 icode
= (fcode
== IX86_BUILTIN_MASKMOVQ
30565 ? CODE_FOR_mmx_maskmovq
30566 : CODE_FOR_sse2_maskmovdqu
);
30567 /* Note the arg order is different from the operand order. */
30568 arg1
= CALL_EXPR_ARG (exp
, 0);
30569 arg2
= CALL_EXPR_ARG (exp
, 1);
30570 arg0
= CALL_EXPR_ARG (exp
, 2);
30571 op0
= expand_normal (arg0
);
30572 op1
= expand_normal (arg1
);
30573 op2
= expand_normal (arg2
);
30574 mode0
= insn_data
[icode
].operand
[0].mode
;
30575 mode1
= insn_data
[icode
].operand
[1].mode
;
30576 mode2
= insn_data
[icode
].operand
[2].mode
;
30578 if (GET_MODE (op0
) != Pmode
)
30579 op0
= convert_to_mode (Pmode
, op0
, 1);
30580 op0
= gen_rtx_MEM (mode1
, force_reg (Pmode
, op0
));
30582 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
30583 op0
= copy_to_mode_reg (mode0
, op0
);
30584 if (!insn_data
[icode
].operand
[1].predicate (op1
, mode1
))
30585 op1
= copy_to_mode_reg (mode1
, op1
);
30586 if (!insn_data
[icode
].operand
[2].predicate (op2
, mode2
))
30587 op2
= copy_to_mode_reg (mode2
, op2
);
30588 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
30594 case IX86_BUILTIN_LDMXCSR
:
30595 op0
= expand_normal (CALL_EXPR_ARG (exp
, 0));
30596 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
30597 emit_move_insn (target
, op0
);
30598 emit_insn (gen_sse_ldmxcsr (target
));
30601 case IX86_BUILTIN_STMXCSR
:
30602 target
= assign_386_stack_local (SImode
, SLOT_VIRTUAL
);
30603 emit_insn (gen_sse_stmxcsr (target
));
30604 return copy_to_mode_reg (SImode
, target
);
30606 case IX86_BUILTIN_CLFLUSH
:
30607 arg0
= CALL_EXPR_ARG (exp
, 0);
30608 op0
= expand_normal (arg0
);
30609 icode
= CODE_FOR_sse2_clflush
;
30610 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
30612 if (GET_MODE (op0
) != Pmode
)
30613 op0
= convert_to_mode (Pmode
, op0
, 1);
30614 op0
= force_reg (Pmode
, op0
);
30617 emit_insn (gen_sse2_clflush (op0
));
30620 case IX86_BUILTIN_MONITOR
:
30621 arg0
= CALL_EXPR_ARG (exp
, 0);
30622 arg1
= CALL_EXPR_ARG (exp
, 1);
30623 arg2
= CALL_EXPR_ARG (exp
, 2);
30624 op0
= expand_normal (arg0
);
30625 op1
= expand_normal (arg1
);
30626 op2
= expand_normal (arg2
);
30629 if (GET_MODE (op0
) != Pmode
)
30630 op0
= convert_to_mode (Pmode
, op0
, 1);
30631 op0
= force_reg (Pmode
, op0
);
30634 op1
= copy_to_mode_reg (SImode
, op1
);
30636 op2
= copy_to_mode_reg (SImode
, op2
);
30637 emit_insn (ix86_gen_monitor (op0
, op1
, op2
));
30640 case IX86_BUILTIN_MWAIT
:
30641 arg0
= CALL_EXPR_ARG (exp
, 0);
30642 arg1
= CALL_EXPR_ARG (exp
, 1);
30643 op0
= expand_normal (arg0
);
30644 op1
= expand_normal (arg1
);
30646 op0
= copy_to_mode_reg (SImode
, op0
);
30648 op1
= copy_to_mode_reg (SImode
, op1
);
30649 emit_insn (gen_sse3_mwait (op0
, op1
));
30652 case IX86_BUILTIN_VEC_INIT_V2SI
:
30653 case IX86_BUILTIN_VEC_INIT_V4HI
:
30654 case IX86_BUILTIN_VEC_INIT_V8QI
:
30655 return ix86_expand_vec_init_builtin (TREE_TYPE (exp
), exp
, target
);
30657 case IX86_BUILTIN_VEC_EXT_V2DF
:
30658 case IX86_BUILTIN_VEC_EXT_V2DI
:
30659 case IX86_BUILTIN_VEC_EXT_V4SF
:
30660 case IX86_BUILTIN_VEC_EXT_V4SI
:
30661 case IX86_BUILTIN_VEC_EXT_V8HI
:
30662 case IX86_BUILTIN_VEC_EXT_V2SI
:
30663 case IX86_BUILTIN_VEC_EXT_V4HI
:
30664 case IX86_BUILTIN_VEC_EXT_V16QI
:
30665 return ix86_expand_vec_ext_builtin (exp
, target
);
30667 case IX86_BUILTIN_VEC_SET_V2DI
:
30668 case IX86_BUILTIN_VEC_SET_V4SF
:
30669 case IX86_BUILTIN_VEC_SET_V4SI
:
30670 case IX86_BUILTIN_VEC_SET_V8HI
:
30671 case IX86_BUILTIN_VEC_SET_V4HI
:
30672 case IX86_BUILTIN_VEC_SET_V16QI
:
30673 return ix86_expand_vec_set_builtin (exp
);
30675 case IX86_BUILTIN_INFQ
:
30676 case IX86_BUILTIN_HUGE_VALQ
:
30678 REAL_VALUE_TYPE inf
;
30682 tmp
= CONST_DOUBLE_FROM_REAL_VALUE (inf
, mode
);
30684 tmp
= validize_mem (force_const_mem (mode
, tmp
));
30687 target
= gen_reg_rtx (mode
);
30689 emit_move_insn (target
, tmp
);
30693 case IX86_BUILTIN_RDPMC
:
30694 case IX86_BUILTIN_RDTSC
:
30695 case IX86_BUILTIN_RDTSCP
:
30697 op0
= gen_reg_rtx (DImode
);
30698 op1
= gen_reg_rtx (DImode
);
30700 if (fcode
== IX86_BUILTIN_RDPMC
)
30702 arg0
= CALL_EXPR_ARG (exp
, 0);
30703 op2
= expand_normal (arg0
);
30704 if (!register_operand (op2
, SImode
))
30705 op2
= copy_to_mode_reg (SImode
, op2
);
30707 insn
= (TARGET_64BIT
30708 ? gen_rdpmc_rex64 (op0
, op1
, op2
)
30709 : gen_rdpmc (op0
, op2
));
30712 else if (fcode
== IX86_BUILTIN_RDTSC
)
30714 insn
= (TARGET_64BIT
30715 ? gen_rdtsc_rex64 (op0
, op1
)
30716 : gen_rdtsc (op0
));
30721 op2
= gen_reg_rtx (SImode
);
30723 insn
= (TARGET_64BIT
30724 ? gen_rdtscp_rex64 (op0
, op1
, op2
)
30725 : gen_rdtscp (op0
, op2
));
30728 arg0
= CALL_EXPR_ARG (exp
, 0);
30729 op4
= expand_normal (arg0
);
30730 if (!address_operand (op4
, VOIDmode
))
30732 op4
= convert_memory_address (Pmode
, op4
);
30733 op4
= copy_addr_to_reg (op4
);
30735 emit_move_insn (gen_rtx_MEM (SImode
, op4
), op2
);
30739 target
= gen_reg_rtx (mode
);
30743 op1
= expand_simple_binop (DImode
, ASHIFT
, op1
, GEN_INT (32),
30744 op1
, 1, OPTAB_DIRECT
);
30745 op0
= expand_simple_binop (DImode
, IOR
, op0
, op1
,
30746 op0
, 1, OPTAB_DIRECT
);
30749 emit_move_insn (target
, op0
);
30752 case IX86_BUILTIN_FXSAVE
:
30753 case IX86_BUILTIN_FXRSTOR
:
30754 case IX86_BUILTIN_FXSAVE64
:
30755 case IX86_BUILTIN_FXRSTOR64
:
30758 case IX86_BUILTIN_FXSAVE
:
30759 icode
= CODE_FOR_fxsave
;
30761 case IX86_BUILTIN_FXRSTOR
:
30762 icode
= CODE_FOR_fxrstor
;
30764 case IX86_BUILTIN_FXSAVE64
:
30765 icode
= CODE_FOR_fxsave64
;
30767 case IX86_BUILTIN_FXRSTOR64
:
30768 icode
= CODE_FOR_fxrstor64
;
30771 gcc_unreachable ();
30774 arg0
= CALL_EXPR_ARG (exp
, 0);
30775 op0
= expand_normal (arg0
);
30777 if (!address_operand (op0
, VOIDmode
))
30779 op0
= convert_memory_address (Pmode
, op0
);
30780 op0
= copy_addr_to_reg (op0
);
30782 op0
= gen_rtx_MEM (BLKmode
, op0
);
30784 pat
= GEN_FCN (icode
) (op0
);
30789 case IX86_BUILTIN_XSAVE
:
30790 case IX86_BUILTIN_XRSTOR
:
30791 case IX86_BUILTIN_XSAVE64
:
30792 case IX86_BUILTIN_XRSTOR64
:
30793 case IX86_BUILTIN_XSAVEOPT
:
30794 case IX86_BUILTIN_XSAVEOPT64
:
30795 arg0
= CALL_EXPR_ARG (exp
, 0);
30796 arg1
= CALL_EXPR_ARG (exp
, 1);
30797 op0
= expand_normal (arg0
);
30798 op1
= expand_normal (arg1
);
30800 if (!address_operand (op0
, VOIDmode
))
30802 op0
= convert_memory_address (Pmode
, op0
);
30803 op0
= copy_addr_to_reg (op0
);
30805 op0
= gen_rtx_MEM (BLKmode
, op0
);
30807 op1
= force_reg (DImode
, op1
);
30811 op2
= expand_simple_binop (DImode
, LSHIFTRT
, op1
, GEN_INT (32),
30812 NULL
, 1, OPTAB_DIRECT
);
30815 case IX86_BUILTIN_XSAVE
:
30816 icode
= CODE_FOR_xsave_rex64
;
30818 case IX86_BUILTIN_XRSTOR
:
30819 icode
= CODE_FOR_xrstor_rex64
;
30821 case IX86_BUILTIN_XSAVE64
:
30822 icode
= CODE_FOR_xsave64
;
30824 case IX86_BUILTIN_XRSTOR64
:
30825 icode
= CODE_FOR_xrstor64
;
30827 case IX86_BUILTIN_XSAVEOPT
:
30828 icode
= CODE_FOR_xsaveopt_rex64
;
30830 case IX86_BUILTIN_XSAVEOPT64
:
30831 icode
= CODE_FOR_xsaveopt64
;
30834 gcc_unreachable ();
30837 op2
= gen_lowpart (SImode
, op2
);
30838 op1
= gen_lowpart (SImode
, op1
);
30839 pat
= GEN_FCN (icode
) (op0
, op1
, op2
);
30845 case IX86_BUILTIN_XSAVE
:
30846 icode
= CODE_FOR_xsave
;
30848 case IX86_BUILTIN_XRSTOR
:
30849 icode
= CODE_FOR_xrstor
;
30851 case IX86_BUILTIN_XSAVEOPT
:
30852 icode
= CODE_FOR_xsaveopt
;
30855 gcc_unreachable ();
30857 pat
= GEN_FCN (icode
) (op0
, op1
);
30864 case IX86_BUILTIN_LLWPCB
:
30865 arg0
= CALL_EXPR_ARG (exp
, 0);
30866 op0
= expand_normal (arg0
);
30867 icode
= CODE_FOR_lwp_llwpcb
;
30868 if (!insn_data
[icode
].operand
[0].predicate (op0
, Pmode
))
30870 if (GET_MODE (op0
) != Pmode
)
30871 op0
= convert_to_mode (Pmode
, op0
, 1);
30872 op0
= force_reg (Pmode
, op0
);
30874 emit_insn (gen_lwp_llwpcb (op0
));
30877 case IX86_BUILTIN_SLWPCB
:
30878 icode
= CODE_FOR_lwp_slwpcb
;
30880 || !insn_data
[icode
].operand
[0].predicate (target
, Pmode
))
30881 target
= gen_reg_rtx (Pmode
);
30882 emit_insn (gen_lwp_slwpcb (target
));
30885 case IX86_BUILTIN_BEXTRI32
:
30886 case IX86_BUILTIN_BEXTRI64
:
30887 arg0
= CALL_EXPR_ARG (exp
, 0);
30888 arg1
= CALL_EXPR_ARG (exp
, 1);
30889 op0
= expand_normal (arg0
);
30890 op1
= expand_normal (arg1
);
30891 icode
= (fcode
== IX86_BUILTIN_BEXTRI32
30892 ? CODE_FOR_tbm_bextri_si
30893 : CODE_FOR_tbm_bextri_di
);
30894 if (!CONST_INT_P (op1
))
30896 error ("last argument must be an immediate");
30901 unsigned char length
= (INTVAL (op1
) >> 8) & 0xFF;
30902 unsigned char lsb_index
= INTVAL (op1
) & 0xFF;
30903 op1
= GEN_INT (length
);
30904 op2
= GEN_INT (lsb_index
);
30905 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
30911 case IX86_BUILTIN_RDRAND16_STEP
:
30912 icode
= CODE_FOR_rdrandhi_1
;
30916 case IX86_BUILTIN_RDRAND32_STEP
:
30917 icode
= CODE_FOR_rdrandsi_1
;
30921 case IX86_BUILTIN_RDRAND64_STEP
:
30922 icode
= CODE_FOR_rdranddi_1
;
30926 op0
= gen_reg_rtx (mode0
);
30927 emit_insn (GEN_FCN (icode
) (op0
));
30929 arg0
= CALL_EXPR_ARG (exp
, 0);
30930 op1
= expand_normal (arg0
);
30931 if (!address_operand (op1
, VOIDmode
))
30933 op1
= convert_memory_address (Pmode
, op1
);
30934 op1
= copy_addr_to_reg (op1
);
30936 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
30938 op1
= gen_reg_rtx (SImode
);
30939 emit_move_insn (op1
, CONST1_RTX (SImode
));
30941 /* Emit SImode conditional move. */
30942 if (mode0
== HImode
)
30944 op2
= gen_reg_rtx (SImode
);
30945 emit_insn (gen_zero_extendhisi2 (op2
, op0
));
30947 else if (mode0
== SImode
)
30950 op2
= gen_rtx_SUBREG (SImode
, op0
, 0);
30953 target
= gen_reg_rtx (SImode
);
30955 pat
= gen_rtx_GEU (VOIDmode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
30957 emit_insn (gen_rtx_SET (VOIDmode
, target
,
30958 gen_rtx_IF_THEN_ELSE (SImode
, pat
, op2
, op1
)));
30961 case IX86_BUILTIN_RDSEED16_STEP
:
30962 icode
= CODE_FOR_rdseedhi_1
;
30966 case IX86_BUILTIN_RDSEED32_STEP
:
30967 icode
= CODE_FOR_rdseedsi_1
;
30971 case IX86_BUILTIN_RDSEED64_STEP
:
30972 icode
= CODE_FOR_rdseeddi_1
;
30976 op0
= gen_reg_rtx (mode0
);
30977 emit_insn (GEN_FCN (icode
) (op0
));
30979 arg0
= CALL_EXPR_ARG (exp
, 0);
30980 op1
= expand_normal (arg0
);
30981 if (!address_operand (op1
, VOIDmode
))
30983 op1
= convert_memory_address (Pmode
, op1
);
30984 op1
= copy_addr_to_reg (op1
);
30986 emit_move_insn (gen_rtx_MEM (mode0
, op1
), op0
);
30988 op2
= gen_reg_rtx (QImode
);
30990 pat
= gen_rtx_LTU (QImode
, gen_rtx_REG (CCCmode
, FLAGS_REG
),
30992 emit_insn (gen_rtx_SET (VOIDmode
, op2
, pat
));
30995 target
= gen_reg_rtx (SImode
);
30997 emit_insn (gen_zero_extendqisi2 (target
, op2
));
31000 case IX86_BUILTIN_ADDCARRYX32
:
31001 icode
= TARGET_ADX
? CODE_FOR_adcxsi3
: CODE_FOR_addsi3_carry
;
31005 case IX86_BUILTIN_ADDCARRYX64
:
31006 icode
= TARGET_ADX
? CODE_FOR_adcxdi3
: CODE_FOR_adddi3_carry
;
31010 arg0
= CALL_EXPR_ARG (exp
, 0); /* unsigned char c_in. */
31011 arg1
= CALL_EXPR_ARG (exp
, 1); /* unsigned int src1. */
31012 arg2
= CALL_EXPR_ARG (exp
, 2); /* unsigned int src2. */
31013 arg3
= CALL_EXPR_ARG (exp
, 3); /* unsigned int *sum_out. */
31015 op0
= gen_reg_rtx (QImode
);
31017 /* Generate CF from input operand. */
31018 op1
= expand_normal (arg0
);
31019 if (GET_MODE (op1
) != QImode
)
31020 op1
= convert_to_mode (QImode
, op1
, 1);
31021 op1
= copy_to_mode_reg (QImode
, op1
);
31022 emit_insn (gen_addqi3_cc (op0
, op1
, constm1_rtx
));
31024 /* Gen ADCX instruction to compute X+Y+CF. */
31025 op2
= expand_normal (arg1
);
31026 op3
= expand_normal (arg2
);
31029 op2
= copy_to_mode_reg (mode0
, op2
);
31031 op3
= copy_to_mode_reg (mode0
, op3
);
31033 op0
= gen_reg_rtx (mode0
);
31035 op4
= gen_rtx_REG (CCCmode
, FLAGS_REG
);
31036 pat
= gen_rtx_LTU (VOIDmode
, op4
, const0_rtx
);
31037 emit_insn (GEN_FCN (icode
) (op0
, op2
, op3
, op4
, pat
));
31039 /* Store the result. */
31040 op4
= expand_normal (arg3
);
31041 if (!address_operand (op4
, VOIDmode
))
31043 op4
= convert_memory_address (Pmode
, op4
);
31044 op4
= copy_addr_to_reg (op4
);
31046 emit_move_insn (gen_rtx_MEM (mode0
, op4
), op0
);
31048 /* Return current CF value. */
31050 target
= gen_reg_rtx (QImode
);
31052 PUT_MODE (pat
, QImode
);
31053 emit_insn (gen_rtx_SET (VOIDmode
, target
, pat
));
31056 case IX86_BUILTIN_GATHERSIV2DF
:
31057 icode
= CODE_FOR_avx2_gathersiv2df
;
31059 case IX86_BUILTIN_GATHERSIV4DF
:
31060 icode
= CODE_FOR_avx2_gathersiv4df
;
31062 case IX86_BUILTIN_GATHERDIV2DF
:
31063 icode
= CODE_FOR_avx2_gatherdiv2df
;
31065 case IX86_BUILTIN_GATHERDIV4DF
:
31066 icode
= CODE_FOR_avx2_gatherdiv4df
;
31068 case IX86_BUILTIN_GATHERSIV4SF
:
31069 icode
= CODE_FOR_avx2_gathersiv4sf
;
31071 case IX86_BUILTIN_GATHERSIV8SF
:
31072 icode
= CODE_FOR_avx2_gathersiv8sf
;
31074 case IX86_BUILTIN_GATHERDIV4SF
:
31075 icode
= CODE_FOR_avx2_gatherdiv4sf
;
31077 case IX86_BUILTIN_GATHERDIV8SF
:
31078 icode
= CODE_FOR_avx2_gatherdiv8sf
;
31080 case IX86_BUILTIN_GATHERSIV2DI
:
31081 icode
= CODE_FOR_avx2_gathersiv2di
;
31083 case IX86_BUILTIN_GATHERSIV4DI
:
31084 icode
= CODE_FOR_avx2_gathersiv4di
;
31086 case IX86_BUILTIN_GATHERDIV2DI
:
31087 icode
= CODE_FOR_avx2_gatherdiv2di
;
31089 case IX86_BUILTIN_GATHERDIV4DI
:
31090 icode
= CODE_FOR_avx2_gatherdiv4di
;
31092 case IX86_BUILTIN_GATHERSIV4SI
:
31093 icode
= CODE_FOR_avx2_gathersiv4si
;
31095 case IX86_BUILTIN_GATHERSIV8SI
:
31096 icode
= CODE_FOR_avx2_gathersiv8si
;
31098 case IX86_BUILTIN_GATHERDIV4SI
:
31099 icode
= CODE_FOR_avx2_gatherdiv4si
;
31101 case IX86_BUILTIN_GATHERDIV8SI
:
31102 icode
= CODE_FOR_avx2_gatherdiv8si
;
31104 case IX86_BUILTIN_GATHERALTSIV4DF
:
31105 icode
= CODE_FOR_avx2_gathersiv4df
;
31107 case IX86_BUILTIN_GATHERALTDIV8SF
:
31108 icode
= CODE_FOR_avx2_gatherdiv8sf
;
31110 case IX86_BUILTIN_GATHERALTSIV4DI
:
31111 icode
= CODE_FOR_avx2_gathersiv4di
;
31113 case IX86_BUILTIN_GATHERALTDIV8SI
:
31114 icode
= CODE_FOR_avx2_gatherdiv8si
;
31118 arg0
= CALL_EXPR_ARG (exp
, 0);
31119 arg1
= CALL_EXPR_ARG (exp
, 1);
31120 arg2
= CALL_EXPR_ARG (exp
, 2);
31121 arg3
= CALL_EXPR_ARG (exp
, 3);
31122 arg4
= CALL_EXPR_ARG (exp
, 4);
31123 op0
= expand_normal (arg0
);
31124 op1
= expand_normal (arg1
);
31125 op2
= expand_normal (arg2
);
31126 op3
= expand_normal (arg3
);
31127 op4
= expand_normal (arg4
);
31128 /* Note the arg order is different from the operand order. */
31129 mode0
= insn_data
[icode
].operand
[1].mode
;
31130 mode2
= insn_data
[icode
].operand
[3].mode
;
31131 mode3
= insn_data
[icode
].operand
[4].mode
;
31132 mode4
= insn_data
[icode
].operand
[5].mode
;
31134 if (target
== NULL_RTX
31135 || GET_MODE (target
) != insn_data
[icode
].operand
[0].mode
)
31136 subtarget
= gen_reg_rtx (insn_data
[icode
].operand
[0].mode
);
31138 subtarget
= target
;
31140 if (fcode
== IX86_BUILTIN_GATHERALTSIV4DF
31141 || fcode
== IX86_BUILTIN_GATHERALTSIV4DI
)
31143 rtx half
= gen_reg_rtx (V4SImode
);
31144 if (!nonimmediate_operand (op2
, V8SImode
))
31145 op2
= copy_to_mode_reg (V8SImode
, op2
);
31146 emit_insn (gen_vec_extract_lo_v8si (half
, op2
));
31149 else if (fcode
== IX86_BUILTIN_GATHERALTDIV8SF
31150 || fcode
== IX86_BUILTIN_GATHERALTDIV8SI
)
31152 rtx (*gen
) (rtx
, rtx
);
31153 rtx half
= gen_reg_rtx (mode0
);
31154 if (mode0
== V4SFmode
)
31155 gen
= gen_vec_extract_lo_v8sf
;
31157 gen
= gen_vec_extract_lo_v8si
;
31158 if (!nonimmediate_operand (op0
, GET_MODE (op0
)))
31159 op0
= copy_to_mode_reg (GET_MODE (op0
), op0
);
31160 emit_insn (gen (half
, op0
));
31162 if (!nonimmediate_operand (op3
, GET_MODE (op3
)))
31163 op3
= copy_to_mode_reg (GET_MODE (op3
), op3
);
31164 emit_insn (gen (half
, op3
));
31168 /* Force memory operand only with base register here. But we
31169 don't want to do it on memory operand for other builtin
31171 if (GET_MODE (op1
) != Pmode
)
31172 op1
= convert_to_mode (Pmode
, op1
, 1);
31173 op1
= force_reg (Pmode
, op1
);
31175 if (!insn_data
[icode
].operand
[1].predicate (op0
, mode0
))
31176 op0
= copy_to_mode_reg (mode0
, op0
);
31177 if (!insn_data
[icode
].operand
[2].predicate (op1
, Pmode
))
31178 op1
= copy_to_mode_reg (Pmode
, op1
);
31179 if (!insn_data
[icode
].operand
[3].predicate (op2
, mode2
))
31180 op2
= copy_to_mode_reg (mode2
, op2
);
31181 if (!insn_data
[icode
].operand
[4].predicate (op3
, mode3
))
31182 op3
= copy_to_mode_reg (mode3
, op3
);
31183 if (!insn_data
[icode
].operand
[5].predicate (op4
, mode4
))
31185 error ("last argument must be scale 1, 2, 4, 8");
31189 /* Optimize. If mask is known to have all high bits set,
31190 replace op0 with pc_rtx to signal that the instruction
31191 overwrites the whole destination and doesn't use its
31192 previous contents. */
31195 if (TREE_CODE (arg3
) == VECTOR_CST
)
31197 unsigned int negative
= 0;
31198 for (i
= 0; i
< VECTOR_CST_NELTS (arg3
); ++i
)
31200 tree cst
= VECTOR_CST_ELT (arg3
, i
);
31201 if (TREE_CODE (cst
) == INTEGER_CST
31202 && tree_int_cst_sign_bit (cst
))
31204 else if (TREE_CODE (cst
) == REAL_CST
31205 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst
)))
31208 if (negative
== TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3
)))
31211 else if (TREE_CODE (arg3
) == SSA_NAME
)
31213 /* Recognize also when mask is like:
31214 __v2df src = _mm_setzero_pd ();
31215 __v2df mask = _mm_cmpeq_pd (src, src);
31217 __v8sf src = _mm256_setzero_ps ();
31218 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
31219 as that is a cheaper way to load all ones into
31220 a register than having to load a constant from
31222 gimple def_stmt
= SSA_NAME_DEF_STMT (arg3
);
31223 if (is_gimple_call (def_stmt
))
31225 tree fndecl
= gimple_call_fndecl (def_stmt
);
31227 && DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
31228 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl
))
31230 case IX86_BUILTIN_CMPPD
:
31231 case IX86_BUILTIN_CMPPS
:
31232 case IX86_BUILTIN_CMPPD256
:
31233 case IX86_BUILTIN_CMPPS256
:
31234 if (!integer_zerop (gimple_call_arg (def_stmt
, 2)))
31237 case IX86_BUILTIN_CMPEQPD
:
31238 case IX86_BUILTIN_CMPEQPS
:
31239 if (initializer_zerop (gimple_call_arg (def_stmt
, 0))
31240 && initializer_zerop (gimple_call_arg (def_stmt
,
31251 pat
= GEN_FCN (icode
) (subtarget
, op0
, op1
, op2
, op3
, op4
);
31256 if (fcode
== IX86_BUILTIN_GATHERDIV8SF
31257 || fcode
== IX86_BUILTIN_GATHERDIV8SI
)
31259 enum machine_mode tmode
= GET_MODE (subtarget
) == V8SFmode
31260 ? V4SFmode
: V4SImode
;
31261 if (target
== NULL_RTX
)
31262 target
= gen_reg_rtx (tmode
);
31263 if (tmode
== V4SFmode
)
31264 emit_insn (gen_vec_extract_lo_v8sf (target
, subtarget
));
31266 emit_insn (gen_vec_extract_lo_v8si (target
, subtarget
));
31269 target
= subtarget
;
31273 case IX86_BUILTIN_XABORT
:
31274 icode
= CODE_FOR_xabort
;
31275 arg0
= CALL_EXPR_ARG (exp
, 0);
31276 op0
= expand_normal (arg0
);
31277 mode0
= insn_data
[icode
].operand
[0].mode
;
31278 if (!insn_data
[icode
].operand
[0].predicate (op0
, mode0
))
31280 error ("the xabort's argument must be an 8-bit immediate");
31283 emit_insn (gen_xabort (op0
));
31290 for (i
= 0, d
= bdesc_special_args
;
31291 i
< ARRAY_SIZE (bdesc_special_args
);
31293 if (d
->code
== fcode
)
31294 return ix86_expand_special_args_builtin (d
, exp
, target
);
31296 for (i
= 0, d
= bdesc_args
;
31297 i
< ARRAY_SIZE (bdesc_args
);
31299 if (d
->code
== fcode
)
31302 case IX86_BUILTIN_FABSQ
:
31303 case IX86_BUILTIN_COPYSIGNQ
:
31305 /* Emit a normal call if SSE isn't available. */
31306 return expand_call (exp
, target
, ignore
);
31308 return ix86_expand_args_builtin (d
, exp
, target
);
31311 for (i
= 0, d
= bdesc_comi
; i
< ARRAY_SIZE (bdesc_comi
); i
++, d
++)
31312 if (d
->code
== fcode
)
31313 return ix86_expand_sse_comi (d
, exp
, target
);
31315 for (i
= 0, d
= bdesc_pcmpestr
;
31316 i
< ARRAY_SIZE (bdesc_pcmpestr
);
31318 if (d
->code
== fcode
)
31319 return ix86_expand_sse_pcmpestr (d
, exp
, target
);
31321 for (i
= 0, d
= bdesc_pcmpistr
;
31322 i
< ARRAY_SIZE (bdesc_pcmpistr
);
31324 if (d
->code
== fcode
)
31325 return ix86_expand_sse_pcmpistr (d
, exp
, target
);
31327 for (i
= 0, d
= bdesc_multi_arg
; i
< ARRAY_SIZE (bdesc_multi_arg
); i
++, d
++)
31328 if (d
->code
== fcode
)
31329 return ix86_expand_multi_arg_builtin (d
->icode
, exp
, target
,
31330 (enum ix86_builtin_func_type
)
31331 d
->flag
, d
->comparison
);
31333 gcc_unreachable ();
31336 /* Returns a function decl for a vectorized version of the builtin function
31337 with builtin function code FN and the result vector type TYPE, or NULL_TREE
31338 if it is not available. */
31341 ix86_builtin_vectorized_function (tree fndecl
, tree type_out
,
31344 enum machine_mode in_mode
, out_mode
;
31346 enum built_in_function fn
= DECL_FUNCTION_CODE (fndecl
);
31348 if (TREE_CODE (type_out
) != VECTOR_TYPE
31349 || TREE_CODE (type_in
) != VECTOR_TYPE
31350 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
31353 out_mode
= TYPE_MODE (TREE_TYPE (type_out
));
31354 out_n
= TYPE_VECTOR_SUBPARTS (type_out
);
31355 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
31356 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
31360 case BUILT_IN_SQRT
:
31361 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31363 if (out_n
== 2 && in_n
== 2)
31364 return ix86_builtins
[IX86_BUILTIN_SQRTPD
];
31365 else if (out_n
== 4 && in_n
== 4)
31366 return ix86_builtins
[IX86_BUILTIN_SQRTPD256
];
31370 case BUILT_IN_SQRTF
:
31371 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31373 if (out_n
== 4 && in_n
== 4)
31374 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR
];
31375 else if (out_n
== 8 && in_n
== 8)
31376 return ix86_builtins
[IX86_BUILTIN_SQRTPS_NR256
];
31380 case BUILT_IN_IFLOOR
:
31381 case BUILT_IN_LFLOOR
:
31382 case BUILT_IN_LLFLOOR
:
31383 /* The round insn does not trap on denormals. */
31384 if (flag_trapping_math
|| !TARGET_ROUND
)
31387 if (out_mode
== SImode
&& in_mode
== DFmode
)
31389 if (out_n
== 4 && in_n
== 2)
31390 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX
];
31391 else if (out_n
== 8 && in_n
== 4)
31392 return ix86_builtins
[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256
];
31396 case BUILT_IN_IFLOORF
:
31397 case BUILT_IN_LFLOORF
:
31398 case BUILT_IN_LLFLOORF
:
31399 /* The round insn does not trap on denormals. */
31400 if (flag_trapping_math
|| !TARGET_ROUND
)
31403 if (out_mode
== SImode
&& in_mode
== SFmode
)
31405 if (out_n
== 4 && in_n
== 4)
31406 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX
];
31407 else if (out_n
== 8 && in_n
== 8)
31408 return ix86_builtins
[IX86_BUILTIN_FLOORPS_SFIX256
];
31412 case BUILT_IN_ICEIL
:
31413 case BUILT_IN_LCEIL
:
31414 case BUILT_IN_LLCEIL
:
31415 /* The round insn does not trap on denormals. */
31416 if (flag_trapping_math
|| !TARGET_ROUND
)
31419 if (out_mode
== SImode
&& in_mode
== DFmode
)
31421 if (out_n
== 4 && in_n
== 2)
31422 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX
];
31423 else if (out_n
== 8 && in_n
== 4)
31424 return ix86_builtins
[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256
];
31428 case BUILT_IN_ICEILF
:
31429 case BUILT_IN_LCEILF
:
31430 case BUILT_IN_LLCEILF
:
31431 /* The round insn does not trap on denormals. */
31432 if (flag_trapping_math
|| !TARGET_ROUND
)
31435 if (out_mode
== SImode
&& in_mode
== SFmode
)
31437 if (out_n
== 4 && in_n
== 4)
31438 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX
];
31439 else if (out_n
== 8 && in_n
== 8)
31440 return ix86_builtins
[IX86_BUILTIN_CEILPS_SFIX256
];
31444 case BUILT_IN_IRINT
:
31445 case BUILT_IN_LRINT
:
31446 case BUILT_IN_LLRINT
:
31447 if (out_mode
== SImode
&& in_mode
== DFmode
)
31449 if (out_n
== 4 && in_n
== 2)
31450 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX
];
31451 else if (out_n
== 8 && in_n
== 4)
31452 return ix86_builtins
[IX86_BUILTIN_VEC_PACK_SFIX256
];
31456 case BUILT_IN_IRINTF
:
31457 case BUILT_IN_LRINTF
:
31458 case BUILT_IN_LLRINTF
:
31459 if (out_mode
== SImode
&& in_mode
== SFmode
)
31461 if (out_n
== 4 && in_n
== 4)
31462 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ
];
31463 else if (out_n
== 8 && in_n
== 8)
31464 return ix86_builtins
[IX86_BUILTIN_CVTPS2DQ256
];
31468 case BUILT_IN_IROUND
:
31469 case BUILT_IN_LROUND
:
31470 case BUILT_IN_LLROUND
:
31471 /* The round insn does not trap on denormals. */
31472 if (flag_trapping_math
|| !TARGET_ROUND
)
31475 if (out_mode
== SImode
&& in_mode
== DFmode
)
31477 if (out_n
== 4 && in_n
== 2)
31478 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX
];
31479 else if (out_n
== 8 && in_n
== 4)
31480 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256
];
31484 case BUILT_IN_IROUNDF
:
31485 case BUILT_IN_LROUNDF
:
31486 case BUILT_IN_LLROUNDF
:
31487 /* The round insn does not trap on denormals. */
31488 if (flag_trapping_math
|| !TARGET_ROUND
)
31491 if (out_mode
== SImode
&& in_mode
== SFmode
)
31493 if (out_n
== 4 && in_n
== 4)
31494 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX
];
31495 else if (out_n
== 8 && in_n
== 8)
31496 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ_SFIX256
];
31500 case BUILT_IN_COPYSIGN
:
31501 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31503 if (out_n
== 2 && in_n
== 2)
31504 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD
];
31505 else if (out_n
== 4 && in_n
== 4)
31506 return ix86_builtins
[IX86_BUILTIN_CPYSGNPD256
];
31510 case BUILT_IN_COPYSIGNF
:
31511 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31513 if (out_n
== 4 && in_n
== 4)
31514 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS
];
31515 else if (out_n
== 8 && in_n
== 8)
31516 return ix86_builtins
[IX86_BUILTIN_CPYSGNPS256
];
31520 case BUILT_IN_FLOOR
:
31521 /* The round insn does not trap on denormals. */
31522 if (flag_trapping_math
|| !TARGET_ROUND
)
31525 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31527 if (out_n
== 2 && in_n
== 2)
31528 return ix86_builtins
[IX86_BUILTIN_FLOORPD
];
31529 else if (out_n
== 4 && in_n
== 4)
31530 return ix86_builtins
[IX86_BUILTIN_FLOORPD256
];
31534 case BUILT_IN_FLOORF
:
31535 /* The round insn does not trap on denormals. */
31536 if (flag_trapping_math
|| !TARGET_ROUND
)
31539 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31541 if (out_n
== 4 && in_n
== 4)
31542 return ix86_builtins
[IX86_BUILTIN_FLOORPS
];
31543 else if (out_n
== 8 && in_n
== 8)
31544 return ix86_builtins
[IX86_BUILTIN_FLOORPS256
];
31548 case BUILT_IN_CEIL
:
31549 /* The round insn does not trap on denormals. */
31550 if (flag_trapping_math
|| !TARGET_ROUND
)
31553 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31555 if (out_n
== 2 && in_n
== 2)
31556 return ix86_builtins
[IX86_BUILTIN_CEILPD
];
31557 else if (out_n
== 4 && in_n
== 4)
31558 return ix86_builtins
[IX86_BUILTIN_CEILPD256
];
31562 case BUILT_IN_CEILF
:
31563 /* The round insn does not trap on denormals. */
31564 if (flag_trapping_math
|| !TARGET_ROUND
)
31567 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31569 if (out_n
== 4 && in_n
== 4)
31570 return ix86_builtins
[IX86_BUILTIN_CEILPS
];
31571 else if (out_n
== 8 && in_n
== 8)
31572 return ix86_builtins
[IX86_BUILTIN_CEILPS256
];
31576 case BUILT_IN_TRUNC
:
31577 /* The round insn does not trap on denormals. */
31578 if (flag_trapping_math
|| !TARGET_ROUND
)
31581 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31583 if (out_n
== 2 && in_n
== 2)
31584 return ix86_builtins
[IX86_BUILTIN_TRUNCPD
];
31585 else if (out_n
== 4 && in_n
== 4)
31586 return ix86_builtins
[IX86_BUILTIN_TRUNCPD256
];
31590 case BUILT_IN_TRUNCF
:
31591 /* The round insn does not trap on denormals. */
31592 if (flag_trapping_math
|| !TARGET_ROUND
)
31595 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31597 if (out_n
== 4 && in_n
== 4)
31598 return ix86_builtins
[IX86_BUILTIN_TRUNCPS
];
31599 else if (out_n
== 8 && in_n
== 8)
31600 return ix86_builtins
[IX86_BUILTIN_TRUNCPS256
];
31604 case BUILT_IN_RINT
:
31605 /* The round insn does not trap on denormals. */
31606 if (flag_trapping_math
|| !TARGET_ROUND
)
31609 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31611 if (out_n
== 2 && in_n
== 2)
31612 return ix86_builtins
[IX86_BUILTIN_RINTPD
];
31613 else if (out_n
== 4 && in_n
== 4)
31614 return ix86_builtins
[IX86_BUILTIN_RINTPD256
];
31618 case BUILT_IN_RINTF
:
31619 /* The round insn does not trap on denormals. */
31620 if (flag_trapping_math
|| !TARGET_ROUND
)
31623 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31625 if (out_n
== 4 && in_n
== 4)
31626 return ix86_builtins
[IX86_BUILTIN_RINTPS
];
31627 else if (out_n
== 8 && in_n
== 8)
31628 return ix86_builtins
[IX86_BUILTIN_RINTPS256
];
31632 case BUILT_IN_ROUND
:
31633 /* The round insn does not trap on denormals. */
31634 if (flag_trapping_math
|| !TARGET_ROUND
)
31637 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31639 if (out_n
== 2 && in_n
== 2)
31640 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ
];
31641 else if (out_n
== 4 && in_n
== 4)
31642 return ix86_builtins
[IX86_BUILTIN_ROUNDPD_AZ256
];
31646 case BUILT_IN_ROUNDF
:
31647 /* The round insn does not trap on denormals. */
31648 if (flag_trapping_math
|| !TARGET_ROUND
)
31651 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31653 if (out_n
== 4 && in_n
== 4)
31654 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ
];
31655 else if (out_n
== 8 && in_n
== 8)
31656 return ix86_builtins
[IX86_BUILTIN_ROUNDPS_AZ256
];
31661 if (out_mode
== DFmode
&& in_mode
== DFmode
)
31663 if (out_n
== 2 && in_n
== 2)
31664 return ix86_builtins
[IX86_BUILTIN_VFMADDPD
];
31665 if (out_n
== 4 && in_n
== 4)
31666 return ix86_builtins
[IX86_BUILTIN_VFMADDPD256
];
31670 case BUILT_IN_FMAF
:
31671 if (out_mode
== SFmode
&& in_mode
== SFmode
)
31673 if (out_n
== 4 && in_n
== 4)
31674 return ix86_builtins
[IX86_BUILTIN_VFMADDPS
];
31675 if (out_n
== 8 && in_n
== 8)
31676 return ix86_builtins
[IX86_BUILTIN_VFMADDPS256
];
31684 /* Dispatch to a handler for a vectorization library. */
31685 if (ix86_veclib_handler
)
31686 return ix86_veclib_handler ((enum built_in_function
) fn
, type_out
,
31692 /* Handler for an SVML-style interface to
31693 a library with vectorized intrinsics. */
31696 ix86_veclibabi_svml (enum built_in_function fn
, tree type_out
, tree type_in
)
31699 tree fntype
, new_fndecl
, args
;
31702 enum machine_mode el_mode
, in_mode
;
31705 /* The SVML is suitable for unsafe math only. */
31706 if (!flag_unsafe_math_optimizations
)
31709 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
31710 n
= TYPE_VECTOR_SUBPARTS (type_out
);
31711 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
31712 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
31713 if (el_mode
!= in_mode
31721 case BUILT_IN_LOG10
:
31723 case BUILT_IN_TANH
:
31725 case BUILT_IN_ATAN
:
31726 case BUILT_IN_ATAN2
:
31727 case BUILT_IN_ATANH
:
31728 case BUILT_IN_CBRT
:
31729 case BUILT_IN_SINH
:
31731 case BUILT_IN_ASINH
:
31732 case BUILT_IN_ASIN
:
31733 case BUILT_IN_COSH
:
31735 case BUILT_IN_ACOSH
:
31736 case BUILT_IN_ACOS
:
31737 if (el_mode
!= DFmode
|| n
!= 2)
31741 case BUILT_IN_EXPF
:
31742 case BUILT_IN_LOGF
:
31743 case BUILT_IN_LOG10F
:
31744 case BUILT_IN_POWF
:
31745 case BUILT_IN_TANHF
:
31746 case BUILT_IN_TANF
:
31747 case BUILT_IN_ATANF
:
31748 case BUILT_IN_ATAN2F
:
31749 case BUILT_IN_ATANHF
:
31750 case BUILT_IN_CBRTF
:
31751 case BUILT_IN_SINHF
:
31752 case BUILT_IN_SINF
:
31753 case BUILT_IN_ASINHF
:
31754 case BUILT_IN_ASINF
:
31755 case BUILT_IN_COSHF
:
31756 case BUILT_IN_COSF
:
31757 case BUILT_IN_ACOSHF
:
31758 case BUILT_IN_ACOSF
:
31759 if (el_mode
!= SFmode
|| n
!= 4)
31767 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
31769 if (fn
== BUILT_IN_LOGF
)
31770 strcpy (name
, "vmlsLn4");
31771 else if (fn
== BUILT_IN_LOG
)
31772 strcpy (name
, "vmldLn2");
31775 sprintf (name
, "vmls%s", bname
+10);
31776 name
[strlen (name
)-1] = '4';
31779 sprintf (name
, "vmld%s2", bname
+10);
31781 /* Convert to uppercase. */
31785 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
31787 args
= TREE_CHAIN (args
))
31791 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
31793 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
31795 /* Build a function declaration for the vectorized function. */
31796 new_fndecl
= build_decl (BUILTINS_LOCATION
,
31797 FUNCTION_DECL
, get_identifier (name
), fntype
);
31798 TREE_PUBLIC (new_fndecl
) = 1;
31799 DECL_EXTERNAL (new_fndecl
) = 1;
31800 DECL_IS_NOVOPS (new_fndecl
) = 1;
31801 TREE_READONLY (new_fndecl
) = 1;
31806 /* Handler for an ACML-style interface to
31807 a library with vectorized intrinsics. */
31810 ix86_veclibabi_acml (enum built_in_function fn
, tree type_out
, tree type_in
)
31812 char name
[20] = "__vr.._";
31813 tree fntype
, new_fndecl
, args
;
31816 enum machine_mode el_mode
, in_mode
;
31819 /* The ACML is 64bits only and suitable for unsafe math only as
31820 it does not correctly support parts of IEEE with the required
31821 precision such as denormals. */
31823 || !flag_unsafe_math_optimizations
)
31826 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
31827 n
= TYPE_VECTOR_SUBPARTS (type_out
);
31828 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
31829 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
31830 if (el_mode
!= in_mode
31840 case BUILT_IN_LOG2
:
31841 case BUILT_IN_LOG10
:
31844 if (el_mode
!= DFmode
31849 case BUILT_IN_SINF
:
31850 case BUILT_IN_COSF
:
31851 case BUILT_IN_EXPF
:
31852 case BUILT_IN_POWF
:
31853 case BUILT_IN_LOGF
:
31854 case BUILT_IN_LOG2F
:
31855 case BUILT_IN_LOG10F
:
31858 if (el_mode
!= SFmode
31867 bname
= IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn
)));
31868 sprintf (name
+ 7, "%s", bname
+10);
31871 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
31873 args
= TREE_CHAIN (args
))
31877 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
31879 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
31881 /* Build a function declaration for the vectorized function. */
31882 new_fndecl
= build_decl (BUILTINS_LOCATION
,
31883 FUNCTION_DECL
, get_identifier (name
), fntype
);
31884 TREE_PUBLIC (new_fndecl
) = 1;
31885 DECL_EXTERNAL (new_fndecl
) = 1;
31886 DECL_IS_NOVOPS (new_fndecl
) = 1;
31887 TREE_READONLY (new_fndecl
) = 1;
31892 /* Returns a decl of a function that implements gather load with
31893 memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
31894 Return NULL_TREE if it is not available. */
31897 ix86_vectorize_builtin_gather (const_tree mem_vectype
,
31898 const_tree index_type
, int scale
)
31901 enum ix86_builtins code
;
31906 if ((TREE_CODE (index_type
) != INTEGER_TYPE
31907 && !POINTER_TYPE_P (index_type
))
31908 || (TYPE_MODE (index_type
) != SImode
31909 && TYPE_MODE (index_type
) != DImode
))
31912 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
31915 /* v*gather* insn sign extends index to pointer mode. */
31916 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
31917 && TYPE_UNSIGNED (index_type
))
31922 || (scale
& (scale
- 1)) != 0)
31925 si
= TYPE_MODE (index_type
) == SImode
;
31926 switch (TYPE_MODE (mem_vectype
))
31929 code
= si
? IX86_BUILTIN_GATHERSIV2DF
: IX86_BUILTIN_GATHERDIV2DF
;
31932 code
= si
? IX86_BUILTIN_GATHERALTSIV4DF
: IX86_BUILTIN_GATHERDIV4DF
;
31935 code
= si
? IX86_BUILTIN_GATHERSIV2DI
: IX86_BUILTIN_GATHERDIV2DI
;
31938 code
= si
? IX86_BUILTIN_GATHERALTSIV4DI
: IX86_BUILTIN_GATHERDIV4DI
;
31941 code
= si
? IX86_BUILTIN_GATHERSIV4SF
: IX86_BUILTIN_GATHERDIV4SF
;
31944 code
= si
? IX86_BUILTIN_GATHERSIV8SF
: IX86_BUILTIN_GATHERALTDIV8SF
;
31947 code
= si
? IX86_BUILTIN_GATHERSIV4SI
: IX86_BUILTIN_GATHERDIV4SI
;
31950 code
= si
? IX86_BUILTIN_GATHERSIV8SI
: IX86_BUILTIN_GATHERALTDIV8SI
;
31956 return ix86_builtins
[code
];
31959 /* Returns a code for a target-specific builtin that implements
31960 reciprocal of the function, or NULL_TREE if not available. */
31963 ix86_builtin_reciprocal (unsigned int fn
, bool md_fn
,
31964 bool sqrt ATTRIBUTE_UNUSED
)
31966 if (! (TARGET_SSE_MATH
&& !optimize_insn_for_size_p ()
31967 && flag_finite_math_only
&& !flag_trapping_math
31968 && flag_unsafe_math_optimizations
))
31972 /* Machine dependent builtins. */
31975 /* Vectorized version of sqrt to rsqrt conversion. */
31976 case IX86_BUILTIN_SQRTPS_NR
:
31977 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR
];
31979 case IX86_BUILTIN_SQRTPS_NR256
:
31980 return ix86_builtins
[IX86_BUILTIN_RSQRTPS_NR256
];
31986 /* Normal builtins. */
31989 /* Sqrt to rsqrt conversion. */
31990 case BUILT_IN_SQRTF
:
31991 return ix86_builtins
[IX86_BUILTIN_RSQRTF
];
31998 /* Helper for avx_vpermilps256_operand et al. This is also used by
31999 the expansion functions to turn the parallel back into a mask.
32000 The return value is 0 for no match and the imm8+1 for a match. */
32003 avx_vpermilp_parallel (rtx par
, enum machine_mode mode
)
32005 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
32007 unsigned char ipar
[8];
32009 if (XVECLEN (par
, 0) != (int) nelt
)
32012 /* Validate that all of the elements are constants, and not totally
32013 out of range. Copy the data into an integral array to make the
32014 subsequent checks easier. */
32015 for (i
= 0; i
< nelt
; ++i
)
32017 rtx er
= XVECEXP (par
, 0, i
);
32018 unsigned HOST_WIDE_INT ei
;
32020 if (!CONST_INT_P (er
))
32031 /* In the 256-bit DFmode case, we can only move elements within
32033 for (i
= 0; i
< 2; ++i
)
32037 mask
|= ipar
[i
] << i
;
32039 for (i
= 2; i
< 4; ++i
)
32043 mask
|= (ipar
[i
] - 2) << i
;
32048 /* In the 256-bit SFmode case, we have full freedom of movement
32049 within the low 128-bit lane, but the high 128-bit lane must
32050 mirror the exact same pattern. */
32051 for (i
= 0; i
< 4; ++i
)
32052 if (ipar
[i
] + 4 != ipar
[i
+ 4])
32059 /* In the 128-bit case, we've full freedom in the placement of
32060 the elements from the source operand. */
32061 for (i
= 0; i
< nelt
; ++i
)
32062 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
32066 gcc_unreachable ();
32069 /* Make sure success has a non-zero value by adding one. */
32073 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
32074 the expansion functions to turn the parallel back into a mask.
32075 The return value is 0 for no match and the imm8+1 for a match. */
32078 avx_vperm2f128_parallel (rtx par
, enum machine_mode mode
)
32080 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
32082 unsigned char ipar
[8];
32084 if (XVECLEN (par
, 0) != (int) nelt
)
32087 /* Validate that all of the elements are constants, and not totally
32088 out of range. Copy the data into an integral array to make the
32089 subsequent checks easier. */
32090 for (i
= 0; i
< nelt
; ++i
)
32092 rtx er
= XVECEXP (par
, 0, i
);
32093 unsigned HOST_WIDE_INT ei
;
32095 if (!CONST_INT_P (er
))
32098 if (ei
>= 2 * nelt
)
32103 /* Validate that the halves of the permute are halves. */
32104 for (i
= 0; i
< nelt2
- 1; ++i
)
32105 if (ipar
[i
] + 1 != ipar
[i
+ 1])
32107 for (i
= nelt2
; i
< nelt
- 1; ++i
)
32108 if (ipar
[i
] + 1 != ipar
[i
+ 1])
32111 /* Reconstruct the mask. */
32112 for (i
= 0; i
< 2; ++i
)
32114 unsigned e
= ipar
[i
* nelt2
];
32118 mask
|= e
<< (i
* 4);
32121 /* Make sure success has a non-zero value by adding one. */
32125 /* Store OPERAND to the memory after reload is completed. This means
32126 that we can't easily use assign_stack_local. */
32128 ix86_force_to_memory (enum machine_mode mode
, rtx operand
)
32132 gcc_assert (reload_completed
);
32133 if (ix86_using_red_zone ())
32135 result
= gen_rtx_MEM (mode
,
32136 gen_rtx_PLUS (Pmode
,
32138 GEN_INT (-RED_ZONE_SIZE
)));
32139 emit_move_insn (result
, operand
);
32141 else if (TARGET_64BIT
)
32147 operand
= gen_lowpart (DImode
, operand
);
32151 gen_rtx_SET (VOIDmode
,
32152 gen_rtx_MEM (DImode
,
32153 gen_rtx_PRE_DEC (DImode
,
32154 stack_pointer_rtx
)),
32158 gcc_unreachable ();
32160 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
32169 split_double_mode (mode
, &operand
, 1, operands
, operands
+ 1);
32171 gen_rtx_SET (VOIDmode
,
32172 gen_rtx_MEM (SImode
,
32173 gen_rtx_PRE_DEC (Pmode
,
32174 stack_pointer_rtx
)),
32177 gen_rtx_SET (VOIDmode
,
32178 gen_rtx_MEM (SImode
,
32179 gen_rtx_PRE_DEC (Pmode
,
32180 stack_pointer_rtx
)),
32185 /* Store HImodes as SImodes. */
32186 operand
= gen_lowpart (SImode
, operand
);
32190 gen_rtx_SET (VOIDmode
,
32191 gen_rtx_MEM (GET_MODE (operand
),
32192 gen_rtx_PRE_DEC (SImode
,
32193 stack_pointer_rtx
)),
32197 gcc_unreachable ();
32199 result
= gen_rtx_MEM (mode
, stack_pointer_rtx
);
32204 /* Free operand from the memory. */
32206 ix86_free_from_memory (enum machine_mode mode
)
32208 if (!ix86_using_red_zone ())
32212 if (mode
== DImode
|| TARGET_64BIT
)
32216 /* Use LEA to deallocate stack space. In peephole2 it will be converted
32217 to pop or add instruction if registers are available. */
32218 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
32219 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
32224 /* Return true if we use LRA instead of reload pass. */
32231 /* Return a register priority for hard reg REGNO. */
32233 ix86_register_priority (int hard_regno
)
32235 /* ebp and r13 as the base always wants a displacement, r12 as the
32236 base always wants an index. So discourage their usage in an
32238 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
32240 if (hard_regno
== BP_REG
)
32242 /* New x86-64 int registers result in bigger code size. Discourage
32244 if (FIRST_REX_INT_REG
<= hard_regno
&& hard_regno
<= LAST_REX_INT_REG
)
32246 /* New x86-64 SSE registers result in bigger code size. Discourage
32248 if (FIRST_REX_SSE_REG
<= hard_regno
&& hard_regno
<= LAST_REX_SSE_REG
)
32250 /* Usage of AX register results in smaller code. Prefer it. */
32251 if (hard_regno
== 0)
32256 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
32258 Put float CONST_DOUBLE in the constant pool instead of fp regs.
32259 QImode must go into class Q_REGS.
32260 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
32261 movdf to do mem-to-mem moves through integer regs. */
32264 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
32266 enum machine_mode mode
= GET_MODE (x
);
32268 /* We're only allowed to return a subclass of CLASS. Many of the
32269 following checks fail for NO_REGS, so eliminate that early. */
32270 if (regclass
== NO_REGS
)
32273 /* All classes can load zeros. */
32274 if (x
== CONST0_RTX (mode
))
32277 /* Force constants into memory if we are loading a (nonzero) constant into
32278 an MMX or SSE register. This is because there are no MMX/SSE instructions
32279 to load from a constant. */
32281 && (MAYBE_MMX_CLASS_P (regclass
) || MAYBE_SSE_CLASS_P (regclass
)))
32284 /* Prefer SSE regs only, if we can use them for math. */
32285 if (TARGET_SSE_MATH
&& !TARGET_MIX_SSE_I387
&& SSE_FLOAT_MODE_P (mode
))
32286 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
32288 /* Floating-point constants need more complex checks. */
32289 if (GET_CODE (x
) == CONST_DOUBLE
&& GET_MODE (x
) != VOIDmode
)
32291 /* General regs can load everything. */
32292 if (reg_class_subset_p (regclass
, GENERAL_REGS
))
32295 /* Floats can load 0 and 1 plus some others. Note that we eliminated
32296 zero above. We only want to wind up preferring 80387 registers if
32297 we plan on doing computation with them. */
32299 && standard_80387_constant_p (x
) > 0)
32301 /* Limit class to non-sse. */
32302 if (regclass
== FLOAT_SSE_REGS
)
32304 if (regclass
== FP_TOP_SSE_REGS
)
32306 if (regclass
== FP_SECOND_SSE_REGS
)
32307 return FP_SECOND_REG
;
32308 if (regclass
== FLOAT_INT_REGS
|| regclass
== FLOAT_REGS
)
32315 /* Generally when we see PLUS here, it's the function invariant
32316 (plus soft-fp const_int). Which can only be computed into general
32318 if (GET_CODE (x
) == PLUS
)
32319 return reg_class_subset_p (regclass
, GENERAL_REGS
) ? regclass
: NO_REGS
;
32321 /* QImode constants are easy to load, but non-constant QImode data
32322 must go into Q_REGS. */
32323 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
32325 if (reg_class_subset_p (regclass
, Q_REGS
))
32327 if (reg_class_subset_p (Q_REGS
, regclass
))
32335 /* Discourage putting floating-point values in SSE registers unless
32336 SSE math is being used, and likewise for the 387 registers. */
32338 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
32340 enum machine_mode mode
= GET_MODE (x
);
32342 /* Restrict the output reload class to the register bank that we are doing
32343 math on. If we would like not to return a subset of CLASS, reject this
32344 alternative: if reload cannot do this, it will still use its choice. */
32345 mode
= GET_MODE (x
);
32346 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
32347 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
32349 if (X87_FLOAT_MODE_P (mode
))
32351 if (regclass
== FP_TOP_SSE_REGS
)
32353 else if (regclass
== FP_SECOND_SSE_REGS
)
32354 return FP_SECOND_REG
;
32356 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
32363 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
32364 enum machine_mode mode
, secondary_reload_info
*sri
)
32366 /* Double-word spills from general registers to non-offsettable memory
32367 references (zero-extended addresses) require special handling. */
32370 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
32371 && rclass
== GENERAL_REGS
32372 && !offsettable_memref_p (x
))
32375 ? CODE_FOR_reload_noff_load
32376 : CODE_FOR_reload_noff_store
);
32377 /* Add the cost of moving address to a temporary. */
32378 sri
->extra_cost
= 1;
32383 /* QImode spills from non-QI registers require
32384 intermediate register on 32bit targets. */
32386 && !in_p
&& mode
== QImode
32387 && (rclass
== GENERAL_REGS
32388 || rclass
== LEGACY_REGS
32389 || rclass
== NON_Q_REGS
32392 || rclass
== INDEX_REGS
))
32401 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
32402 regno
= true_regnum (x
);
32404 /* Return Q_REGS if the operand is in memory. */
32409 /* This condition handles corner case where an expression involving
32410 pointers gets vectorized. We're trying to use the address of a
32411 stack slot as a vector initializer.
32413 (set (reg:V2DI 74 [ vect_cst_.2 ])
32414 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
32416 Eventually frame gets turned into sp+offset like this:
32418 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
32419 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
32420 (const_int 392 [0x188]))))
32422 That later gets turned into:
32424 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
32425 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
32426 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
32428 We'll have the following reload recorded:
32430 Reload 0: reload_in (DI) =
32431 (plus:DI (reg/f:DI 7 sp)
32432 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
32433 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
32434 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
32435 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
32436 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
32437 reload_reg_rtx: (reg:V2DI 22 xmm1)
32439 Which isn't going to work since SSE instructions can't handle scalar
32440 additions. Returning GENERAL_REGS forces the addition into integer
32441 register and reload can handle subsequent reloads without problems. */
32443 if (in_p
&& GET_CODE (x
) == PLUS
32444 && SSE_CLASS_P (rclass
)
32445 && SCALAR_INT_MODE_P (mode
))
32446 return GENERAL_REGS
;
32451 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
32454 ix86_class_likely_spilled_p (reg_class_t rclass
)
32465 case SSE_FIRST_REG
:
32467 case FP_SECOND_REG
:
32477 /* If we are copying between general and FP registers, we need a memory
32478 location. The same is true for SSE and MMX registers.
32480 To optimize register_move_cost performance, allow inline variant.
32482 The macro can't work reliably when one of the CLASSES is class containing
32483 registers from multiple units (SSE, MMX, integer). We avoid this by never
32484 combining those units in single alternative in the machine description.
32485 Ensure that this constraint holds to avoid unexpected surprises.
32487 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
32488 enforce these sanity checks. */
32491 inline_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
32492 enum machine_mode mode
, int strict
)
32494 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
32495 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
32496 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
32497 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
32498 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
32499 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
))
32501 gcc_assert (!strict
|| lra_in_progress
);
32505 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
32508 /* ??? This is a lie. We do have moves between mmx/general, and for
32509 mmx/sse2. But by saying we need secondary memory we discourage the
32510 register allocator from using the mmx registers unless needed. */
32511 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
32514 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
32516 /* SSE1 doesn't have any direct moves from other classes. */
32520 /* If the target says that inter-unit moves are more expensive
32521 than moving through memory, then don't generate them. */
32522 if (!TARGET_INTER_UNIT_MOVES
)
32525 /* Between SSE and general, we have moves no larger than word size. */
32526 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
32534 ix86_secondary_memory_needed (enum reg_class class1
, enum reg_class class2
,
32535 enum machine_mode mode
, int strict
)
32537 return inline_secondary_memory_needed (class1
, class2
, mode
, strict
);
32540 /* Implement the TARGET_CLASS_MAX_NREGS hook.
32542 On the 80386, this is the size of MODE in words,
32543 except in the FP regs, where a single reg is always enough. */
32545 static unsigned char
32546 ix86_class_max_nregs (reg_class_t rclass
, enum machine_mode mode
)
32548 if (MAYBE_INTEGER_CLASS_P (rclass
))
32550 if (mode
== XFmode
)
32551 return (TARGET_64BIT
? 2 : 3);
32552 else if (mode
== XCmode
)
32553 return (TARGET_64BIT
? 4 : 6);
32555 return ((GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
);
32559 if (COMPLEX_MODE_P (mode
))
32566 /* Return true if the registers in CLASS cannot represent the change from
32567 modes FROM to TO. */
32570 ix86_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
32571 enum reg_class regclass
)
32576 /* x87 registers can't do subreg at all, as all values are reformatted
32577 to extended precision. */
32578 if (MAYBE_FLOAT_CLASS_P (regclass
))
32581 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
32583 /* Vector registers do not support QI or HImode loads. If we don't
32584 disallow a change to these modes, reload will assume it's ok to
32585 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
32586 the vec_dupv4hi pattern. */
32587 if (GET_MODE_SIZE (from
) < 4)
32590 /* Vector registers do not support subreg with nonzero offsets, which
32591 are otherwise valid for integer registers. Since we can't see
32592 whether we have a nonzero offset from here, prohibit all
32593 nonparadoxical subregs changing size. */
32594 if (GET_MODE_SIZE (to
) < GET_MODE_SIZE (from
))
32601 /* Return the cost of moving data of mode M between a
32602 register and memory. A value of 2 is the default; this cost is
32603 relative to those in `REGISTER_MOVE_COST'.
32605 This function is used extensively by register_move_cost that is used to
32606 build tables at startup. Make it inline in this case.
32607 When IN is 2, return maximum of in and out move cost.
32609 If moving between registers and memory is more expensive than
32610 between two registers, you should define this macro to express the
32613 Model also increased moving costs of QImode registers in non
32617 inline_memory_move_cost (enum machine_mode mode
, enum reg_class regclass
,
32621 if (FLOAT_CLASS_P (regclass
))
32639 return MAX (ix86_cost
->fp_load
[index
], ix86_cost
->fp_store
[index
]);
32640 return in
? ix86_cost
->fp_load
[index
] : ix86_cost
->fp_store
[index
];
32642 if (SSE_CLASS_P (regclass
))
32645 switch (GET_MODE_SIZE (mode
))
32660 return MAX (ix86_cost
->sse_load
[index
], ix86_cost
->sse_store
[index
]);
32661 return in
? ix86_cost
->sse_load
[index
] : ix86_cost
->sse_store
[index
];
32663 if (MMX_CLASS_P (regclass
))
32666 switch (GET_MODE_SIZE (mode
))
32678 return MAX (ix86_cost
->mmx_load
[index
], ix86_cost
->mmx_store
[index
]);
32679 return in
? ix86_cost
->mmx_load
[index
] : ix86_cost
->mmx_store
[index
];
32681 switch (GET_MODE_SIZE (mode
))
32684 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
32687 return ix86_cost
->int_store
[0];
32688 if (TARGET_PARTIAL_REG_DEPENDENCY
32689 && optimize_function_for_speed_p (cfun
))
32690 cost
= ix86_cost
->movzbl_load
;
32692 cost
= ix86_cost
->int_load
[0];
32694 return MAX (cost
, ix86_cost
->int_store
[0]);
32700 return MAX (ix86_cost
->movzbl_load
, ix86_cost
->int_store
[0] + 4);
32702 return ix86_cost
->movzbl_load
;
32704 return ix86_cost
->int_store
[0] + 4;
32709 return MAX (ix86_cost
->int_load
[1], ix86_cost
->int_store
[1]);
32710 return in
? ix86_cost
->int_load
[1] : ix86_cost
->int_store
[1];
32712 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
32713 if (mode
== TFmode
)
32716 cost
= MAX (ix86_cost
->int_load
[2] , ix86_cost
->int_store
[2]);
32718 cost
= ix86_cost
->int_load
[2];
32720 cost
= ix86_cost
->int_store
[2];
32721 return (cost
* (((int) GET_MODE_SIZE (mode
)
32722 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
));
32727 ix86_memory_move_cost (enum machine_mode mode
, reg_class_t regclass
,
32730 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
32734 /* Return the cost of moving data from a register in class CLASS1 to
32735 one in class CLASS2.
32737 It is not required that the cost always equal 2 when FROM is the same as TO;
32738 on some machines it is expensive to move between registers if they are not
32739 general registers. */
32742 ix86_register_move_cost (enum machine_mode mode
, reg_class_t class1_i
,
32743 reg_class_t class2_i
)
32745 enum reg_class class1
= (enum reg_class
) class1_i
;
32746 enum reg_class class2
= (enum reg_class
) class2_i
;
32748 /* In case we require secondary memory, compute cost of the store followed
32749 by load. In order to avoid bad register allocation choices, we need
32750 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
32752 if (inline_secondary_memory_needed (class1
, class2
, mode
, 0))
32756 cost
+= inline_memory_move_cost (mode
, class1
, 2);
32757 cost
+= inline_memory_move_cost (mode
, class2
, 2);
32759 /* In case of copying from general_purpose_register we may emit multiple
32760 stores followed by single load causing memory size mismatch stall.
32761 Count this as arbitrarily high cost of 20. */
32762 if (targetm
.class_max_nregs (class1
, mode
)
32763 > targetm
.class_max_nregs (class2
, mode
))
32766 /* In the case of FP/MMX moves, the registers actually overlap, and we
32767 have to switch modes in order to treat them differently. */
32768 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
32769 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
32775 /* Moves between SSE/MMX and integer unit are expensive. */
32776 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
)
32777 || SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
32779 /* ??? By keeping returned value relatively high, we limit the number
32780 of moves between integer and MMX/SSE registers for all targets.
32781 Additionally, high value prevents problem with x86_modes_tieable_p(),
32782 where integer modes in MMX/SSE registers are not tieable
32783 because of missing QImode and HImode moves to, from or between
32784 MMX/SSE registers. */
32785 return MAX (8, ix86_cost
->mmxsse_to_integer
);
32787 if (MAYBE_FLOAT_CLASS_P (class1
))
32788 return ix86_cost
->fp_move
;
32789 if (MAYBE_SSE_CLASS_P (class1
))
32790 return ix86_cost
->sse_move
;
32791 if (MAYBE_MMX_CLASS_P (class1
))
32792 return ix86_cost
->mmx_move
;
32796 /* Return TRUE if hard register REGNO can hold a value of machine-mode
32800 ix86_hard_regno_mode_ok (int regno
, enum machine_mode mode
)
32802 /* Flags and only flags can only hold CCmode values. */
32803 if (CC_REGNO_P (regno
))
32804 return GET_MODE_CLASS (mode
) == MODE_CC
;
32805 if (GET_MODE_CLASS (mode
) == MODE_CC
32806 || GET_MODE_CLASS (mode
) == MODE_RANDOM
32807 || GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
32809 if (STACK_REGNO_P (regno
))
32810 return VALID_FP_MODE_P (mode
);
32811 if (SSE_REGNO_P (regno
))
32813 /* We implement the move patterns for all vector modes into and
32814 out of SSE registers, even when no operation instructions
32815 are available. OImode move is available only when AVX is
32817 return ((TARGET_AVX
&& mode
== OImode
)
32818 || VALID_AVX256_REG_MODE (mode
)
32819 || VALID_SSE_REG_MODE (mode
)
32820 || VALID_SSE2_REG_MODE (mode
)
32821 || VALID_MMX_REG_MODE (mode
)
32822 || VALID_MMX_REG_MODE_3DNOW (mode
));
32824 if (MMX_REGNO_P (regno
))
32826 /* We implement the move patterns for 3DNOW modes even in MMX mode,
32827 so if the register is available at all, then we can move data of
32828 the given mode into or out of it. */
32829 return (VALID_MMX_REG_MODE (mode
)
32830 || VALID_MMX_REG_MODE_3DNOW (mode
));
32833 if (mode
== QImode
)
32835 /* Take care for QImode values - they can be in non-QI regs,
32836 but then they do cause partial register stalls. */
32837 if (TARGET_64BIT
|| QI_REGNO_P (regno
))
32839 if (!TARGET_PARTIAL_REG_STALL
)
32841 return !can_create_pseudo_p ();
32843 /* We handle both integer and floats in the general purpose registers. */
32844 else if (VALID_INT_MODE_P (mode
))
32846 else if (VALID_FP_MODE_P (mode
))
32848 else if (VALID_DFP_MODE_P (mode
))
32850 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
32851 on to use that value in smaller contexts, this can easily force a
32852 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
32853 supporting DImode, allow it. */
32854 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
32860 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
32861 tieable integer mode. */
32864 ix86_tieable_integer_mode_p (enum machine_mode mode
)
32873 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
32876 return TARGET_64BIT
;
32883 /* Return true if MODE1 is accessible in a register that can hold MODE2
32884 without copying. That is, all register classes that can hold MODE2
32885 can also hold MODE1. */
32888 ix86_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
32890 if (mode1
== mode2
)
32893 if (ix86_tieable_integer_mode_p (mode1
)
32894 && ix86_tieable_integer_mode_p (mode2
))
32897 /* MODE2 being XFmode implies fp stack or general regs, which means we
32898 can tie any smaller floating point modes to it. Note that we do not
32899 tie this with TFmode. */
32900 if (mode2
== XFmode
)
32901 return mode1
== SFmode
|| mode1
== DFmode
;
32903 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
32904 that we can tie it with SFmode. */
32905 if (mode2
== DFmode
)
32906 return mode1
== SFmode
;
32908 /* If MODE2 is only appropriate for an SSE register, then tie with
32909 any other mode acceptable to SSE registers. */
32910 if (GET_MODE_SIZE (mode2
) == 32
32911 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
32912 return (GET_MODE_SIZE (mode1
) == 32
32913 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
32914 if (GET_MODE_SIZE (mode2
) == 16
32915 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
32916 return (GET_MODE_SIZE (mode1
) == 16
32917 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
32919 /* If MODE2 is appropriate for an MMX register, then tie
32920 with any other mode acceptable to MMX registers. */
32921 if (GET_MODE_SIZE (mode2
) == 8
32922 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
32923 return (GET_MODE_SIZE (mode1
) == 8
32924 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
32929 /* Return the cost of moving between two registers of mode MODE. */
32932 ix86_set_reg_reg_cost (enum machine_mode mode
)
32934 unsigned int units
= UNITS_PER_WORD
;
32936 switch (GET_MODE_CLASS (mode
))
32942 units
= GET_MODE_SIZE (CCmode
);
32946 if ((TARGET_SSE
&& mode
== TFmode
)
32947 || (TARGET_80387
&& mode
== XFmode
)
32948 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
32949 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
32950 units
= GET_MODE_SIZE (mode
);
32953 case MODE_COMPLEX_FLOAT
:
32954 if ((TARGET_SSE
&& mode
== TCmode
)
32955 || (TARGET_80387
&& mode
== XCmode
)
32956 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
32957 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
32958 units
= GET_MODE_SIZE (mode
);
32961 case MODE_VECTOR_INT
:
32962 case MODE_VECTOR_FLOAT
:
32963 if ((TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
32964 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
32965 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
32966 || (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
)))
32967 units
= GET_MODE_SIZE (mode
);
32970 /* Return the cost of moving between two registers of mode MODE,
32971 assuming that the move will be in pieces of at most UNITS bytes. */
32972 return COSTS_N_INSNS ((GET_MODE_SIZE (mode
) + units
- 1) / units
);
32975 /* Compute a (partial) cost for rtx X. Return true if the complete
32976 cost has been computed, and false if subexpressions should be
32977 scanned. In either case, *TOTAL contains the cost result. */
32980 ix86_rtx_costs (rtx x
, int code_i
, int outer_code_i
, int opno
, int *total
,
32983 enum rtx_code code
= (enum rtx_code
) code_i
;
32984 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
32985 enum machine_mode mode
= GET_MODE (x
);
32986 const struct processor_costs
*cost
= speed
? ix86_cost
: &ix86_size_cost
;
32991 if (register_operand (SET_DEST (x
), VOIDmode
)
32992 && reg_or_0_operand (SET_SRC (x
), VOIDmode
))
32994 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
33003 if (TARGET_64BIT
&& !x86_64_immediate_operand (x
, VOIDmode
))
33005 else if (TARGET_64BIT
&& !x86_64_zext_immediate_operand (x
, VOIDmode
))
33007 else if (flag_pic
&& SYMBOLIC_CONST (x
)
33009 || (!GET_CODE (x
) != LABEL_REF
33010 && (GET_CODE (x
) != SYMBOL_REF
33011 || !SYMBOL_REF_LOCAL_P (x
)))))
33018 if (mode
== VOIDmode
)
33023 switch (standard_80387_constant_p (x
))
33028 default: /* Other constants */
33035 if (SSE_FLOAT_MODE_P (mode
))
33038 switch (standard_sse_constant_p (x
))
33042 case 1: /* 0: xor eliminates false dependency */
33045 default: /* -1: cmp contains false dependency */
33050 /* Fall back to (MEM (SYMBOL_REF)), since that's where
33051 it'll probably end up. Add a penalty for size. */
33052 *total
= (COSTS_N_INSNS (1)
33053 + (flag_pic
!= 0 && !TARGET_64BIT
)
33054 + (mode
== SFmode
? 0 : mode
== DFmode
? 1 : 2));
33058 /* The zero extensions is often completely free on x86_64, so make
33059 it as cheap as possible. */
33060 if (TARGET_64BIT
&& mode
== DImode
33061 && GET_MODE (XEXP (x
, 0)) == SImode
)
33063 else if (TARGET_ZERO_EXTEND_WITH_AND
)
33064 *total
= cost
->add
;
33066 *total
= cost
->movzx
;
33070 *total
= cost
->movsx
;
33074 if (SCALAR_INT_MODE_P (mode
)
33075 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
33076 && CONST_INT_P (XEXP (x
, 1)))
33078 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
33081 *total
= cost
->add
;
33084 if ((value
== 2 || value
== 3)
33085 && cost
->lea
<= cost
->shift_const
)
33087 *total
= cost
->lea
;
33097 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
33099 /* ??? Should be SSE vector operation cost. */
33100 /* At least for published AMD latencies, this really is the same
33101 as the latency for a simple fpu operation like fabs. */
33102 /* V*QImode is emulated with 1-11 insns. */
33103 if (mode
== V16QImode
|| mode
== V32QImode
)
33106 if (TARGET_XOP
&& mode
== V16QImode
)
33108 /* For XOP we use vpshab, which requires a broadcast of the
33109 value to the variable shift insn. For constants this
33110 means a V16Q const in mem; even when we can perform the
33111 shift with one insn set the cost to prefer paddb. */
33112 if (CONSTANT_P (XEXP (x
, 1)))
33114 *total
= (cost
->fabs
33115 + rtx_cost (XEXP (x
, 0), code
, 0, speed
)
33116 + (speed
? 2 : COSTS_N_BYTES (16)));
33121 else if (TARGET_SSSE3
)
33123 *total
= cost
->fabs
* count
;
33126 *total
= cost
->fabs
;
33128 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
33130 if (CONST_INT_P (XEXP (x
, 1)))
33132 if (INTVAL (XEXP (x
, 1)) > 32)
33133 *total
= cost
->shift_const
+ COSTS_N_INSNS (2);
33135 *total
= cost
->shift_const
* 2;
33139 if (GET_CODE (XEXP (x
, 1)) == AND
)
33140 *total
= cost
->shift_var
* 2;
33142 *total
= cost
->shift_var
* 6 + COSTS_N_INSNS (2);
33147 if (CONST_INT_P (XEXP (x
, 1)))
33148 *total
= cost
->shift_const
;
33150 *total
= cost
->shift_var
;
33158 gcc_assert (FLOAT_MODE_P (mode
));
33159 gcc_assert (TARGET_FMA
|| TARGET_FMA4
);
33161 /* ??? SSE scalar/vector cost should be used here. */
33162 /* ??? Bald assumption that fma has the same cost as fmul. */
33163 *total
= cost
->fmul
;
33164 *total
+= rtx_cost (XEXP (x
, 1), FMA
, 1, speed
);
33166 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
33168 if (GET_CODE (sub
) == NEG
)
33169 sub
= XEXP (sub
, 0);
33170 *total
+= rtx_cost (sub
, FMA
, 0, speed
);
33173 if (GET_CODE (sub
) == NEG
)
33174 sub
= XEXP (sub
, 0);
33175 *total
+= rtx_cost (sub
, FMA
, 2, speed
);
33180 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
33182 /* ??? SSE scalar cost should be used here. */
33183 *total
= cost
->fmul
;
33186 else if (X87_FLOAT_MODE_P (mode
))
33188 *total
= cost
->fmul
;
33191 else if (FLOAT_MODE_P (mode
))
33193 /* ??? SSE vector cost should be used here. */
33194 *total
= cost
->fmul
;
33197 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
33199 /* V*QImode is emulated with 7-13 insns. */
33200 if (mode
== V16QImode
|| mode
== V32QImode
)
33203 if (TARGET_XOP
&& mode
== V16QImode
)
33205 else if (TARGET_SSSE3
)
33207 *total
= cost
->fmul
* 2 + cost
->fabs
* extra
;
33209 /* V*DImode is emulated with 5-8 insns. */
33210 else if (mode
== V2DImode
|| mode
== V4DImode
)
33212 if (TARGET_XOP
&& mode
== V2DImode
)
33213 *total
= cost
->fmul
* 2 + cost
->fabs
* 3;
33215 *total
= cost
->fmul
* 3 + cost
->fabs
* 5;
33217 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
33218 insns, including two PMULUDQ. */
33219 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
33220 *total
= cost
->fmul
* 2 + cost
->fabs
* 5;
33222 *total
= cost
->fmul
;
33227 rtx op0
= XEXP (x
, 0);
33228 rtx op1
= XEXP (x
, 1);
33230 if (CONST_INT_P (XEXP (x
, 1)))
33232 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
33233 for (nbits
= 0; value
!= 0; value
&= value
- 1)
33237 /* This is arbitrary. */
33240 /* Compute costs correctly for widening multiplication. */
33241 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
33242 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
33243 == GET_MODE_SIZE (mode
))
33245 int is_mulwiden
= 0;
33246 enum machine_mode inner_mode
= GET_MODE (op0
);
33248 if (GET_CODE (op0
) == GET_CODE (op1
))
33249 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
33250 else if (CONST_INT_P (op1
))
33252 if (GET_CODE (op0
) == SIGN_EXTEND
)
33253 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
33256 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
33260 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
33263 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
33264 + nbits
* cost
->mult_bit
33265 + rtx_cost (op0
, outer_code
, opno
, speed
)
33266 + rtx_cost (op1
, outer_code
, opno
, speed
));
33275 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
33276 /* ??? SSE cost should be used here. */
33277 *total
= cost
->fdiv
;
33278 else if (X87_FLOAT_MODE_P (mode
))
33279 *total
= cost
->fdiv
;
33280 else if (FLOAT_MODE_P (mode
))
33281 /* ??? SSE vector cost should be used here. */
33282 *total
= cost
->fdiv
;
33284 *total
= cost
->divide
[MODE_INDEX (mode
)];
33288 if (GET_MODE_CLASS (mode
) == MODE_INT
33289 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
33291 if (GET_CODE (XEXP (x
, 0)) == PLUS
33292 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
33293 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
33294 && CONSTANT_P (XEXP (x
, 1)))
33296 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
33297 if (val
== 2 || val
== 4 || val
== 8)
33299 *total
= cost
->lea
;
33300 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
33301 outer_code
, opno
, speed
);
33302 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0),
33303 outer_code
, opno
, speed
);
33304 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
33308 else if (GET_CODE (XEXP (x
, 0)) == MULT
33309 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
33311 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
33312 if (val
== 2 || val
== 4 || val
== 8)
33314 *total
= cost
->lea
;
33315 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
33316 outer_code
, opno
, speed
);
33317 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
33321 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
33323 *total
= cost
->lea
;
33324 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0),
33325 outer_code
, opno
, speed
);
33326 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1),
33327 outer_code
, opno
, speed
);
33328 *total
+= rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
);
33335 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
33337 /* ??? SSE cost should be used here. */
33338 *total
= cost
->fadd
;
33341 else if (X87_FLOAT_MODE_P (mode
))
33343 *total
= cost
->fadd
;
33346 else if (FLOAT_MODE_P (mode
))
33348 /* ??? SSE vector cost should be used here. */
33349 *total
= cost
->fadd
;
33357 if (GET_MODE_CLASS (mode
) == MODE_INT
33358 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
33360 *total
= (cost
->add
* 2
33361 + (rtx_cost (XEXP (x
, 0), outer_code
, opno
, speed
)
33362 << (GET_MODE (XEXP (x
, 0)) != DImode
))
33363 + (rtx_cost (XEXP (x
, 1), outer_code
, opno
, speed
)
33364 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
33370 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
33372 /* ??? SSE cost should be used here. */
33373 *total
= cost
->fchs
;
33376 else if (X87_FLOAT_MODE_P (mode
))
33378 *total
= cost
->fchs
;
33381 else if (FLOAT_MODE_P (mode
))
33383 /* ??? SSE vector cost should be used here. */
33384 *total
= cost
->fchs
;
33390 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
33392 /* ??? Should be SSE vector operation cost. */
33393 /* At least for published AMD latencies, this really is the same
33394 as the latency for a simple fpu operation like fabs. */
33395 *total
= cost
->fabs
;
33397 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
33398 *total
= cost
->add
* 2;
33400 *total
= cost
->add
;
33404 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
33405 && XEXP (XEXP (x
, 0), 1) == const1_rtx
33406 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
33407 && XEXP (x
, 1) == const0_rtx
)
33409 /* This kind of construct is implemented using test[bwl].
33410 Treat it as if we had an AND. */
33411 *total
= (cost
->add
33412 + rtx_cost (XEXP (XEXP (x
, 0), 0), outer_code
, opno
, speed
)
33413 + rtx_cost (const1_rtx
, outer_code
, opno
, speed
));
33419 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
33424 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
33425 /* ??? SSE cost should be used here. */
33426 *total
= cost
->fabs
;
33427 else if (X87_FLOAT_MODE_P (mode
))
33428 *total
= cost
->fabs
;
33429 else if (FLOAT_MODE_P (mode
))
33430 /* ??? SSE vector cost should be used here. */
33431 *total
= cost
->fabs
;
33435 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
33436 /* ??? SSE cost should be used here. */
33437 *total
= cost
->fsqrt
;
33438 else if (X87_FLOAT_MODE_P (mode
))
33439 *total
= cost
->fsqrt
;
33440 else if (FLOAT_MODE_P (mode
))
33441 /* ??? SSE vector cost should be used here. */
33442 *total
= cost
->fsqrt
;
33446 if (XINT (x
, 1) == UNSPEC_TP
)
33453 case VEC_DUPLICATE
:
33454 /* ??? Assume all of these vector manipulation patterns are
33455 recognizable. In which case they all pretty much have the
33457 *total
= cost
->fabs
;
33467 static int current_machopic_label_num
;
33469 /* Given a symbol name and its associated stub, write out the
33470 definition of the stub. */
33473 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
33475 unsigned int length
;
33476 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
33477 int label
= ++current_machopic_label_num
;
33479 /* For 64-bit we shouldn't get here. */
33480 gcc_assert (!TARGET_64BIT
);
33482 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33483 symb
= targetm
.strip_name_encoding (symb
);
33485 length
= strlen (stub
);
33486 binder_name
= XALLOCAVEC (char, length
+ 32);
33487 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
33489 length
= strlen (symb
);
33490 symbol_name
= XALLOCAVEC (char, length
+ 32);
33491 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
33493 sprintf (lazy_ptr_name
, "L%d$lz", label
);
33495 if (MACHOPIC_ATT_STUB
)
33496 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
33497 else if (MACHOPIC_PURE
)
33498 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
33500 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
33502 fprintf (file
, "%s:\n", stub
);
33503 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
33505 if (MACHOPIC_ATT_STUB
)
33507 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
33509 else if (MACHOPIC_PURE
)
33512 /* 25-byte PIC stub using "CALL get_pc_thunk". */
33513 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
33514 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
33515 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
33516 label
, lazy_ptr_name
, label
);
33517 fprintf (file
, "\tjmp\t*%%ecx\n");
33520 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
33522 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
33523 it needs no stub-binding-helper. */
33524 if (MACHOPIC_ATT_STUB
)
33527 fprintf (file
, "%s:\n", binder_name
);
33531 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
33532 fprintf (file
, "\tpushl\t%%ecx\n");
33535 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
33537 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
33539 /* N.B. Keep the correspondence of these
33540 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
33541 old-pic/new-pic/non-pic stubs; altering this will break
33542 compatibility with existing dylibs. */
33545 /* 25-byte PIC stub using "CALL get_pc_thunk". */
33546 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
33549 /* 16-byte -mdynamic-no-pic stub. */
33550 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
33552 fprintf (file
, "%s:\n", lazy_ptr_name
);
33553 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
33554 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
33556 #endif /* TARGET_MACHO */
33558 /* Order the registers for register allocator. */
33561 x86_order_regs_for_local_alloc (void)
33566 /* First allocate the local general purpose registers. */
33567 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
33568 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
33569 reg_alloc_order
[pos
++] = i
;
33571 /* Global general purpose registers. */
33572 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
33573 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
33574 reg_alloc_order
[pos
++] = i
;
33576 /* x87 registers come first in case we are doing FP math
33578 if (!TARGET_SSE_MATH
)
33579 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
33580 reg_alloc_order
[pos
++] = i
;
33582 /* SSE registers. */
33583 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
33584 reg_alloc_order
[pos
++] = i
;
33585 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
33586 reg_alloc_order
[pos
++] = i
;
33588 /* x87 registers. */
33589 if (TARGET_SSE_MATH
)
33590 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
33591 reg_alloc_order
[pos
++] = i
;
33593 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
33594 reg_alloc_order
[pos
++] = i
;
33596 /* Initialize the rest of array as we do not allocate some registers
33598 while (pos
< FIRST_PSEUDO_REGISTER
)
33599 reg_alloc_order
[pos
++] = 0;
33602 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
33603 in struct attribute_spec handler. */
33605 ix86_handle_callee_pop_aggregate_return (tree
*node
, tree name
,
33607 int flags ATTRIBUTE_UNUSED
,
33608 bool *no_add_attrs
)
33610 if (TREE_CODE (*node
) != FUNCTION_TYPE
33611 && TREE_CODE (*node
) != METHOD_TYPE
33612 && TREE_CODE (*node
) != FIELD_DECL
33613 && TREE_CODE (*node
) != TYPE_DECL
)
33615 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
33617 *no_add_attrs
= true;
33622 warning (OPT_Wattributes
, "%qE attribute only available for 32-bit",
33624 *no_add_attrs
= true;
33627 if (is_attribute_p ("callee_pop_aggregate_return", name
))
33631 cst
= TREE_VALUE (args
);
33632 if (TREE_CODE (cst
) != INTEGER_CST
)
33634 warning (OPT_Wattributes
,
33635 "%qE attribute requires an integer constant argument",
33637 *no_add_attrs
= true;
33639 else if (compare_tree_int (cst
, 0) != 0
33640 && compare_tree_int (cst
, 1) != 0)
33642 warning (OPT_Wattributes
,
33643 "argument to %qE attribute is neither zero, nor one",
33645 *no_add_attrs
= true;
33654 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
33655 struct attribute_spec.handler. */
33657 ix86_handle_abi_attribute (tree
*node
, tree name
,
33658 tree args ATTRIBUTE_UNUSED
,
33659 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
33661 if (TREE_CODE (*node
) != FUNCTION_TYPE
33662 && TREE_CODE (*node
) != METHOD_TYPE
33663 && TREE_CODE (*node
) != FIELD_DECL
33664 && TREE_CODE (*node
) != TYPE_DECL
)
33666 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
33668 *no_add_attrs
= true;
33672 /* Can combine regparm with all attributes but fastcall. */
33673 if (is_attribute_p ("ms_abi", name
))
33675 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node
)))
33677 error ("ms_abi and sysv_abi attributes are not compatible");
33682 else if (is_attribute_p ("sysv_abi", name
))
33684 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node
)))
33686 error ("ms_abi and sysv_abi attributes are not compatible");
33695 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
33696 struct attribute_spec.handler. */
33698 ix86_handle_struct_attribute (tree
*node
, tree name
,
33699 tree args ATTRIBUTE_UNUSED
,
33700 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
33703 if (DECL_P (*node
))
33705 if (TREE_CODE (*node
) == TYPE_DECL
)
33706 type
= &TREE_TYPE (*node
);
33711 if (!(type
&& RECORD_OR_UNION_TYPE_P (*type
)))
33713 warning (OPT_Wattributes
, "%qE attribute ignored",
33715 *no_add_attrs
= true;
33718 else if ((is_attribute_p ("ms_struct", name
)
33719 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type
)))
33720 || ((is_attribute_p ("gcc_struct", name
)
33721 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type
)))))
33723 warning (OPT_Wattributes
, "%qE incompatible attribute ignored",
33725 *no_add_attrs
= true;
33732 ix86_handle_fndecl_attribute (tree
*node
, tree name
,
33733 tree args ATTRIBUTE_UNUSED
,
33734 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
33736 if (TREE_CODE (*node
) != FUNCTION_DECL
)
33738 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
33740 *no_add_attrs
= true;
33746 ix86_ms_bitfield_layout_p (const_tree record_type
)
33748 return ((TARGET_MS_BITFIELD_LAYOUT
33749 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
33750 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
33753 /* Returns an expression indicating where the this parameter is
33754 located on entry to the FUNCTION. */
33757 x86_this_parameter (tree function
)
33759 tree type
= TREE_TYPE (function
);
33760 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
33765 const int *parm_regs
;
33767 if (ix86_function_type_abi (type
) == MS_ABI
)
33768 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
33770 parm_regs
= x86_64_int_parameter_registers
;
33771 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
33774 nregs
= ix86_function_regparm (type
, function
);
33776 if (nregs
> 0 && !stdarg_p (type
))
33779 unsigned int ccvt
= ix86_get_callcvt (type
);
33781 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
33782 regno
= aggr
? DX_REG
: CX_REG
;
33783 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
33787 return gen_rtx_MEM (SImode
,
33788 plus_constant (Pmode
, stack_pointer_rtx
, 4));
33797 return gen_rtx_MEM (SImode
,
33798 plus_constant (Pmode
,
33799 stack_pointer_rtx
, 4));
33802 return gen_rtx_REG (SImode
, regno
);
33805 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
33809 /* Determine whether x86_output_mi_thunk can succeed. */
33812 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
33813 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
33814 HOST_WIDE_INT vcall_offset
, const_tree function
)
33816 /* 64-bit can handle anything. */
33820 /* For 32-bit, everything's fine if we have one free register. */
33821 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
33824 /* Need a free register for vcall_offset. */
33828 /* Need a free register for GOT references. */
33829 if (flag_pic
&& !targetm
.binds_local_p (function
))
33832 /* Otherwise ok. */
33836 /* Output the assembler code for a thunk function. THUNK_DECL is the
33837 declaration for the thunk function itself, FUNCTION is the decl for
33838 the target function. DELTA is an immediate constant offset to be
33839 added to THIS. If VCALL_OFFSET is nonzero, the word at
33840 *(*this + vcall_offset) should be added to THIS. */
33843 x86_output_mi_thunk (FILE *file
,
33844 tree thunk ATTRIBUTE_UNUSED
, HOST_WIDE_INT delta
,
33845 HOST_WIDE_INT vcall_offset
, tree function
)
33847 rtx this_param
= x86_this_parameter (function
);
33848 rtx this_reg
, tmp
, fnaddr
;
33849 unsigned int tmp_regno
;
33852 tmp_regno
= R10_REG
;
33855 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
33856 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) != 0)
33857 tmp_regno
= AX_REG
;
33859 tmp_regno
= CX_REG
;
33862 emit_note (NOTE_INSN_PROLOGUE_END
);
33864 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
33865 pull it in now and let DELTA benefit. */
33866 if (REG_P (this_param
))
33867 this_reg
= this_param
;
33868 else if (vcall_offset
)
33870 /* Put the this parameter into %eax. */
33871 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
33872 emit_move_insn (this_reg
, this_param
);
33875 this_reg
= NULL_RTX
;
33877 /* Adjust the this parameter by a fixed constant. */
33880 rtx delta_rtx
= GEN_INT (delta
);
33881 rtx delta_dst
= this_reg
? this_reg
: this_param
;
33885 if (!x86_64_general_operand (delta_rtx
, Pmode
))
33887 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
33888 emit_move_insn (tmp
, delta_rtx
);
33893 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
33896 /* Adjust the this parameter by a value stored in the vtable. */
33899 rtx vcall_addr
, vcall_mem
, this_mem
;
33901 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
33903 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
33904 if (Pmode
!= ptr_mode
)
33905 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
33906 emit_move_insn (tmp
, this_mem
);
33908 /* Adjust the this parameter. */
33909 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
33911 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
33913 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
33914 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
33915 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
33918 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
33919 if (Pmode
!= ptr_mode
)
33920 emit_insn (gen_addsi_1_zext (this_reg
,
33921 gen_rtx_REG (ptr_mode
,
33925 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
33928 /* If necessary, drop THIS back to its stack slot. */
33929 if (this_reg
&& this_reg
!= this_param
)
33930 emit_move_insn (this_param
, this_reg
);
33932 fnaddr
= XEXP (DECL_RTL (function
), 0);
33935 if (!flag_pic
|| targetm
.binds_local_p (function
)
33936 || cfun
->machine
->call_abi
== MS_ABI
)
33940 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
33941 tmp
= gen_rtx_CONST (Pmode
, tmp
);
33942 fnaddr
= gen_rtx_MEM (Pmode
, tmp
);
33947 if (!flag_pic
|| targetm
.binds_local_p (function
))
33950 else if (TARGET_MACHO
)
33952 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
33953 fnaddr
= XEXP (fnaddr
, 0);
33955 #endif /* TARGET_MACHO */
33958 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
33959 output_set_got (tmp
, NULL_RTX
);
33961 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
33962 fnaddr
= gen_rtx_PLUS (Pmode
, fnaddr
, tmp
);
33963 fnaddr
= gen_rtx_MEM (Pmode
, fnaddr
);
33967 /* Our sibling call patterns do not allow memories, because we have no
33968 predicate that can distinguish between frame and non-frame memory.
33969 For our purposes here, we can get away with (ab)using a jump pattern,
33970 because we're going to do no optimization. */
33971 if (MEM_P (fnaddr
))
33972 emit_jump_insn (gen_indirect_jump (fnaddr
));
33975 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
33976 fnaddr
= legitimize_pic_address (fnaddr
,
33977 gen_rtx_REG (Pmode
, tmp_regno
));
33979 if (!sibcall_insn_operand (fnaddr
, word_mode
))
33981 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
33982 if (GET_MODE (fnaddr
) != word_mode
)
33983 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
33984 emit_move_insn (tmp
, fnaddr
);
33988 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
33989 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
33990 tmp
= emit_call_insn (tmp
);
33991 SIBLING_CALL_P (tmp
) = 1;
33995 /* Emit just enough of rest_of_compilation to get the insns emitted.
33996 Note that use_thunk calls assemble_start_function et al. */
33997 tmp
= get_insns ();
33998 shorten_branches (tmp
);
33999 final_start_function (tmp
, file
, 1);
34000 final (tmp
, file
, 1);
34001 final_end_function ();
34005 x86_file_start (void)
34007 default_file_start ();
34009 darwin_file_start ();
34011 if (X86_FILE_START_VERSION_DIRECTIVE
)
34012 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
34013 if (X86_FILE_START_FLTUSED
)
34014 fputs ("\t.global\t__fltused\n", asm_out_file
);
34015 if (ix86_asm_dialect
== ASM_INTEL
)
34016 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
34020 x86_field_alignment (tree field
, int computed
)
34022 enum machine_mode mode
;
34023 tree type
= TREE_TYPE (field
);
34025 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
34027 mode
= TYPE_MODE (strip_array_types (type
));
34028 if (mode
== DFmode
|| mode
== DCmode
34029 || GET_MODE_CLASS (mode
) == MODE_INT
34030 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
34031 return MIN (32, computed
);
34035 /* Output assembler code to FILE to increment profiler label # LABELNO
34036 for profiling a function entry. */
34038 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
34040 const char *mcount_name
= (flag_fentry
? MCOUNT_NAME_BEFORE_PROLOGUE
34045 #ifndef NO_PROFILE_COUNTERS
34046 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
34049 if (DEFAULT_ABI
== SYSV_ABI
&& flag_pic
)
34050 fprintf (file
, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
34052 fprintf (file
, "\tcall\t%s\n", mcount_name
);
34056 #ifndef NO_PROFILE_COUNTERS
34057 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
34060 fprintf (file
, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
34064 #ifndef NO_PROFILE_COUNTERS
34065 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
34068 fprintf (file
, "\tcall\t%s\n", mcount_name
);
34072 /* We don't have exact information about the insn sizes, but we may assume
34073 quite safely that we are informed about all 1 byte insns and memory
34074 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound on the byte size of INSN, used by
   ix86_avoid_jump_mispredicts below.  Non-insns, alignment unspecs and
   jump-table data contribute a size visible only in the elided return
   statements; calls with a symbolic target that are not sibcalls are
   a special (5-byte) case per the comment.
   NOTE(review): return type, braces and most return statements are
   elided in this extraction.  */
34078 min_insn_size (rtx insn
)
34082 if (!INSN_P (insn
) || !active_insn_p (insn
))
34085 /* Discard alignments we've emit and jump instructions. */
34086 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
34087 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
34089 if (JUMP_TABLE_DATA_P (insn
))
34092 /* Important case - calls are always 5 bytes.
34093 It is common to have many calls in the row. */
34095 && symbolic_reference_mentioned_p (PATTERN (insn
))
34096 && !SIBLING_CALL_P (insn
))
34098 len
= get_attr_length (insn
);
34102 /* For normal instructions we rely on get_attr_length being exact,
34103 with a few exceptions. */
34104 if (!JUMP_P (insn
))
34106 enum attr_type type
= get_attr_type (insn
);
/* Inline asm has no reliable length attribute.  */
34111 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
34112 || asm_noperands (PATTERN (insn
)) >= 0)
34119 /* Otherwise trust get_attr_length. */
34123 l
= get_attr_length_address (insn
);
34124 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
34133 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
34135 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Scans the insn stream maintaining a sliding window [START, INSN] of
   estimated size NBYTES (via min_insn_size) and jump count NJUMPS.
   When an interval containing 4 jumps (njumps == 3 plus the current
   one -- see the isjump test) fits in under 16 bytes, a gen_pad insn
   is emitted before INSN so at most 3 jumps land in one 16-byte page.
   NOTE(review): braces, a few condition heads (e.g. the JUMP_P (insn)
   test that the two "!= ADDR_VEC" fragments belong to) and the isjump
   declaration are elided in this extraction.  */
34139 ix86_avoid_jump_mispredicts (void)
34141 rtx insn
, start
= get_insns ();
34142 int nbytes
= 0, njumps
= 0;
34145 /* Look for all minimal intervals of instructions containing 4 jumps.
34146 The intervals are bounded by START and INSN. NBYTES is the total
34147 size of instructions in the interval including INSN and not including
34148 START. When the NBYTES is smaller than 16 bytes, it is possible
34149 that the end of START and INSN ends up in the same 16byte page.
34151 The smallest offset in the page INSN can start is the case where START
34152 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
34153 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
34155 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
34159 if (LABEL_P (insn
))
34161 int align
= label_to_alignment (insn
);
34162 int max_skip
= label_to_max_skip (insn
);
34166 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
34167 already in the current 16 byte page, because otherwise
34168 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
34169 bytes to reach 16 byte boundary. */
34171 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
34174 fprintf (dump_file
, "Label %i with max_skip %i\n",
34175 INSN_UID (insn
), max_skip
);
/* Shrink the window from the front while an aligned label could
   still share a 16-byte page with it.  */
34178 while (nbytes
+ max_skip
>= 16)
34180 start
= NEXT_INSN (start
);
34181 if ((JUMP_P (start
)
34182 && GET_CODE (PATTERN (start
)) != ADDR_VEC
34183 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
34185 njumps
--, isjump
= 1;
34188 nbytes
-= min_insn_size (start
);
34194 min_size
= min_insn_size (insn
);
34195 nbytes
+= min_size
;
34197 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
34198 INSN_UID (insn
), min_size
);
34200 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
34201 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
)
/* Too many jumps in the window: advance START past one jump.  */
34209 start
= NEXT_INSN (start
);
34210 if ((JUMP_P (start
)
34211 && GET_CODE (PATTERN (start
)) != ADDR_VEC
34212 && GET_CODE (PATTERN (start
)) != ADDR_DIFF_VEC
)
34214 njumps
--, isjump
= 1;
34217 nbytes
-= min_insn_size (start
);
34219 gcc_assert (njumps
>= 0);
34221 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
34222 INSN_UID (start
), INSN_UID (insn
), nbytes
);
34224 if (njumps
== 3 && isjump
&& nbytes
< 16)
/* Pad so INSN starts in the next 16-byte page: 15 - NBYTES +
   sizeof (INSN), per the comment above.  */
34226 int padsize
= 15 - nbytes
+ min_insn_size (insn
);
34229 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
34230 INSN_UID (insn
), padsize
);
34231 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
34237 /* AMD Athlon works faster
34238 when RET is not destination of conditional jump or directly preceded
34239 by other jump instruction. We avoid the penalty by inserting NOP just
34240 before the RET instructions in such cases. */
/* Walks every predecessor edge of the exit block; for each basic block
   ending in a return insn (skipping blocks optimized for size), checks
   whether the return is reached via a label from a non-fallthru edge or
   directly preceded by a (conditional) jump, and if so replaces/pads it
   (the visible replacement emits gen_simple_return_internal_long).
   NOTE(review): braces, the edge_iterator declarations, the "replace =
   true" assignments and the final delete/replace step are elided in
   this extraction.  */
34242 ix86_pad_returns (void)
34247 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
34249 basic_block bb
= e
->src
;
34250 rtx ret
= BB_END (bb
);
34252 bool replace
= false;
34254 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
34255 || optimize_bb_for_size_p (bb
))
/* Find the active insn or label immediately before the return.  */
34257 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
34258 if (active_insn_p (prev
) || LABEL_P (prev
))
34260 if (prev
&& LABEL_P (prev
))
/* Return reached through a label: penalize only when some
   predecessor jumps (non-fallthru) to it.  */
34265 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
34266 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
34267 && !(e
->flags
& EDGE_FALLTHRU
))
34272 prev
= prev_active_insn (ret
);
34274 && ((JUMP_P (prev
) && any_condjump_p (prev
))
34277 /* Empty functions get branch mispredict even when
34278 the jump destination is not visible to us. */
34279 if (!prev
&& !optimize_function_for_size_p (cfun
))
34284 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
34290 /* Count the minimum number of instructions in BB. Return 4 if the
34291 number of instructions >= 4. */
/* Counts non-debug insns in BB, skipping USE/CLOBBER patterns and
   (per the comment) return insns in exit blocks, saturating at 4.
   NOTE(review): return type, braces, the insn declaration and the
   increment/return statements are elided in this extraction.  */
34294 ix86_count_insn_bb (basic_block bb
)
34297 int insn_count
= 0;
34299 /* Count number of instructions in this block. Return 4 if the number
34300 of instructions >= 4. */
34301 FOR_BB_INSNS (bb
, insn
)
34303 /* Only happen in exit blocks. */
34305 && ANY_RETURN_P (PATTERN (insn
)))
34308 if (NONDEBUG_INSN_P (insn
)
34309 && GET_CODE (PATTERN (insn
)) != USE
34310 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
34313 if (insn_count
>= 4)
34322 /* Count the minimum number of instructions in code path in BB.
34323 Return 4 if the number of instructions >= 4. */
/* For a block BB with an edge to exit: if some predecessor of BB is
   reached directly from entry, add the minimum predecessor insn count
   to BB's own count (each saturating at 4 via ix86_count_insn_bb).
   A direct entry->BB edge zeroes the predecessor contribution.
   NOTE(review): return type, braces and the edge/edge_iterator
   declarations are elided in this extraction.  */
34326 ix86_count_insn (basic_block bb
)
34330 int min_prev_count
;
34332 /* Only bother counting instructions along paths with no
34333 more than 2 basic blocks between entry and exit. Given
34334 that BB has an edge to exit, determine if a predecessor
34335 of BB has an edge from entry. If so, compute the number
34336 of instructions in the predecessor block. If there
34337 happen to be multiple such blocks, compute the minimum. */
34338 min_prev_count
= 4;
34339 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
34342 edge_iterator prev_ei
;
34344 if (e
->src
== ENTRY_BLOCK_PTR
)
34346 min_prev_count
= 0;
34349 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
34351 if (prev_e
->src
== ENTRY_BLOCK_PTR
)
34353 int count
= ix86_count_insn_bb (e
->src
);
34354 if (count
< min_prev_count
)
34355 min_prev_count
= count
;
34361 if (min_prev_count
< 4)
34362 min_prev_count
+= ix86_count_insn_bb (bb
);
34364 return min_prev_count
;
34367 /* Pad short function to 4 instructions. */
/* For each return edge out of the function: if the path to it contains
   fewer than 4 instructions (per ix86_count_insn), walk back from the
   return to the NOTE_INSN_EPILOGUE_BEG note and emit a multi-byte NOP
   insn there so the function body reaches 4 instructions (two NOPs
   count as one, hence the 2 * (4 - insn_count) operand).
   NOTE(review): return type, braces, the edge iteration header and the
   walk-loop condition head are elided in this extraction.  */
34370 ix86_pad_short_function (void)
34375 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
34377 rtx ret
= BB_END (e
->src
);
34378 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
34380 int insn_count
= ix86_count_insn (e
->src
);
34382 /* Pad short function. */
34383 if (insn_count
< 4)
34387 /* Find epilogue. */
34390 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
34391 insn
= PREV_INSN (insn
);
34396 /* Two NOPs count as one instruction. */
34397 insn_count
= 2 * (4 - insn_count
);
34398 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
34404 /* Implement machine specific optimizations. We implement padding of returns
34405 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* Machine-dependent reorg pass driver (TARGET_MACHINE_DEPENDENT_REORG):
   recomputes insn->bb mapping, runs the AVX vzeroupper placement pass,
   then — when optimizing for speed — the short-function padding or
   return padding pass, and finally the four-jump-limit avoidance pass.
   NOTE(review): the function name/header line and braces are elided in
   this extraction; the visible body matches ix86_reorg.  */
34409 /* We are freeing block_for_insn in the toplev to keep compatibility
34410 with old MDEP_REORGS that are not CFG based. Recompute it now. */
34411 compute_bb_for_insn ();
34413 /* Run the vzeroupper optimization if needed. */
34414 if (TARGET_VZEROUPPER
)
34415 move_or_delete_vzeroupper ();
34417 if (optimize
&& optimize_function_for_speed_p (cfun
))
34419 if (TARGET_PAD_SHORT_FUNCTION
)
34420 ix86_pad_short_function ();
34421 else if (TARGET_PAD_RETURNS
)
34422 ix86_pad_returns ();
34423 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
34424 if (TARGET_FOUR_JUMP_LIMIT
)
34425 ix86_avoid_jump_mispredicts ();
34430 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached operands of INSN for a general register whose
   number is not a legacy QImode-addressable register (QI_REGNO_P),
   i.e. one that needs a REX prefix for byte access.
   NOTE(review): return type, braces, the loop-variable declaration and
   the return statements are elided in this extraction.  */
34433 x86_extended_QIreg_mentioned_p (rtx insn
)
34436 extract_insn_cached (insn
);
34437 for (i
= 0; i
< recog_data
.n_operands
; i
++)
34438 if (GENERAL_REG_P (recog_data
.operand
[i
])
34439 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
34444 /* Return nonzero when P points to register encoded via REX prefix.
34445 Called via for_each_rtx. */
/* for_each_rtx callback: nonzero iff *P is a REX-only integer or SSE
   register (REX_INT_REGNO_P / REX_SSE_REGNO_P).
   NOTE(review): the REG_P guard and braces are elided in this
   extraction.  */
34447 extended_reg_mentioned_1 (rtx
*p
, void *data ATTRIBUTE_UNUSED
)
34449 unsigned int regno
;
34452 regno
= REGNO (*p
);
34453 return REX_INT_REGNO_P (regno
) || REX_SSE_REGNO_P (regno
);
34456 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the pattern of INSN (or INSN itself when it is a bare rtx)
   with extended_reg_mentioned_1 above.  */
34459 x86_extended_reg_mentioned_p (rtx insn
)
34461 return for_each_rtx (INSN_P (insn
) ? &PATTERN (insn
) : &insn
,
34462 extended_reg_mentioned_1
, NULL
);
34465 /* If profitable, negate (without causing overflow) integer constant
34466 of mode MODE at location LOC. Return true in this case. */
/* Rewrites *LOC to its negation when that yields a shorter encoding
   (e.g. subl $4 instead of addl $-4; -128 is kept because it encodes
   smaller than 128).  Bails out on the mode's sign bit to avoid
   overflow on negation.
   NOTE(review): the mode switch scaffolding, braces and return
   statements are elided in this extraction.  */
34468 x86_maybe_negate_const_int (rtx
*loc
, enum machine_mode mode
)
34472 if (!CONST_INT_P (*loc
))
34478 /* DImode x86_64 constants must fit in 32 bits. */
34479 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
34490 gcc_unreachable ();
34493 /* Avoid overflows. */
34494 if (mode_signbit_p (mode
, *loc
))
34497 val
= INTVAL (*loc
);
34499 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
34500 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
34501 if ((val
< 0 && val
!= -128)
34504 *loc
= GEN_INT (-val
);
34511 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
34512 optabs would emit if we didn't have TFmode patterns. */
/* operands[0] = FP destination, operands[1] = unsigned SImode/DImode
   source.  Nonnegative input: plain signed expand_float.  Negative
   input (tested via signed LT 0): halve with a logical shift, OR back
   the lost low bit, convert, then double with f0 + f0 — the classic
   unsigned->FP trick that keeps the value in signed range.
   NOTE(review): braces, the assignment to OUT from operands[0] and the
   OPTAB arguments of two expand_simple_binop calls are elided in this
   extraction.  */
34515 x86_emit_floatuns (rtx operands
[2])
34517 rtx neglab
, donelab
, i0
, i1
, f0
, in
, out
;
34518 enum machine_mode mode
, inmode
;
34520 inmode
= GET_MODE (operands
[1]);
34521 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
34524 in
= force_reg (inmode
, operands
[1]);
34525 mode
= GET_MODE (out
);
34526 neglab
= gen_label_rtx ();
34527 donelab
= gen_label_rtx ();
34528 f0
= gen_reg_rtx (mode
);
/* Branch to the fixup path when the input's sign bit is set.  */
34530 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
34532 expand_float (out
, in
, 0);
34534 emit_jump_insn (gen_jump (donelab
));
34537 emit_label (neglab
);
34539 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
34541 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
34543 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
34545 expand_float (f0
, i0
, 0);
34547 emit_insn (gen_rtx_SET (VOIDmode
, out
, gen_rtx_PLUS (mode
, f0
, f0
)));
34549 emit_label (donelab
);
34552 /* AVX2 does support 32-byte integer vector operations,
34553 thus the longest vector we are faced with is V32QImode. */
34554 #define MAX_VECT_LEN 32
/* Descriptor for a vector permutation to be expanded: destination,
   the two source operands, the per-element permutation indices, the
   vector mode, the element count, and whether only one input operand
   is used.  Shared by the expand_vec_perm_* family below.  */
34556 struct expand_vec_perm_d
/* target: destination; op0/op1: permutation inputs.  */
34558 rtx target
, op0
, op1
;
/* perm[i]: source element index for result element i.  */
34559 unsigned char perm
[MAX_VECT_LEN
];
34560 enum machine_mode vmode
;
34561 unsigned char nelt
;
34562 bool one_operand_p
;
/* Forward declarations for the permutation expanders used before
   their definitions.  */
34566 static bool canonicalize_perm (struct expand_vec_perm_d
*d
);
34567 static bool expand_vec_perm_1 (struct expand_vec_perm_d
*d
);
34568 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
);
34570 /* Get a vector mode of the same size as the original but with elements
34571 twice as wide. This is only guaranteed to apply to integral vectors. */
/* E.g. V16QImode -> V8HImode: same total size, half the element count.
   The asserts check both invariants.
   NOTE(review): braces and the "return n;" line are elided in this
   extraction.  */
34573 static inline enum machine_mode
34574 get_mode_wider_vector (enum machine_mode o
)
34576 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
34577 enum machine_mode n
= GET_MODE_WIDER_MODE (o
);
34578 gcc_assert (GET_MODE_NUNITS (o
) == GET_MODE_NUNITS (n
) * 2);
34579 gcc_assert (GET_MODE_SIZE (o
) == GET_MODE_SIZE (n
));
34583 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
34584 with all elements equal to VAR. Return true if successful. */
/* Strategies visible here, selected by the (elided) mode switch:
   1) emit VEC_DUPLICATE directly and keep it if recog accepts it,
      retrying with VAL forced into a register;
   2) MMX HImode via TRUNCATE of an SImode lowpart;
   3) small-element SSE modes via the vec_perm broadcast machinery
      (expand_vec_perm_d with op0 == op1);
   4) widen elements (shift + IOR to replicate) and recurse in the
      wider vector mode;
   5) 256-bit integer modes: broadcast into the 128-bit half and
      VEC_CONCAT it with itself.
   NOTE(review): the switch/case scaffolding, braces and several
   return statements are elided in this extraction.  */
34587 ix86_expand_vector_init_duplicate (bool mmx_ok
, enum machine_mode mode
,
34588 rtx target
, rtx val
)
34611 /* First attempt to recognize VAL as-is. */
34612 dup
= gen_rtx_VEC_DUPLICATE (mode
, val
);
34613 insn
= emit_insn (gen_rtx_SET (VOIDmode
, target
, dup
));
34614 if (recog_memoized (insn
) < 0)
34617 /* If that fails, force VAL into a register. */
34620 XEXP (dup
, 0) = force_reg (GET_MODE_INNER (mode
), val
);
34621 seq
= get_insns ();
34624 emit_insn_before (seq
, insn
);
34626 ok
= recog_memoized (insn
) >= 0;
34635 if (TARGET_SSE
|| TARGET_3DNOW_A
)
34639 val
= gen_lowpart (SImode
, val
);
34640 x
= gen_rtx_TRUNCATE (HImode
, val
);
34641 x
= gen_rtx_VEC_DUPLICATE (mode
, x
);
34642 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
/* Broadcast via the generic permutation expander.  */
34655 struct expand_vec_perm_d dperm
;
34659 memset (&dperm
, 0, sizeof (dperm
));
34660 dperm
.target
= target
;
34661 dperm
.vmode
= mode
;
34662 dperm
.nelt
= GET_MODE_NUNITS (mode
);
34663 dperm
.op0
= dperm
.op1
= gen_reg_rtx (mode
);
34664 dperm
.one_operand_p
= true;
34666 /* Extend to SImode using a paradoxical SUBREG. */
34667 tmp1
= gen_reg_rtx (SImode
);
34668 emit_move_insn (tmp1
, gen_lowpart (SImode
, val
));
34670 /* Insert the SImode value as low element of a V4SImode vector. */
34671 tmp2
= gen_lowpart (V4SImode
, dperm
.op0
);
34672 emit_insn (gen_vec_setv4si_0 (tmp2
, CONST0_RTX (V4SImode
), tmp1
));
34674 ok
= (expand_vec_perm_1 (&dperm
)
34675 || expand_vec_perm_broadcast_1 (&dperm
));
34687 /* Replicate the value once into the next wider mode and recurse. */
34689 enum machine_mode smode
, wsmode
, wvmode
;
34692 smode
= GET_MODE_INNER (mode
);
34693 wvmode
= get_mode_wider_vector (mode
);
34694 wsmode
= GET_MODE_INNER (wvmode
);
/* Build (val << bits) | val in the wider scalar mode.  */
34696 val
= convert_modes (wsmode
, smode
, val
, true);
34697 x
= expand_simple_binop (wsmode
, ASHIFT
, val
,
34698 GEN_INT (GET_MODE_BITSIZE (smode
)),
34699 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
34700 val
= expand_simple_binop (wsmode
, IOR
, val
, x
, x
, 1, OPTAB_LIB_WIDEN
);
34702 x
= gen_lowpart (wvmode
, target
);
34703 ok
= ix86_expand_vector_init_duplicate (mmx_ok
, wvmode
, x
, val
);
/* 256-bit case: broadcast into a 128-bit half, then concatenate.  */
34711 enum machine_mode hvmode
= (mode
== V16HImode
? V8HImode
: V16QImode
);
34712 rtx x
= gen_reg_rtx (hvmode
);
34714 ok
= ix86_expand_vector_init_duplicate (false, hvmode
, x
, val
);
34717 x
= gen_rtx_VEC_CONCAT (mode
, x
, x
);
34718 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
34727 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
34728 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* Per-mode strategy (mode switch elided): either zero TARGET and use
   ix86_expand_vector_set; or VEC_CONCAT VAR with zero; or build
   {var, 0, ...} with VEC_DUPLICATE + VEC_MERGE and then shuffle the
   element into position ONE_VAR with pshufd (SSE2) or shufps (SSE1);
   small-element modes zero-extend VAR to SImode and recurse in a
   SImode-element vector mode.
   NOTE(review): the switch/case scaffolding, braces, and several
   condition heads/returns are elided in this extraction.  */
34732 ix86_expand_vector_init_one_nonzero (bool mmx_ok
, enum machine_mode mode
,
34733 rtx target
, rtx var
, int one_var
)
34735 enum machine_mode vsimode
;
34738 bool use_vector_set
= false;
34743 /* For SSE4.1, we normally use vector set. But if the second
34744 element is zero and inter-unit moves are OK, we use movq
34746 use_vector_set
= (TARGET_64BIT
34748 && !(TARGET_INTER_UNIT_MOVES
34754 use_vector_set
= TARGET_SSE4_1
;
34757 use_vector_set
= TARGET_SSE2
;
34760 use_vector_set
= TARGET_SSE
|| TARGET_3DNOW_A
;
34767 use_vector_set
= TARGET_AVX
;
34770 /* Use ix86_expand_vector_set in 64bit mode only. */
34771 use_vector_set
= TARGET_AVX
&& TARGET_64BIT
;
34777 if (use_vector_set
)
/* Zero the vector, then insert VAR at ONE_VAR.  */
34779 emit_insn (gen_rtx_SET (VOIDmode
, target
, CONST0_RTX (mode
)));
34780 var
= force_reg (GET_MODE_INNER (mode
), var
);
34781 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
34797 var
= force_reg (GET_MODE_INNER (mode
), var
);
34798 x
= gen_rtx_VEC_CONCAT (mode
, var
, CONST0_RTX (GET_MODE_INNER (mode
)));
34799 emit_insn (gen_rtx_SET (VOIDmode
, target
, x
));
/* Work in a fresh pseudo if TARGET is not a pseudo register.  */
34804 if (!REG_P (target
) || REGNO (target
) < FIRST_PSEUDO_REGISTER
)
34805 new_target
= gen_reg_rtx (mode
);
34807 new_target
= target
;
34808 var
= force_reg (GET_MODE_INNER (mode
), var
);
34809 x
= gen_rtx_VEC_DUPLICATE (mode
, var
);
34810 x
= gen_rtx_VEC_MERGE (mode
, x
, CONST0_RTX (mode
), const1_rtx
);
34811 emit_insn (gen_rtx_SET (VOIDmode
, new_target
, x
));
34814 /* We need to shuffle the value to the correct position, so
34815 create a new pseudo to store the intermediate result. */
34817 /* With SSE2, we can use the integer shuffle insns. */
34818 if (mode
!= V4SFmode
&& TARGET_SSE2
)
34820 emit_insn (gen_sse2_pshufd_1 (new_target
, new_target
,
34822 GEN_INT (one_var
== 1 ? 0 : 1),
34823 GEN_INT (one_var
== 2 ? 0 : 1),
34824 GEN_INT (one_var
== 3 ? 0 : 1)));
34825 if (target
!= new_target
)
34826 emit_move_insn (target
, new_target
);
34830 /* Otherwise convert the intermediate result to V4SFmode and
34831 use the SSE1 shuffle instructions. */
34832 if (mode
!= V4SFmode
)
34834 tmp
= gen_reg_rtx (V4SFmode
);
34835 emit_move_insn (tmp
, gen_lowpart (V4SFmode
, new_target
));
34840 emit_insn (gen_sse_shufps_v4sf (tmp
, tmp
, tmp
,
34842 GEN_INT (one_var
== 1 ? 0 : 1),
34843 GEN_INT (one_var
== 2 ? 0+4 : 1+4),
34844 GEN_INT (one_var
== 3 ? 0+4 : 1+4)));
34846 if (mode
!= V4SFmode
)
34847 emit_move_insn (target
, gen_lowpart (V4SImode
, tmp
));
34848 else if (tmp
!= target
)
34849 emit_move_insn (target
, tmp
);
34851 else if (target
!= new_target
)
34852 emit_move_insn (target
, new_target
);
34857 vsimode
= V4SImode
;
34863 vsimode
= V2SImode
;
34869 /* Zero extend the variable element to SImode and recurse. */
34870 var
= convert_modes (SImode
, GET_MODE_INNER (mode
), var
, true);
34872 x
= gen_reg_rtx (vsimode
);
34873 if (!ix86_expand_vector_init_one_nonzero (mmx_ok
, vsimode
, x
,
34875 gcc_unreachable ();
34877 emit_move_insn (target
, gen_lowpart (mode
, x
));
34885 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
34886 consisting of the values in VALS. It is known that all elements
34887 except ONE_VAR are constants. Return true if successful. */
/* Builds a constant vector with ONE_VAR zeroed (loaded from the pool),
   then overwrites element ONE_VAR with the variable value via
   ix86_expand_vector_set.  QImode elements have no single-entry set
   insn, so the variable byte is combined with its adjacent constant
   byte into an HImode value and inserted at one_var >> 1 in the
   (elided) wider wmode.
   NOTE(review): the mode switch scaffolding, braces, the wmode
   assignment and several returns are elided in this extraction.  */
34890 ix86_expand_vector_init_one_var (bool mmx_ok
, enum machine_mode mode
,
34891 rtx target
, rtx vals
, int one_var
)
34893 rtx var
= XVECEXP (vals
, 0, one_var
);
34894 enum machine_mode wmode
;
/* Copy VALS with the variable slot zeroed to get a pool constant.  */
34897 const_vec
= copy_rtx (vals
);
34898 XVECEXP (const_vec
, 0, one_var
) = CONST0_RTX (GET_MODE_INNER (mode
));
34899 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (const_vec
, 0));
34907 /* For the two element vectors, it's just as easy to use
34908 the general case. */
34912 /* Use ix86_expand_vector_set in 64bit mode only. */
34935 /* There's no way to set one QImode entry easily. Combine
34936 the variable value with its adjacent constant value, and
34937 promote to an HImode set. */
34938 x
= XVECEXP (vals
, 0, one_var
^ 1);
/* Even byte variable: VAR goes in the high half of the HImode pair.  */
34941 var
= convert_modes (HImode
, QImode
, var
, true);
34942 var
= expand_simple_binop (HImode
, ASHIFT
, var
, GEN_INT (8),
34943 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
34944 x
= GEN_INT (INTVAL (x
) & 0xff);
/* Odd byte variable: the constant neighbour goes in the high half.  */
34948 var
= convert_modes (HImode
, QImode
, var
, true);
34949 x
= gen_int_mode (INTVAL (x
) << 8, HImode
);
34951 if (x
!= const0_rtx
)
34952 var
= expand_simple_binop (HImode
, IOR
, var
, x
, var
,
34953 1, OPTAB_LIB_WIDEN
);
34955 x
= gen_reg_rtx (wmode
);
34956 emit_move_insn (x
, gen_lowpart (wmode
, const_vec
));
34957 ix86_expand_vector_set (mmx_ok
, x
, var
, one_var
>> 1);
34959 emit_move_insn (target
, gen_lowpart (mode
, x
));
/* Default path: load the constant vector, then set ONE_VAR.  */
34966 emit_move_insn (target
, const_vec
);
34967 ix86_expand_vector_set (mmx_ok
, target
, var
, one_var
);
34971 /* A subroutine of ix86_expand_vector_init_general. Use vector
34972 concatenate to handle the most general case: all values variable,
34973 and none identical. */
/* N == 2: force both operands into registers and emit one VEC_CONCAT.
   Larger N: pairs of OPS are built into half-width vectors FIRST[]
   (processed backward per the FIXME), optionally combined again into
   SECOND[], and the routine recurses until the 2-operand case emits
   the final concat.
   NOTE(review): the two switch statements choosing CMODE/HMODE per
   mode and N, braces, and the halving of N before recursion are
   elided in this extraction.  */
34976 ix86_expand_vector_init_concat (enum machine_mode mode
,
34977 rtx target
, rtx
*ops
, int n
)
34979 enum machine_mode cmode
, hmode
= VOIDmode
;
34980 rtx first
[8], second
[4];
35020 gcc_unreachable ();
35023 if (!register_operand (ops
[1], cmode
))
35024 ops
[1] = force_reg (cmode
, ops
[1]);
35025 if (!register_operand (ops
[0], cmode
))
35026 ops
[0] = force_reg (cmode
, ops
[0]);
35027 emit_insn (gen_rtx_SET (VOIDmode
, target
,
35028 gen_rtx_VEC_CONCAT (mode
, ops
[0],
35048 gcc_unreachable ();
35064 gcc_unreachable ();
35069 /* FIXME: We process inputs backward to help RA. PR 36222. */
35072 for (; i
> 0; i
-= 2, j
--)
35074 first
[j
] = gen_reg_rtx (cmode
);
35075 v
= gen_rtvec (2, ops
[i
- 1], ops
[i
]);
35076 ix86_expand_vector_init (false, first
[j
],
35077 gen_rtx_PARALLEL (cmode
, v
));
/* Need a second level of halving before the final concat.  */
35083 gcc_assert (hmode
!= VOIDmode
);
35084 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
35086 second
[j
] = gen_reg_rtx (hmode
);
35087 ix86_expand_vector_init_concat (hmode
, second
[j
],
35091 ix86_expand_vector_init_concat (mode
, target
, second
, n
);
35094 ix86_expand_vector_init_concat (mode
, target
, first
, n
);
35098 gcc_unreachable ();
35102 /* A subroutine of ix86_expand_vector_init_general. Use vector
35103 interleave to handle the most general case: all values variable,
35104 and none identical. */
/* For V8HImode/V16QImode targets: pairs of elements are packed into
   SImode, inserted as the low element of a V4SImode vector, the even
   element loaded into position 1 with GEN_LOAD_EVEN, and the partial
   vectors combined with successive low-interleave operations through
   FIRST_IMODE -> SECOND_IMODE (-> THIRD_IMODE for the QImode case).
   The three insn-generator pointers are selected by the (elided) mode
   switch heads visible as the two assignment groups below.
   NOTE(review): switch/case/brace scaffolding, loop-variable
   declarations and some call argument tails are elided in this
   extraction.  */
35107 ix86_expand_vector_init_interleave (enum machine_mode mode
,
35108 rtx target
, rtx
*ops
, int n
)
35110 enum machine_mode first_imode
, second_imode
, third_imode
, inner_mode
;
35113 rtx (*gen_load_even
) (rtx
, rtx
, rtx
);
35114 rtx (*gen_interleave_first_low
) (rtx
, rtx
, rtx
);
35115 rtx (*gen_interleave_second_low
) (rtx
, rtx
, rtx
);
/* V8HImode configuration.  */
35120 gen_load_even
= gen_vec_setv8hi
;
35121 gen_interleave_first_low
= gen_vec_interleave_lowv4si
;
35122 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
35123 inner_mode
= HImode
;
35124 first_imode
= V4SImode
;
35125 second_imode
= V2DImode
;
35126 third_imode
= VOIDmode
;
/* V16QImode configuration.  */
35129 gen_load_even
= gen_vec_setv16qi
;
35130 gen_interleave_first_low
= gen_vec_interleave_lowv8hi
;
35131 gen_interleave_second_low
= gen_vec_interleave_lowv4si
;
35132 inner_mode
= QImode
;
35133 first_imode
= V8HImode
;
35134 second_imode
= V4SImode
;
35135 third_imode
= V2DImode
;
35138 gcc_unreachable ();
35141 for (i
= 0; i
< n
; i
++)
35143 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
35144 op0
= gen_reg_rtx (SImode
);
35145 emit_move_insn (op0
, gen_lowpart (SImode
, ops
[i
+ i
]));
35147 /* Insert the SImode value as low element of V4SImode vector. */
35148 op1
= gen_reg_rtx (V4SImode
);
35149 op0
= gen_rtx_VEC_MERGE (V4SImode
,
35150 gen_rtx_VEC_DUPLICATE (V4SImode
,
35152 CONST0_RTX (V4SImode
),
35154 emit_insn (gen_rtx_SET (VOIDmode
, op1
, op0
));
35156 /* Cast the V4SImode vector back to a vector in orignal mode. */
35157 op0
= gen_reg_rtx (mode
);
35158 emit_move_insn (op0
, gen_lowpart (mode
, op1
));
35160 /* Load even elements into the second positon. */
35161 emit_insn (gen_load_even (op0
,
35162 force_reg (inner_mode
,
35166 /* Cast vector to FIRST_IMODE vector. */
35167 ops
[i
] = gen_reg_rtx (first_imode
);
35168 emit_move_insn (ops
[i
], gen_lowpart (first_imode
, op0
));
35171 /* Interleave low FIRST_IMODE vectors. */
35172 for (i
= j
= 0; i
< n
; i
+= 2, j
++)
35174 op0
= gen_reg_rtx (first_imode
);
35175 emit_insn (gen_interleave_first_low (op0
, ops
[i
], ops
[i
+ 1]));
35177 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
35178 ops
[j
] = gen_reg_rtx (second_imode
);
35179 emit_move_insn (ops
[j
], gen_lowpart (second_imode
, op0
));
35182 /* Interleave low SECOND_IMODE vectors. */
35183 switch (second_imode
)
35186 for (i
= j
= 0; i
< n
/ 2; i
+= 2, j
++)
35188 op0
= gen_reg_rtx (second_imode
);
35189 emit_insn (gen_interleave_second_low (op0
, ops
[i
],
35192 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
35194 ops
[j
] = gen_reg_rtx (third_imode
);
35195 emit_move_insn (ops
[j
], gen_lowpart (third_imode
, op0
));
/* Fall through into the V2DImode final interleave.  */
35197 second_imode
= V2DImode
;
35198 gen_interleave_second_low
= gen_vec_interleave_lowv2di
;
35202 op0
= gen_reg_rtx (second_imode
);
35203 emit_insn (gen_interleave_second_low (op0
, ops
[0],
35206 /* Cast the SECOND_IMODE vector back to a vector on original
35208 emit_insn (gen_rtx_SET (VOIDmode
, target
,
35209 gen_lowpart (mode
, op0
)));
35213 gcc_unreachable ();
35217 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
35218 all values variable, and none identical. */
/* Dispatches per mode (switch heads elided): wide float/integer modes
   go through ix86_expand_vector_init_concat; V16HI/V32QI build two
   half-width vectors by interleave and VEC_CONCAT them; V8HI/V16QI
   use the interleave routine directly when SSE4.1 and inter-unit
   moves allow, otherwise fall through to the word-packing path at the
   bottom, which ORs elements into word_mode words and assembles the
   vector from 1, 2 or 4 words.
   NOTE(review): the switch/case scaffolding, braces and the
   else/break structure are elided in this extraction.  */
35221 ix86_expand_vector_init_general (bool mmx_ok
, enum machine_mode mode
,
35222 rtx target
, rtx vals
)
35224 rtx ops
[32], op0
, op1
;
35225 enum machine_mode half_mode
= VOIDmode
;
35232 if (!mmx_ok
&& !TARGET_SSE
)
/* Concat-based path for modes handled by init_concat.  */
35244 n
= GET_MODE_NUNITS (mode
);
35245 for (i
= 0; i
< n
; i
++)
35246 ops
[i
] = XVECEXP (vals
, 0, i
);
35247 ix86_expand_vector_init_concat (mode
, target
, ops
, n
);
35251 half_mode
= V16QImode
;
35255 half_mode
= V8HImode
;
/* 256-bit char/short vectors: build two halves by interleave.  */
35259 n
= GET_MODE_NUNITS (mode
);
35260 for (i
= 0; i
< n
; i
++)
35261 ops
[i
] = XVECEXP (vals
, 0, i
);
35262 op0
= gen_reg_rtx (half_mode
);
35263 op1
= gen_reg_rtx (half_mode
);
35264 ix86_expand_vector_init_interleave (half_mode
, op0
, ops
,
35266 ix86_expand_vector_init_interleave (half_mode
, op1
,
35267 &ops
[n
>> 1], n
>> 2);
35268 emit_insn (gen_rtx_SET (VOIDmode
, target
,
35269 gen_rtx_VEC_CONCAT (mode
, op0
, op1
)));
35273 if (!TARGET_SSE4_1
)
35281 /* Don't use ix86_expand_vector_init_interleave if we can't
35282 move from GPR to SSE register directly. */
35283 if (!TARGET_INTER_UNIT_MOVES
)
35286 n
= GET_MODE_NUNITS (mode
);
35287 for (i
= 0; i
< n
; i
++)
35288 ops
[i
] = XVECEXP (vals
, 0, i
);
35289 ix86_expand_vector_init_interleave (mode
, target
, ops
, n
>> 1);
35297 gcc_unreachable ();
/* Fallback: pack elements into word_mode words with shift/IOR.  */
35301 int i
, j
, n_elts
, n_words
, n_elt_per_word
;
35302 enum machine_mode inner_mode
;
35303 rtx words
[4], shift
;
35305 inner_mode
= GET_MODE_INNER (mode
);
35306 n_elts
= GET_MODE_NUNITS (mode
);
35307 n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
35308 n_elt_per_word
= n_elts
/ n_words
;
35309 shift
= GEN_INT (GET_MODE_BITSIZE (inner_mode
));
35311 for (i
= 0; i
< n_words
; ++i
)
35313 rtx word
= NULL_RTX
;
35315 for (j
= 0; j
< n_elt_per_word
; ++j
)
/* Elements are folded in from highest to lowest within the word.  */
35317 rtx elt
= XVECEXP (vals
, 0, (i
+1)*n_elt_per_word
- j
- 1);
35318 elt
= convert_modes (word_mode
, inner_mode
, elt
, true);
35324 word
= expand_simple_binop (word_mode
, ASHIFT
, word
, shift
,
35325 word
, 1, OPTAB_LIB_WIDEN
);
35326 word
= expand_simple_binop (word_mode
, IOR
, word
, elt
,
35327 word
, 1, OPTAB_LIB_WIDEN
);
35335 emit_move_insn (target
, gen_lowpart (mode
, words
[0]));
35336 else if (n_words
== 2)
/* Two words: write low/high halves of a clobbered temp.  */
35338 rtx tmp
= gen_reg_rtx (mode
);
35339 emit_clobber (tmp
);
35340 emit_move_insn (gen_lowpart (word_mode
, tmp
), words
[0]);
35341 emit_move_insn (gen_highpart (word_mode
, tmp
), words
[1]);
35342 emit_move_insn (target
, tmp
);
35344 else if (n_words
== 4)
/* Four SImode words: recurse as a V4SImode init.  */
35346 rtx tmp
= gen_reg_rtx (V4SImode
);
35347 gcc_assert (word_mode
== SImode
);
35348 vals
= gen_rtx_PARALLEL (V4SImode
, gen_rtvec_v (4, words
));
35349 ix86_expand_vector_init_general (false, V4SImode
, tmp
, vals
);
35350 emit_move_insn (target
, gen_lowpart (mode
, tmp
));
35353 gcc_unreachable ();
35357 /* Initialize vector TARGET via VALS. Suppress the use of MMX
35358 instructions unless MMX_OK is true. */
/* Entry point for vector initialization.  Classifies the elements
   (constant / variable / all-same / all-zero) and dispatches, in
   order of preference, to: a constant-pool load, broadcast
   (init_duplicate), single-nonzero (init_one_nonzero), single
   variable element (init_one_var), and finally the general case.
   NOTE(review): braces, the i/x declarations and some guard
   conditions (e.g. the n_var checks) are elided in this
   extraction.  */
35361 ix86_expand_vector_init (bool mmx_ok
, rtx target
, rtx vals
)
35363 enum machine_mode mode
= GET_MODE (target
);
35364 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
35365 int n_elts
= GET_MODE_NUNITS (mode
);
35366 int n_var
= 0, one_var
= -1;
35367 bool all_same
= true, all_const_zero
= true;
/* Classify elements: count variables, remember the last variable
   index, and track the all-zero / all-identical properties.  */
35371 for (i
= 0; i
< n_elts
; ++i
)
35373 x
= XVECEXP (vals
, 0, i
);
35374 if (!(CONST_INT_P (x
)
35375 || GET_CODE (x
) == CONST_DOUBLE
35376 || GET_CODE (x
) == CONST_FIXED
))
35377 n_var
++, one_var
= i
;
35378 else if (x
!= CONST0_RTX (inner_mode
))
35379 all_const_zero
= false;
35380 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
35384 /* Constants are best loaded from the constant pool. */
35387 emit_move_insn (target
, gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
35391 /* If all values are identical, broadcast the value. */
35393 && ix86_expand_vector_init_duplicate (mmx_ok
, mode
, target
,
35394 XVECEXP (vals
, 0, 0)))
35397 /* Values where only one field is non-constant are best loaded from
35398 the pool and overwritten via move later. */
35402 && ix86_expand_vector_init_one_nonzero (mmx_ok
, mode
, target
,
35403 XVECEXP (vals
, 0, one_var
),
35407 if (ix86_expand_vector_init_one_var (mmx_ok
, mode
, target
, vals
, one_var
))
35411 ix86_expand_vector_init_general (mmx_ok
, mode
, target
, vals
);
/* Store scalar VAL into element ELT of vector TARGET.  Per-mode
   strategies (mode switch scaffolding elided): 2-element modes use
   VEC_CONCAT with the extracted other element; V4SFmode uses
   unpcklps/shufps sequences (the A B C D diagrams below); V4SImode
   swaps element 0 and ELT with pshufd, sets element 0, and swaps
   back; small-element modes defer to vec_merge when the ISA allows;
   256-bit AVX modes extract the relevant 128-bit half via the static
   GEN_EXTRACT table, recurse into it, and re-insert with GEN_INSERT;
   the final fallback spills to a stack temporary, stores the element
   at its byte offset, and reloads.
   NOTE(review): braces, the switch/case heads, several declarations
   (tmp, op0/op1, order[], i/j) and the use_vec_merge application are
   elided in this extraction.  */
35415 ix86_expand_vector_set (bool mmx_ok
, rtx target
, rtx val
, int elt
)
35417 enum machine_mode mode
= GET_MODE (target
);
35418 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
35419 enum machine_mode half_mode
;
35420 bool use_vec_merge
= false;
/* lo/hi 128-bit extractors for the six 256-bit modes, indexed the
   same way as gen_insert below.  */
35422 static rtx (*gen_extract
[6][2]) (rtx
, rtx
)
35424 { gen_vec_extract_lo_v32qi
, gen_vec_extract_hi_v32qi
},
35425 { gen_vec_extract_lo_v16hi
, gen_vec_extract_hi_v16hi
},
35426 { gen_vec_extract_lo_v8si
, gen_vec_extract_hi_v8si
},
35427 { gen_vec_extract_lo_v4di
, gen_vec_extract_hi_v4di
},
35428 { gen_vec_extract_lo_v8sf
, gen_vec_extract_hi_v8sf
},
35429 { gen_vec_extract_lo_v4df
, gen_vec_extract_hi_v4df
}
35431 static rtx (*gen_insert
[6][2]) (rtx
, rtx
, rtx
)
35433 { gen_vec_set_lo_v32qi
, gen_vec_set_hi_v32qi
},
35434 { gen_vec_set_lo_v16hi
, gen_vec_set_hi_v16hi
},
35435 { gen_vec_set_lo_v8si
, gen_vec_set_hi_v8si
},
35436 { gen_vec_set_lo_v4di
, gen_vec_set_hi_v4di
},
35437 { gen_vec_set_lo_v8sf
, gen_vec_set_hi_v8sf
},
35438 { gen_vec_set_lo_v4df
, gen_vec_set_hi_v4df
}
/* 2-element case: keep the other element, concat with VAL in order.  */
35448 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
35449 ix86_expand_vector_extract (true, tmp
, target
, 1 - elt
);
35451 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
35453 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
35454 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
35460 use_vec_merge
= TARGET_SSE4_1
&& TARGET_64BIT
;
35464 tmp
= gen_reg_rtx (GET_MODE_INNER (mode
));
35465 ix86_expand_vector_extract (false, tmp
, target
, 1 - elt
);
35467 tmp
= gen_rtx_VEC_CONCAT (mode
, val
, tmp
);
35469 tmp
= gen_rtx_VEC_CONCAT (mode
, tmp
, val
);
35470 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
35477 /* For the two element vectors, we implement a VEC_CONCAT with
35478 the extraction of the other element. */
35480 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (1 - elt
)));
35481 tmp
= gen_rtx_VEC_SELECT (inner_mode
, target
, tmp
);
35484 op0
= val
, op1
= tmp
;
35486 op0
= tmp
, op1
= val
;
35488 tmp
= gen_rtx_VEC_CONCAT (mode
, op0
, op1
);
35489 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
35494 use_vec_merge
= TARGET_SSE4_1
;
35501 use_vec_merge
= true;
35505 /* tmp = target = A B C D */
35506 tmp
= copy_to_reg (target
);
35507 /* target = A A B B */
35508 emit_insn (gen_vec_interleave_lowv4sf (target
, target
, target
));
35509 /* target = X A B B */
35510 ix86_expand_vector_set (false, target
, val
, 0);
35511 /* target = A X C D */
35512 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
35513 const1_rtx
, const0_rtx
,
35514 GEN_INT (2+4), GEN_INT (3+4)));
35518 /* tmp = target = A B C D */
35519 tmp
= copy_to_reg (target
);
35520 /* tmp = X B C D */
35521 ix86_expand_vector_set (false, tmp
, val
, 0);
35522 /* target = A B X D */
35523 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
35524 const0_rtx
, const1_rtx
,
35525 GEN_INT (0+4), GEN_INT (3+4)));
35529 /* tmp = target = A B C D */
35530 tmp
= copy_to_reg (target
);
35531 /* tmp = X B C D */
35532 ix86_expand_vector_set (false, tmp
, val
, 0);
35533 /* target = A B X D */
35534 emit_insn (gen_sse_shufps_v4sf (target
, target
, tmp
,
35535 const0_rtx
, const1_rtx
,
35536 GEN_INT (2+4), GEN_INT (0+4)));
35540 gcc_unreachable ();
35545 use_vec_merge
= TARGET_SSE4_1
;
35549 /* Element 0 handled by vec_merge below. */
35552 use_vec_merge
= true;
35558 /* With SSE2, use integer shuffles to swap element 0 and ELT,
35559 store into element 0, then shuffle them back. */
35563 order
[0] = GEN_INT (elt
);
35564 order
[1] = const1_rtx
;
35565 order
[2] = const2_rtx
;
35566 order
[3] = GEN_INT (3);
35567 order
[elt
] = const0_rtx
;
35569 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
35570 order
[1], order
[2], order
[3]));
35572 ix86_expand_vector_set (false, target
, val
, 0);
/* The same permutation is its own inverse, restoring the order.  */
35574 emit_insn (gen_sse2_pshufd_1 (target
, target
, order
[0],
35575 order
[1], order
[2], order
[3]));
35579 /* For SSE1, we have to reuse the V4SF code. */
35580 ix86_expand_vector_set (false, gen_lowpart (V4SFmode
, target
),
35581 gen_lowpart (SFmode
, val
), elt
);
35586 use_vec_merge
= TARGET_SSE2
;
35589 use_vec_merge
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
35593 use_vec_merge
= TARGET_SSE4_1
;
/* 256-bit AVX modes: pick the 128-bit half mode, then work on the
   half containing ELT via the gen_extract/gen_insert tables.  */
35600 half_mode
= V16QImode
;
35606 half_mode
= V8HImode
;
35612 half_mode
= V4SImode
;
35618 half_mode
= V2DImode
;
35624 half_mode
= V4SFmode
;
35630 half_mode
= V2DFmode
;
35636 /* Compute offset. */
35640 gcc_assert (i
<= 1);
35642 /* Extract the half. */
35643 tmp
= gen_reg_rtx (half_mode
);
35644 emit_insn (gen_extract
[j
][i
] (tmp
, target
));
35646 /* Put val in tmp at elt. */
35647 ix86_expand_vector_set (false, tmp
, val
, elt
);
35650 emit_insn (gen_insert
[j
][i
] (target
, target
, tmp
));
/* vec_merge path: duplicate VAL and merge it in at bit ELT.  */
35659 tmp
= gen_rtx_VEC_DUPLICATE (mode
, val
);
35660 tmp
= gen_rtx_VEC_MERGE (mode
, tmp
, target
, GEN_INT (1 << elt
));
35661 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
/* Last resort: round-trip through a stack temporary.  */
35665 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
35667 emit_move_insn (mem
, target
);
35669 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
35670 emit_move_insn (tmp
, val
);
35672 emit_move_insn (target
, mem
);
35677 ix86_expand_vector_extract (bool mmx_ok
, rtx target
, rtx vec
, int elt
)
35679 enum machine_mode mode
= GET_MODE (vec
);
35680 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
35681 bool use_vec_extr
= false;
35694 use_vec_extr
= true;
35698 use_vec_extr
= TARGET_SSE4_1
;
35710 tmp
= gen_reg_rtx (mode
);
35711 emit_insn (gen_sse_shufps_v4sf (tmp
, vec
, vec
,
35712 GEN_INT (elt
), GEN_INT (elt
),
35713 GEN_INT (elt
+4), GEN_INT (elt
+4)));
35717 tmp
= gen_reg_rtx (mode
);
35718 emit_insn (gen_vec_interleave_highv4sf (tmp
, vec
, vec
));
35722 gcc_unreachable ();
35725 use_vec_extr
= true;
35730 use_vec_extr
= TARGET_SSE4_1
;
35744 tmp
= gen_reg_rtx (mode
);
35745 emit_insn (gen_sse2_pshufd_1 (tmp
, vec
,
35746 GEN_INT (elt
), GEN_INT (elt
),
35747 GEN_INT (elt
), GEN_INT (elt
)));
35751 tmp
= gen_reg_rtx (mode
);
35752 emit_insn (gen_vec_interleave_highv4si (tmp
, vec
, vec
));
35756 gcc_unreachable ();
35759 use_vec_extr
= true;
35764 /* For SSE1, we have to reuse the V4SF code. */
35765 ix86_expand_vector_extract (false, gen_lowpart (SFmode
, target
),
35766 gen_lowpart (V4SFmode
, vec
), elt
);
35772 use_vec_extr
= TARGET_SSE2
;
35775 use_vec_extr
= mmx_ok
&& (TARGET_SSE
|| TARGET_3DNOW_A
);
35779 use_vec_extr
= TARGET_SSE4_1
;
35785 tmp
= gen_reg_rtx (V4SFmode
);
35787 emit_insn (gen_vec_extract_lo_v8sf (tmp
, vec
));
35789 emit_insn (gen_vec_extract_hi_v8sf (tmp
, vec
));
35790 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
35798 tmp
= gen_reg_rtx (V2DFmode
);
35800 emit_insn (gen_vec_extract_lo_v4df (tmp
, vec
));
35802 emit_insn (gen_vec_extract_hi_v4df (tmp
, vec
));
35803 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
35811 tmp
= gen_reg_rtx (V16QImode
);
35813 emit_insn (gen_vec_extract_lo_v32qi (tmp
, vec
));
35815 emit_insn (gen_vec_extract_hi_v32qi (tmp
, vec
));
35816 ix86_expand_vector_extract (false, target
, tmp
, elt
& 15);
35824 tmp
= gen_reg_rtx (V8HImode
);
35826 emit_insn (gen_vec_extract_lo_v16hi (tmp
, vec
));
35828 emit_insn (gen_vec_extract_hi_v16hi (tmp
, vec
));
35829 ix86_expand_vector_extract (false, target
, tmp
, elt
& 7);
35837 tmp
= gen_reg_rtx (V4SImode
);
35839 emit_insn (gen_vec_extract_lo_v8si (tmp
, vec
));
35841 emit_insn (gen_vec_extract_hi_v8si (tmp
, vec
));
35842 ix86_expand_vector_extract (false, target
, tmp
, elt
& 3);
35850 tmp
= gen_reg_rtx (V2DImode
);
35852 emit_insn (gen_vec_extract_lo_v4di (tmp
, vec
));
35854 emit_insn (gen_vec_extract_hi_v4di (tmp
, vec
));
35855 ix86_expand_vector_extract (false, target
, tmp
, elt
& 1);
35861 /* ??? Could extract the appropriate HImode element and shift. */
35868 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, GEN_INT (elt
)));
35869 tmp
= gen_rtx_VEC_SELECT (inner_mode
, vec
, tmp
);
35871 /* Let the rtl optimizers know about the zero extension performed. */
35872 if (inner_mode
== QImode
|| inner_mode
== HImode
)
35874 tmp
= gen_rtx_ZERO_EXTEND (SImode
, tmp
);
35875 target
= gen_lowpart (SImode
, target
);
35878 emit_insn (gen_rtx_SET (VOIDmode
, target
, tmp
));
35882 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
35884 emit_move_insn (mem
, vec
);
35886 tmp
= adjust_address (mem
, inner_mode
, elt
*GET_MODE_SIZE (inner_mode
));
35887 emit_move_insn (target
, tmp
);
35891 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
35892 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
35893 The upper bits of DEST are undefined, though they shouldn't cause
35894 exceptions (some bits from src or all zeros are ok). */
35897 emit_reduc_half (rtx dest
, rtx src
, int i
)
35900 switch (GET_MODE (src
))
35904 tem
= gen_sse_movhlps (dest
, src
, src
);
35906 tem
= gen_sse_shufps_v4sf (dest
, src
, src
, const1_rtx
, const1_rtx
,
35907 GEN_INT (1 + 4), GEN_INT (1 + 4));
35910 tem
= gen_vec_interleave_highv2df (dest
, src
, src
);
35916 tem
= gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode
, dest
),
35917 gen_lowpart (V1TImode
, src
),
35922 tem
= gen_avx_vperm2f128v8sf3 (dest
, src
, src
, const1_rtx
);
35924 tem
= gen_avx_shufps256 (dest
, src
, src
,
35925 GEN_INT (i
== 128 ? 2 + (3 << 2) : 1));
35929 tem
= gen_avx_vperm2f128v4df3 (dest
, src
, src
, const1_rtx
);
35931 tem
= gen_avx_shufpd256 (dest
, src
, src
, const1_rtx
);
35938 tem
= gen_avx2_permv2ti (gen_lowpart (V4DImode
, dest
),
35939 gen_lowpart (V4DImode
, src
),
35940 gen_lowpart (V4DImode
, src
),
35943 tem
= gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode
, dest
),
35944 gen_lowpart (V2TImode
, src
),
35948 gcc_unreachable ();
35953 /* Expand a vector reduction. FN is the binary pattern to reduce;
35954 DEST is the destination; IN is the input vector. */
35957 ix86_expand_reduc (rtx (*fn
) (rtx
, rtx
, rtx
), rtx dest
, rtx in
)
35959 rtx half
, dst
, vec
= in
;
35960 enum machine_mode mode
= GET_MODE (in
);
35963 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
35965 && mode
== V8HImode
35966 && fn
== gen_uminv8hi3
)
35968 emit_insn (gen_sse4_1_phminposuw (dest
, in
));
35972 for (i
= GET_MODE_BITSIZE (mode
);
35973 i
> GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
35976 half
= gen_reg_rtx (mode
);
35977 emit_reduc_half (half
, vec
, i
);
35978 if (i
== GET_MODE_BITSIZE (GET_MODE_INNER (mode
)) * 2)
35981 dst
= gen_reg_rtx (mode
);
35982 emit_insn (fn (dst
, half
, vec
));
35987 /* Target hook for scalar_mode_supported_p. */
35989 ix86_scalar_mode_supported_p (enum machine_mode mode
)
35991 if (DECIMAL_FLOAT_MODE_P (mode
))
35992 return default_decimal_float_supported_p ();
35993 else if (mode
== TFmode
)
35996 return default_scalar_mode_supported_p (mode
);
35999 /* Implements target hook vector_mode_supported_p. */
36001 ix86_vector_mode_supported_p (enum machine_mode mode
)
36003 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
36005 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
36007 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
36009 if (TARGET_MMX
&& VALID_MMX_REG_MODE (mode
))
36011 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
36016 /* Target hook for c_mode_for_suffix. */
36017 static enum machine_mode
36018 ix86_c_mode_for_suffix (char suffix
)
36028 /* Worker function for TARGET_MD_ASM_CLOBBERS.
36030 We do this in the new i386 backend to maintain source compatibility
36031 with the old cc0-based compiler. */
36034 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED
,
36035 tree inputs ATTRIBUTE_UNUSED
,
36038 clobbers
= tree_cons (NULL_TREE
, build_string (5, "flags"),
36040 clobbers
= tree_cons (NULL_TREE
, build_string (4, "fpsr"),
36045 /* Implements target vector targetm.asm.encode_section_info. */
36047 static void ATTRIBUTE_UNUSED
36048 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
36050 default_encode_section_info (decl
, rtl
, first
);
36052 if (TREE_CODE (decl
) == VAR_DECL
36053 && (TREE_STATIC (decl
) || DECL_EXTERNAL (decl
))
36054 && ix86_in_large_data_p (decl
))
36055 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
36058 /* Worker function for REVERSE_CONDITION. */
36061 ix86_reverse_condition (enum rtx_code code
, enum machine_mode mode
)
36063 return (mode
!= CCFPmode
&& mode
!= CCFPUmode
36064 ? reverse_condition (code
)
36065 : reverse_condition_maybe_unordered (code
));
36068 /* Output code to perform an x87 FP register move, from OPERANDS[1]
36072 output_387_reg_move (rtx insn
, rtx
*operands
)
36074 if (REG_P (operands
[0]))
36076 if (REG_P (operands
[1])
36077 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
36079 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
36080 return output_387_ffreep (operands
, 0);
36081 return "fstp\t%y0";
36083 if (STACK_TOP_P (operands
[0]))
36084 return "fld%Z1\t%y1";
36087 else if (MEM_P (operands
[0]))
36089 gcc_assert (REG_P (operands
[1]));
36090 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
36091 return "fstp%Z0\t%y0";
36094 /* There is no non-popping store to memory for XFmode.
36095 So if we need one, follow the store with a load. */
36096 if (GET_MODE (operands
[0]) == XFmode
)
36097 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
36099 return "fst%Z0\t%y0";
36106 /* Output code to perform a conditional jump to LABEL, if C2 flag in
36107 FP status register is set. */
36110 ix86_emit_fp_unordered_jump (rtx label
)
36112 rtx reg
= gen_reg_rtx (HImode
);
36115 emit_insn (gen_x86_fnstsw_1 (reg
));
36117 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
36119 emit_insn (gen_x86_sahf_1 (reg
));
36121 temp
= gen_rtx_REG (CCmode
, FLAGS_REG
);
36122 temp
= gen_rtx_UNORDERED (VOIDmode
, temp
, const0_rtx
);
36126 emit_insn (gen_testqi_ext_ccno_0 (reg
, GEN_INT (0x04)));
36128 temp
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
36129 temp
= gen_rtx_NE (VOIDmode
, temp
, const0_rtx
);
36132 temp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, temp
,
36133 gen_rtx_LABEL_REF (VOIDmode
, label
),
36135 temp
= gen_rtx_SET (VOIDmode
, pc_rtx
, temp
);
36137 emit_jump_insn (temp
);
36138 predict_jump (REG_BR_PROB_BASE
* 10 / 100);
36141 /* Output code to perform a log1p XFmode calculation. */
36143 void ix86_emit_i387_log1p (rtx op0
, rtx op1
)
36145 rtx label1
= gen_label_rtx ();
36146 rtx label2
= gen_label_rtx ();
36148 rtx tmp
= gen_reg_rtx (XFmode
);
36149 rtx tmp2
= gen_reg_rtx (XFmode
);
36152 emit_insn (gen_absxf2 (tmp
, op1
));
36153 test
= gen_rtx_GE (VOIDmode
, tmp
,
36154 CONST_DOUBLE_FROM_REAL_VALUE (
36155 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode
),
36157 emit_jump_insn (gen_cbranchxf4 (test
, XEXP (test
, 0), XEXP (test
, 1), label1
));
36159 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
36160 emit_insn (gen_fyl2xp1xf3_i387 (op0
, op1
, tmp2
));
36161 emit_jump (label2
);
36163 emit_label (label1
);
36164 emit_move_insn (tmp
, CONST1_RTX (XFmode
));
36165 emit_insn (gen_addxf3 (tmp
, op1
, tmp
));
36166 emit_move_insn (tmp2
, standard_80387_constant_rtx (4)); /* fldln2 */
36167 emit_insn (gen_fyl2xxf3_i387 (op0
, tmp
, tmp2
));
36169 emit_label (label2
);
36172 /* Emit code for round calculation. */
36173 void ix86_emit_i387_round (rtx op0
, rtx op1
)
36175 enum machine_mode inmode
= GET_MODE (op1
);
36176 enum machine_mode outmode
= GET_MODE (op0
);
36177 rtx e1
, e2
, res
, tmp
, tmp1
, half
;
36178 rtx scratch
= gen_reg_rtx (HImode
);
36179 rtx flags
= gen_rtx_REG (CCNOmode
, FLAGS_REG
);
36180 rtx jump_label
= gen_label_rtx ();
36182 rtx (*gen_abs
) (rtx
, rtx
);
36183 rtx (*gen_neg
) (rtx
, rtx
);
36188 gen_abs
= gen_abssf2
;
36191 gen_abs
= gen_absdf2
;
36194 gen_abs
= gen_absxf2
;
36197 gcc_unreachable ();
36203 gen_neg
= gen_negsf2
;
36206 gen_neg
= gen_negdf2
;
36209 gen_neg
= gen_negxf2
;
36212 gen_neg
= gen_neghi2
;
36215 gen_neg
= gen_negsi2
;
36218 gen_neg
= gen_negdi2
;
36221 gcc_unreachable ();
36224 e1
= gen_reg_rtx (inmode
);
36225 e2
= gen_reg_rtx (inmode
);
36226 res
= gen_reg_rtx (outmode
);
36228 half
= CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf
, inmode
);
36230 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
36232 /* scratch = fxam(op1) */
36233 emit_insn (gen_rtx_SET (VOIDmode
, scratch
,
36234 gen_rtx_UNSPEC (HImode
, gen_rtvec (1, op1
),
36236 /* e1 = fabs(op1) */
36237 emit_insn (gen_abs (e1
, op1
));
36239 /* e2 = e1 + 0.5 */
36240 half
= force_reg (inmode
, half
);
36241 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
36242 gen_rtx_PLUS (inmode
, e1
, half
)));
36244 /* res = floor(e2) */
36245 if (inmode
!= XFmode
)
36247 tmp1
= gen_reg_rtx (XFmode
);
36249 emit_insn (gen_rtx_SET (VOIDmode
, tmp1
,
36250 gen_rtx_FLOAT_EXTEND (XFmode
, e2
)));
36260 rtx tmp0
= gen_reg_rtx (XFmode
);
36262 emit_insn (gen_frndintxf2_floor (tmp0
, tmp1
));
36264 emit_insn (gen_rtx_SET (VOIDmode
, res
,
36265 gen_rtx_UNSPEC (outmode
, gen_rtvec (1, tmp0
),
36266 UNSPEC_TRUNC_NOOP
)));
36270 emit_insn (gen_frndintxf2_floor (res
, tmp1
));
36273 emit_insn (gen_lfloorxfhi2 (res
, tmp1
));
36276 emit_insn (gen_lfloorxfsi2 (res
, tmp1
));
36279 emit_insn (gen_lfloorxfdi2 (res
, tmp1
));
36282 gcc_unreachable ();
36285 /* flags = signbit(a) */
36286 emit_insn (gen_testqi_ext_ccno_0 (scratch
, GEN_INT (0x02)));
36288 /* if (flags) then res = -res */
36289 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
,
36290 gen_rtx_EQ (VOIDmode
, flags
, const0_rtx
),
36291 gen_rtx_LABEL_REF (VOIDmode
, jump_label
),
36293 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
36294 predict_jump (REG_BR_PROB_BASE
* 50 / 100);
36295 JUMP_LABEL (insn
) = jump_label
;
36297 emit_insn (gen_neg (res
, res
));
36299 emit_label (jump_label
);
36300 LABEL_NUSES (jump_label
) = 1;
36302 emit_move_insn (op0
, res
);
36305 /* Output code to perform a Newton-Rhapson approximation of a single precision
36306 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
36308 void ix86_emit_swdivsf (rtx res
, rtx a
, rtx b
, enum machine_mode mode
)
36310 rtx x0
, x1
, e0
, e1
;
36312 x0
= gen_reg_rtx (mode
);
36313 e0
= gen_reg_rtx (mode
);
36314 e1
= gen_reg_rtx (mode
);
36315 x1
= gen_reg_rtx (mode
);
36317 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
36319 b
= force_reg (mode
, b
);
36321 /* x0 = rcp(b) estimate */
36322 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
36323 gen_rtx_UNSPEC (mode
, gen_rtvec (1, b
),
36326 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
36327 gen_rtx_MULT (mode
, x0
, b
)));
36330 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
36331 gen_rtx_MULT (mode
, x0
, e0
)));
36334 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
36335 gen_rtx_PLUS (mode
, x0
, x0
)));
36338 emit_insn (gen_rtx_SET (VOIDmode
, x1
,
36339 gen_rtx_MINUS (mode
, e1
, e0
)));
36342 emit_insn (gen_rtx_SET (VOIDmode
, res
,
36343 gen_rtx_MULT (mode
, a
, x1
)));
36346 /* Output code to perform a Newton-Rhapson approximation of a
36347 single precision floating point [reciprocal] square root. */
36349 void ix86_emit_swsqrtsf (rtx res
, rtx a
, enum machine_mode mode
,
36352 rtx x0
, e0
, e1
, e2
, e3
, mthree
, mhalf
;
36355 x0
= gen_reg_rtx (mode
);
36356 e0
= gen_reg_rtx (mode
);
36357 e1
= gen_reg_rtx (mode
);
36358 e2
= gen_reg_rtx (mode
);
36359 e3
= gen_reg_rtx (mode
);
36361 real_from_integer (&r
, VOIDmode
, -3, -1, 0);
36362 mthree
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
36364 real_arithmetic (&r
, NEGATE_EXPR
, &dconsthalf
, NULL
);
36365 mhalf
= CONST_DOUBLE_FROM_REAL_VALUE (r
, SFmode
);
36367 if (VECTOR_MODE_P (mode
))
36369 mthree
= ix86_build_const_vector (mode
, true, mthree
);
36370 mhalf
= ix86_build_const_vector (mode
, true, mhalf
);
36373 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
36374 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
36376 a
= force_reg (mode
, a
);
36378 /* x0 = rsqrt(a) estimate */
36379 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
36380 gen_rtx_UNSPEC (mode
, gen_rtvec (1, a
),
36383 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
36388 zero
= gen_reg_rtx (mode
);
36389 mask
= gen_reg_rtx (mode
);
36391 zero
= force_reg (mode
, CONST0_RTX(mode
));
36392 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
36393 gen_rtx_NE (mode
, zero
, a
)));
36395 emit_insn (gen_rtx_SET (VOIDmode
, x0
,
36396 gen_rtx_AND (mode
, x0
, mask
)));
36400 emit_insn (gen_rtx_SET (VOIDmode
, e0
,
36401 gen_rtx_MULT (mode
, x0
, a
)));
36403 emit_insn (gen_rtx_SET (VOIDmode
, e1
,
36404 gen_rtx_MULT (mode
, e0
, x0
)));
36407 mthree
= force_reg (mode
, mthree
);
36408 emit_insn (gen_rtx_SET (VOIDmode
, e2
,
36409 gen_rtx_PLUS (mode
, e1
, mthree
)));
36411 mhalf
= force_reg (mode
, mhalf
);
36413 /* e3 = -.5 * x0 */
36414 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
36415 gen_rtx_MULT (mode
, x0
, mhalf
)));
36417 /* e3 = -.5 * e0 */
36418 emit_insn (gen_rtx_SET (VOIDmode
, e3
,
36419 gen_rtx_MULT (mode
, e0
, mhalf
)));
36420 /* ret = e2 * e3 */
36421 emit_insn (gen_rtx_SET (VOIDmode
, res
,
36422 gen_rtx_MULT (mode
, e2
, e3
)));
36425 #ifdef TARGET_SOLARIS
36426 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
36429 i386_solaris_elf_named_section (const char *name
, unsigned int flags
,
36432 /* With Binutils 2.15, the "@unwind" marker must be specified on
36433 every occurrence of the ".eh_frame" section, not just the first
36436 && strcmp (name
, ".eh_frame") == 0)
36438 fprintf (asm_out_file
, "\t.section\t%s,\"%s\",@unwind\n", name
,
36439 flags
& SECTION_WRITE
? "aw" : "a");
36444 if (HAVE_COMDAT_GROUP
&& flags
& SECTION_LINKONCE
)
36446 solaris_elf_asm_comdat_section (name
, flags
, decl
);
36451 default_elf_asm_named_section (name
, flags
, decl
);
36453 #endif /* TARGET_SOLARIS */
36455 /* Return the mangling of TYPE if it is an extended fundamental type. */
36457 static const char *
36458 ix86_mangle_type (const_tree type
)
36460 type
= TYPE_MAIN_VARIANT (type
);
36462 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
36463 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
36466 switch (TYPE_MODE (type
))
36469 /* __float128 is "g". */
36472 /* "long double" or __float80 is "e". */
36479 /* For 32-bit code we can save PIC register setup by using
36480 __stack_chk_fail_local hidden function instead of calling
36481 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
36482 register, so it is better to call __stack_chk_fail directly. */
36484 static tree ATTRIBUTE_UNUSED
36485 ix86_stack_protect_fail (void)
36487 return TARGET_64BIT
36488 ? default_external_stack_protect_fail ()
36489 : default_hidden_stack_protect_fail ();
36492 /* Select a format to encode pointers in exception handling data. CODE
36493 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
36494 true if the symbol may be affected by dynamic relocations.
36496 ??? All x86 object file formats are capable of representing this.
36497 After all, the relocation needed is the same as for the call insn.
36498 Whether or not a particular assembler allows us to enter such, I
36499 guess we'll have to see. */
36501 asm_preferred_eh_data_format (int code
, int global
)
36505 int type
= DW_EH_PE_sdata8
;
36507 || ix86_cmodel
== CM_SMALL_PIC
36508 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
36509 type
= DW_EH_PE_sdata4
;
36510 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
36512 if (ix86_cmodel
== CM_SMALL
36513 || (ix86_cmodel
== CM_MEDIUM
&& code
))
36514 return DW_EH_PE_udata4
;
36515 return DW_EH_PE_absptr
;
36518 /* Expand copysign from SIGN to the positive value ABS_VALUE
36519 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
36522 ix86_sse_copysign_to_positive (rtx result
, rtx abs_value
, rtx sign
, rtx mask
)
36524 enum machine_mode mode
= GET_MODE (sign
);
36525 rtx sgn
= gen_reg_rtx (mode
);
36526 if (mask
== NULL_RTX
)
36528 enum machine_mode vmode
;
36530 if (mode
== SFmode
)
36532 else if (mode
== DFmode
)
36537 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), false);
36538 if (!VECTOR_MODE_P (mode
))
36540 /* We need to generate a scalar mode mask in this case. */
36541 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
36542 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
36543 mask
= gen_reg_rtx (mode
);
36544 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
36548 mask
= gen_rtx_NOT (mode
, mask
);
36549 emit_insn (gen_rtx_SET (VOIDmode
, sgn
,
36550 gen_rtx_AND (mode
, mask
, sign
)));
36551 emit_insn (gen_rtx_SET (VOIDmode
, result
,
36552 gen_rtx_IOR (mode
, abs_value
, sgn
)));
36555 /* Expand fabs (OP0) and return a new rtx that holds the result. The
36556 mask for masking out the sign-bit is stored in *SMASK, if that is
36559 ix86_expand_sse_fabs (rtx op0
, rtx
*smask
)
36561 enum machine_mode vmode
, mode
= GET_MODE (op0
);
36564 xa
= gen_reg_rtx (mode
);
36565 if (mode
== SFmode
)
36567 else if (mode
== DFmode
)
36571 mask
= ix86_build_signbit_mask (vmode
, VECTOR_MODE_P (mode
), true);
36572 if (!VECTOR_MODE_P (mode
))
36574 /* We need to generate a scalar mode mask in this case. */
36575 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const0_rtx
));
36576 tmp
= gen_rtx_VEC_SELECT (mode
, mask
, tmp
);
36577 mask
= gen_reg_rtx (mode
);
36578 emit_insn (gen_rtx_SET (VOIDmode
, mask
, tmp
));
36580 emit_insn (gen_rtx_SET (VOIDmode
, xa
,
36581 gen_rtx_AND (mode
, op0
, mask
)));
36589 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
36590 swapping the operands if SWAP_OPERANDS is true. The expanded
36591 code is a forward jump to a newly created label in case the
36592 comparison is true. The generated label rtx is returned. */
36594 ix86_expand_sse_compare_and_jump (enum rtx_code code
, rtx op0
, rtx op1
,
36595 bool swap_operands
)
36606 label
= gen_label_rtx ();
36607 tmp
= gen_rtx_REG (CCFPUmode
, FLAGS_REG
);
36608 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36609 gen_rtx_COMPARE (CCFPUmode
, op0
, op1
)));
36610 tmp
= gen_rtx_fmt_ee (code
, VOIDmode
, tmp
, const0_rtx
);
36611 tmp
= gen_rtx_IF_THEN_ELSE (VOIDmode
, tmp
,
36612 gen_rtx_LABEL_REF (VOIDmode
, label
), pc_rtx
);
36613 tmp
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
, tmp
));
36614 JUMP_LABEL (tmp
) = label
;
36619 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
36620 using comparison code CODE. Operands are swapped for the comparison if
36621 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
36623 ix86_expand_sse_compare_mask (enum rtx_code code
, rtx op0
, rtx op1
,
36624 bool swap_operands
)
36626 rtx (*insn
)(rtx
, rtx
, rtx
, rtx
);
36627 enum machine_mode mode
= GET_MODE (op0
);
36628 rtx mask
= gen_reg_rtx (mode
);
36637 insn
= mode
== DFmode
? gen_setcc_df_sse
: gen_setcc_sf_sse
;
36639 emit_insn (insn (mask
, op0
, op1
,
36640 gen_rtx_fmt_ee (code
, mode
, op0
, op1
)));
36644 /* Generate and return a rtx of mode MODE for 2**n where n is the number
36645 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
36647 ix86_gen_TWO52 (enum machine_mode mode
)
36649 REAL_VALUE_TYPE TWO52r
;
36652 real_ldexp (&TWO52r
, &dconst1
, mode
== DFmode
? 52 : 23);
36653 TWO52
= const_double_from_real_value (TWO52r
, mode
);
36654 TWO52
= force_reg (mode
, TWO52
);
36659 /* Expand SSE sequence for computing lround from OP1 storing
36662 ix86_expand_lround (rtx op0
, rtx op1
)
36664 /* C code for the stuff we're doing below:
36665 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
36668 enum machine_mode mode
= GET_MODE (op1
);
36669 const struct real_format
*fmt
;
36670 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
36673 /* load nextafter (0.5, 0.0) */
36674 fmt
= REAL_MODE_FORMAT (mode
);
36675 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
36676 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
36678 /* adj = copysign (0.5, op1) */
36679 adj
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
36680 ix86_sse_copysign_to_positive (adj
, adj
, force_reg (mode
, op1
), NULL_RTX
);
36682 /* adj = op1 + adj */
36683 adj
= expand_simple_binop (mode
, PLUS
, adj
, op1
, NULL_RTX
, 0, OPTAB_DIRECT
);
36685 /* op0 = (imode)adj */
36686 expand_fix (op0
, adj
, 0);
36689 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
36692 ix86_expand_lfloorceil (rtx op0
, rtx op1
, bool do_floor
)
36694 /* C code for the stuff we're doing below (for do_floor):
36696 xi -= (double)xi > op1 ? 1 : 0;
36699 enum machine_mode fmode
= GET_MODE (op1
);
36700 enum machine_mode imode
= GET_MODE (op0
);
36701 rtx ireg
, freg
, label
, tmp
;
36703 /* reg = (long)op1 */
36704 ireg
= gen_reg_rtx (imode
);
36705 expand_fix (ireg
, op1
, 0);
36707 /* freg = (double)reg */
36708 freg
= gen_reg_rtx (fmode
);
36709 expand_float (freg
, ireg
, 0);
36711 /* ireg = (freg > op1) ? ireg - 1 : ireg */
36712 label
= ix86_expand_sse_compare_and_jump (UNLE
,
36713 freg
, op1
, !do_floor
);
36714 tmp
= expand_simple_binop (imode
, do_floor
? MINUS
: PLUS
,
36715 ireg
, const1_rtx
, NULL_RTX
, 0, OPTAB_DIRECT
);
36716 emit_move_insn (ireg
, tmp
);
36718 emit_label (label
);
36719 LABEL_NUSES (label
) = 1;
36721 emit_move_insn (op0
, ireg
);
36724 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
36725 result in OPERAND0. */
36727 ix86_expand_rint (rtx operand0
, rtx operand1
)
36729 /* C code for the stuff we're doing below:
36730 xa = fabs (operand1);
36731 if (!isless (xa, 2**52))
36733 xa = xa + 2**52 - 2**52;
36734 return copysign (xa, operand1);
36736 enum machine_mode mode
= GET_MODE (operand0
);
36737 rtx res
, xa
, label
, TWO52
, mask
;
36739 res
= gen_reg_rtx (mode
);
36740 emit_move_insn (res
, operand1
);
36742 /* xa = abs (operand1) */
36743 xa
= ix86_expand_sse_fabs (res
, &mask
);
36745 /* if (!isless (xa, TWO52)) goto label; */
36746 TWO52
= ix86_gen_TWO52 (mode
);
36747 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36749 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
36750 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
36752 ix86_sse_copysign_to_positive (res
, xa
, res
, mask
);
36754 emit_label (label
);
36755 LABEL_NUSES (label
) = 1;
36757 emit_move_insn (operand0
, res
);
36760 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
36763 ix86_expand_floorceildf_32 (rtx operand0
, rtx operand1
, bool do_floor
)
36765 /* C code for the stuff we expand below.
36766 double xa = fabs (x), x2;
36767 if (!isless (xa, TWO52))
36769 xa = xa + TWO52 - TWO52;
36770 x2 = copysign (xa, x);
36779 enum machine_mode mode
= GET_MODE (operand0
);
36780 rtx xa
, TWO52
, tmp
, label
, one
, res
, mask
;
36782 TWO52
= ix86_gen_TWO52 (mode
);
36784 /* Temporary for holding the result, initialized to the input
36785 operand to ease control flow. */
36786 res
= gen_reg_rtx (mode
);
36787 emit_move_insn (res
, operand1
);
36789 /* xa = abs (operand1) */
36790 xa
= ix86_expand_sse_fabs (res
, &mask
);
36792 /* if (!isless (xa, TWO52)) goto label; */
36793 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36795 /* xa = xa + TWO52 - TWO52; */
36796 xa
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
36797 xa
= expand_simple_binop (mode
, MINUS
, xa
, TWO52
, xa
, 0, OPTAB_DIRECT
);
36799 /* xa = copysign (xa, operand1) */
36800 ix86_sse_copysign_to_positive (xa
, xa
, res
, mask
);
36802 /* generate 1.0 or -1.0 */
36803 one
= force_reg (mode
,
36804 const_double_from_real_value (do_floor
36805 ? dconst1
: dconstm1
, mode
));
36807 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
36808 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
36809 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36810 gen_rtx_AND (mode
, one
, tmp
)));
36811 /* We always need to subtract here to preserve signed zero. */
36812 tmp
= expand_simple_binop (mode
, MINUS
,
36813 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36814 emit_move_insn (res
, tmp
);
36816 emit_label (label
);
36817 LABEL_NUSES (label
) = 1;
36819 emit_move_insn (operand0
, res
);
36822 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
36825 ix86_expand_floorceil (rtx operand0
, rtx operand1
, bool do_floor
)
36827 /* C code for the stuff we expand below.
36828 double xa = fabs (x), x2;
36829 if (!isless (xa, TWO52))
36831 x2 = (double)(long)x;
36838 if (HONOR_SIGNED_ZEROS (mode))
36839 return copysign (x2, x);
36842 enum machine_mode mode
= GET_MODE (operand0
);
36843 rtx xa
, xi
, TWO52
, tmp
, label
, one
, res
, mask
;
36845 TWO52
= ix86_gen_TWO52 (mode
);
36847 /* Temporary for holding the result, initialized to the input
36848 operand to ease control flow. */
36849 res
= gen_reg_rtx (mode
);
36850 emit_move_insn (res
, operand1
);
36852 /* xa = abs (operand1) */
36853 xa
= ix86_expand_sse_fabs (res
, &mask
);
36855 /* if (!isless (xa, TWO52)) goto label; */
36856 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36858 /* xa = (double)(long)x */
36859 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
36860 expand_fix (xi
, res
, 0);
36861 expand_float (xa
, xi
, 0);
36864 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
36866 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
36867 tmp
= ix86_expand_sse_compare_mask (UNGT
, xa
, res
, !do_floor
);
36868 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36869 gen_rtx_AND (mode
, one
, tmp
)));
36870 tmp
= expand_simple_binop (mode
, do_floor
? MINUS
: PLUS
,
36871 xa
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36872 emit_move_insn (res
, tmp
);
36874 if (HONOR_SIGNED_ZEROS (mode
))
36875 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
36877 emit_label (label
);
36878 LABEL_NUSES (label
) = 1;
36880 emit_move_insn (operand0
, res
);
36883 /* Expand SSE sequence for computing round from OPERAND1 storing
36884 into OPERAND0. Sequence that works without relying on DImode truncation
36885 via cvttsd2siq that is only available on 64bit targets. */
36887 ix86_expand_rounddf_32 (rtx operand0
, rtx operand1
)
36889 /* C code for the stuff we expand below.
36890 double xa = fabs (x), xa2, x2;
36891 if (!isless (xa, TWO52))
36893 Using the absolute value and copying back sign makes
36894 -0.0 -> -0.0 correct.
36895 xa2 = xa + TWO52 - TWO52;
36900 else if (dxa > 0.5)
36902 x2 = copysign (xa2, x);
36905 enum machine_mode mode
= GET_MODE (operand0
);
36906 rtx xa
, xa2
, dxa
, TWO52
, tmp
, label
, half
, mhalf
, one
, res
, mask
;
36908 TWO52
= ix86_gen_TWO52 (mode
);
36910 /* Temporary for holding the result, initialized to the input
36911 operand to ease control flow. */
36912 res
= gen_reg_rtx (mode
);
36913 emit_move_insn (res
, operand1
);
36915 /* xa = abs (operand1) */
36916 xa
= ix86_expand_sse_fabs (res
, &mask
);
36918 /* if (!isless (xa, TWO52)) goto label; */
36919 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36921 /* xa2 = xa + TWO52 - TWO52; */
36922 xa2
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
36923 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, TWO52
, xa2
, 0, OPTAB_DIRECT
);
36925 /* dxa = xa2 - xa; */
36926 dxa
= expand_simple_binop (mode
, MINUS
, xa2
, xa
, NULL_RTX
, 0, OPTAB_DIRECT
);
36928 /* generate 0.5, 1.0 and -0.5 */
36929 half
= force_reg (mode
, const_double_from_real_value (dconsthalf
, mode
));
36930 one
= expand_simple_binop (mode
, PLUS
, half
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
36931 mhalf
= expand_simple_binop (mode
, MINUS
, half
, one
, NULL_RTX
,
36935 tmp
= gen_reg_rtx (mode
);
36936 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
36937 tmp
= ix86_expand_sse_compare_mask (UNGT
, dxa
, half
, false);
36938 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36939 gen_rtx_AND (mode
, one
, tmp
)));
36940 xa2
= expand_simple_binop (mode
, MINUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36941 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
36942 tmp
= ix86_expand_sse_compare_mask (UNGE
, mhalf
, dxa
, false);
36943 emit_insn (gen_rtx_SET (VOIDmode
, tmp
,
36944 gen_rtx_AND (mode
, one
, tmp
)));
36945 xa2
= expand_simple_binop (mode
, PLUS
, xa2
, tmp
, NULL_RTX
, 0, OPTAB_DIRECT
);
36947 /* res = copysign (xa2, operand1) */
36948 ix86_sse_copysign_to_positive (res
, xa2
, force_reg (mode
, operand1
), mask
);
36950 emit_label (label
);
36951 LABEL_NUSES (label
) = 1;
36953 emit_move_insn (operand0
, res
);
36956 /* Expand SSE sequence for computing trunc from OPERAND1 storing
36959 ix86_expand_trunc (rtx operand0
, rtx operand1
)
36961 /* C code for SSE variant we expand below.
36962 double xa = fabs (x), x2;
36963 if (!isless (xa, TWO52))
36965 x2 = (double)(long)x;
36966 if (HONOR_SIGNED_ZEROS (mode))
36967 return copysign (x2, x);
36970 enum machine_mode mode
= GET_MODE (operand0
);
36971 rtx xa
, xi
, TWO52
, label
, res
, mask
;
36973 TWO52
= ix86_gen_TWO52 (mode
);
36975 /* Temporary for holding the result, initialized to the input
36976 operand to ease control flow. */
36977 res
= gen_reg_rtx (mode
);
36978 emit_move_insn (res
, operand1
);
36980 /* xa = abs (operand1) */
36981 xa
= ix86_expand_sse_fabs (res
, &mask
);
36983 /* if (!isless (xa, TWO52)) goto label; */
36984 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
36986 /* x = (double)(long)x */
36987 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
36988 expand_fix (xi
, res
, 0);
36989 expand_float (res
, xi
, 0);
36991 if (HONOR_SIGNED_ZEROS (mode
))
36992 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), mask
);
36994 emit_label (label
);
36995 LABEL_NUSES (label
) = 1;
36997 emit_move_insn (operand0
, res
);
37000 /* Expand SSE sequence for computing trunc from OPERAND1 storing
37003 ix86_expand_truncdf_32 (rtx operand0
, rtx operand1
)
37005 enum machine_mode mode
= GET_MODE (operand0
);
37006 rtx xa
, mask
, TWO52
, label
, one
, res
, smask
, tmp
;
37008 /* C code for SSE variant we expand below.
37009 double xa = fabs (x), x2;
37010 if (!isless (xa, TWO52))
37012 xa2 = xa + TWO52 - TWO52;
37016 x2 = copysign (xa2, x);
37020 TWO52
= ix86_gen_TWO52 (mode
);
37022 /* Temporary for holding the result, initialized to the input
37023 operand to ease control flow. */
37024 res
= gen_reg_rtx (mode
);
37025 emit_move_insn (res
, operand1
);
37027 /* xa = abs (operand1) */
37028 xa
= ix86_expand_sse_fabs (res
, &smask
);
37030 /* if (!isless (xa, TWO52)) goto label; */
37031 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37033 /* res = xa + TWO52 - TWO52; */
37034 tmp
= expand_simple_binop (mode
, PLUS
, xa
, TWO52
, NULL_RTX
, 0, OPTAB_DIRECT
);
37035 tmp
= expand_simple_binop (mode
, MINUS
, tmp
, TWO52
, tmp
, 0, OPTAB_DIRECT
);
37036 emit_move_insn (res
, tmp
);
37039 one
= force_reg (mode
, const_double_from_real_value (dconst1
, mode
));
37041 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
37042 mask
= ix86_expand_sse_compare_mask (UNGT
, res
, xa
, false);
37043 emit_insn (gen_rtx_SET (VOIDmode
, mask
,
37044 gen_rtx_AND (mode
, mask
, one
)));
37045 tmp
= expand_simple_binop (mode
, MINUS
,
37046 res
, mask
, NULL_RTX
, 0, OPTAB_DIRECT
);
37047 emit_move_insn (res
, tmp
);
37049 /* res = copysign (res, operand1) */
37050 ix86_sse_copysign_to_positive (res
, res
, force_reg (mode
, operand1
), smask
);
37052 emit_label (label
);
37053 LABEL_NUSES (label
) = 1;
37055 emit_move_insn (operand0
, res
);
37058 /* Expand SSE sequence for computing round from OPERAND1 storing
37061 ix86_expand_round (rtx operand0
, rtx operand1
)
37063 /* C code for the stuff we're doing below:
37064 double xa = fabs (x);
37065 if (!isless (xa, TWO52))
37067 xa = (double)(long)(xa + nextafter (0.5, 0.0));
37068 return copysign (xa, x);
37070 enum machine_mode mode
= GET_MODE (operand0
);
37071 rtx res
, TWO52
, xa
, label
, xi
, half
, mask
;
37072 const struct real_format
*fmt
;
37073 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
37075 /* Temporary for holding the result, initialized to the input
37076 operand to ease control flow. */
37077 res
= gen_reg_rtx (mode
);
37078 emit_move_insn (res
, operand1
);
37080 TWO52
= ix86_gen_TWO52 (mode
);
37081 xa
= ix86_expand_sse_fabs (res
, &mask
);
37082 label
= ix86_expand_sse_compare_and_jump (UNLE
, TWO52
, xa
, false);
37084 /* load nextafter (0.5, 0.0) */
37085 fmt
= REAL_MODE_FORMAT (mode
);
37086 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
37087 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
37089 /* xa = xa + 0.5 */
37090 half
= force_reg (mode
, const_double_from_real_value (pred_half
, mode
));
37091 xa
= expand_simple_binop (mode
, PLUS
, xa
, half
, NULL_RTX
, 0, OPTAB_DIRECT
);
37093 /* xa = (double)(int64_t)xa */
37094 xi
= gen_reg_rtx (mode
== DFmode
? DImode
: SImode
);
37095 expand_fix (xi
, xa
, 0);
37096 expand_float (xa
, xi
, 0);
37098 /* res = copysign (xa, operand1) */
37099 ix86_sse_copysign_to_positive (res
, xa
, force_reg (mode
, operand1
), mask
);
37101 emit_label (label
);
37102 LABEL_NUSES (label
) = 1;
37104 emit_move_insn (operand0
, res
);
37107 /* Expand SSE sequence for computing round
37108 from OP1 storing into OP0 using sse4 round insn. */
37110 ix86_expand_round_sse4 (rtx op0
, rtx op1
)
37112 enum machine_mode mode
= GET_MODE (op0
);
37113 rtx e1
, e2
, res
, half
;
37114 const struct real_format
*fmt
;
37115 REAL_VALUE_TYPE pred_half
, half_minus_pred_half
;
37116 rtx (*gen_copysign
) (rtx
, rtx
, rtx
);
37117 rtx (*gen_round
) (rtx
, rtx
, rtx
);
37122 gen_copysign
= gen_copysignsf3
;
37123 gen_round
= gen_sse4_1_roundsf2
;
37126 gen_copysign
= gen_copysigndf3
;
37127 gen_round
= gen_sse4_1_rounddf2
;
37130 gcc_unreachable ();
37133 /* round (a) = trunc (a + copysign (0.5, a)) */
37135 /* load nextafter (0.5, 0.0) */
37136 fmt
= REAL_MODE_FORMAT (mode
);
37137 real_2expN (&half_minus_pred_half
, -(fmt
->p
) - 1, mode
);
37138 REAL_ARITHMETIC (pred_half
, MINUS_EXPR
, dconsthalf
, half_minus_pred_half
);
37139 half
= const_double_from_real_value (pred_half
, mode
);
37141 /* e1 = copysign (0.5, op1) */
37142 e1
= gen_reg_rtx (mode
);
37143 emit_insn (gen_copysign (e1
, half
, op1
));
37145 /* e2 = op1 + e1 */
37146 e2
= expand_simple_binop (mode
, PLUS
, op1
, e1
, NULL_RTX
, 0, OPTAB_DIRECT
);
37148 /* res = trunc (e2) */
37149 res
= gen_reg_rtx (mode
);
37150 emit_insn (gen_round (res
, e2
, GEN_INT (ROUND_TRUNC
)));
37152 emit_move_insn (op0
, res
);
37156 /* Table of valid machine attributes. */
37157 static const struct attribute_spec ix86_attribute_table
[] =
37159 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
37160 affects_type_identity } */
37161 /* Stdcall attribute says callee is responsible for popping arguments
37162 if they are not variable. */
37163 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
37165 /* Fastcall attribute says callee is responsible for popping arguments
37166 if they are not variable. */
37167 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
37169 /* Thiscall attribute says callee is responsible for popping arguments
37170 if they are not variable. */
37171 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
37173 /* Cdecl attribute says the callee is a normal C declaration */
37174 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
37176 /* Regparm attribute specifies how many integer arguments are to be
37177 passed in registers. */
37178 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute
,
37180 /* Sseregparm attribute says we are using x86_64 calling conventions
37181 for FP arguments. */
37182 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute
,
37184 /* The transactional memory builtins are implicitly regparm or fastcall
37185 depending on the ABI. Override the generic do-nothing attribute that
37186 these builtins were declared with. */
37187 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute
,
37189 /* force_align_arg_pointer says this function realigns the stack at entry. */
37190 { (const char *)&ix86_force_align_arg_pointer_string
, 0, 0,
37191 false, true, true, ix86_handle_cconv_attribute
, false },
37192 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
37193 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
37194 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
37195 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute
,
37198 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
37200 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute
,
37202 #ifdef SUBTARGET_ATTRIBUTE_TABLE
37203 SUBTARGET_ATTRIBUTE_TABLE
,
37205 /* ms_abi and sysv_abi calling convention function attributes. */
37206 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
37207 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute
, true },
37208 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute
,
37210 { "callee_pop_aggregate_return", 1, 1, false, true, true,
37211 ix86_handle_callee_pop_aggregate_return
, true },
37213 { NULL
, 0, 0, false, false, false, NULL
, false }
37216 /* Implement targetm.vectorize.builtin_vectorization_cost. */
37218 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
37220 int misalign ATTRIBUTE_UNUSED
)
37224 switch (type_of_cost
)
37227 return ix86_cost
->scalar_stmt_cost
;
37230 return ix86_cost
->scalar_load_cost
;
37233 return ix86_cost
->scalar_store_cost
;
37236 return ix86_cost
->vec_stmt_cost
;
37239 return ix86_cost
->vec_align_load_cost
;
37242 return ix86_cost
->vec_store_cost
;
37244 case vec_to_scalar
:
37245 return ix86_cost
->vec_to_scalar_cost
;
37247 case scalar_to_vec
:
37248 return ix86_cost
->scalar_to_vec_cost
;
37250 case unaligned_load
:
37251 case unaligned_store
:
37252 return ix86_cost
->vec_unalign_load_cost
;
37254 case cond_branch_taken
:
37255 return ix86_cost
->cond_taken_branch_cost
;
37257 case cond_branch_not_taken
:
37258 return ix86_cost
->cond_not_taken_branch_cost
;
37261 case vec_promote_demote
:
37262 return ix86_cost
->vec_stmt_cost
;
37264 case vec_construct
:
37265 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
37266 return elements
/ 2 + 1;
37269 gcc_unreachable ();
37273 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
37274 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
37275 insn every time. */
37277 static GTY(()) rtx vselect_insn
;
37279 /* Initialize vselect_insn. */
37282 init_vselect_insn (void)
37287 x
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (MAX_VECT_LEN
));
37288 for (i
= 0; i
< MAX_VECT_LEN
; ++i
)
37289 XVECEXP (x
, 0, i
) = const0_rtx
;
37290 x
= gen_rtx_VEC_SELECT (V2DFmode
, gen_rtx_VEC_CONCAT (V4DFmode
, const0_rtx
,
37292 x
= gen_rtx_SET (VOIDmode
, const0_rtx
, x
);
37294 vselect_insn
= emit_insn (x
);
37298 /* Construct (set target (vec_select op0 (parallel perm))) and
37299 return true if that's a valid instruction in the active ISA. */
37302 expand_vselect (rtx target
, rtx op0
, const unsigned char *perm
,
37303 unsigned nelt
, bool testing_p
)
37306 rtx x
, save_vconcat
;
37309 if (vselect_insn
== NULL_RTX
)
37310 init_vselect_insn ();
37312 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 1);
37313 PUT_NUM_ELEM (XVEC (x
, 0), nelt
);
37314 for (i
= 0; i
< nelt
; ++i
)
37315 XVECEXP (x
, 0, i
) = GEN_INT (perm
[i
]);
37316 save_vconcat
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
37317 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = op0
;
37318 PUT_MODE (SET_SRC (PATTERN (vselect_insn
)), GET_MODE (target
));
37319 SET_DEST (PATTERN (vselect_insn
)) = target
;
37320 icode
= recog_memoized (vselect_insn
);
37322 if (icode
>= 0 && !testing_p
)
37323 emit_insn (copy_rtx (PATTERN (vselect_insn
)));
37325 SET_DEST (PATTERN (vselect_insn
)) = const0_rtx
;
37326 XEXP (SET_SRC (PATTERN (vselect_insn
)), 0) = save_vconcat
;
37327 INSN_CODE (vselect_insn
) = -1;
37332 /* Similar, but generate a vec_concat from op0 and op1 as well. */
37335 expand_vselect_vconcat (rtx target
, rtx op0
, rtx op1
,
37336 const unsigned char *perm
, unsigned nelt
,
37339 enum machine_mode v2mode
;
37343 if (vselect_insn
== NULL_RTX
)
37344 init_vselect_insn ();
37346 v2mode
= GET_MODE_2XWIDER_MODE (GET_MODE (op0
));
37347 x
= XEXP (SET_SRC (PATTERN (vselect_insn
)), 0);
37348 PUT_MODE (x
, v2mode
);
37351 ok
= expand_vselect (target
, x
, perm
, nelt
, testing_p
);
37352 XEXP (x
, 0) = const0_rtx
;
37353 XEXP (x
, 1) = const0_rtx
;
37357 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
37358 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
37361 expand_vec_perm_blend (struct expand_vec_perm_d
*d
)
37363 enum machine_mode vmode
= d
->vmode
;
37364 unsigned i
, mask
, nelt
= d
->nelt
;
37365 rtx target
, op0
, op1
, x
;
37366 rtx rperm
[32], vperm
;
37368 if (d
->one_operand_p
)
37370 if (TARGET_AVX2
&& GET_MODE_SIZE (vmode
) == 32)
37372 else if (TARGET_AVX
&& (vmode
== V4DFmode
|| vmode
== V8SFmode
))
37374 else if (TARGET_SSE4_1
&& GET_MODE_SIZE (vmode
) == 16)
37379 /* This is a blend, not a permute. Elements must stay in their
37380 respective lanes. */
37381 for (i
= 0; i
< nelt
; ++i
)
37383 unsigned e
= d
->perm
[i
];
37384 if (!(e
== i
|| e
== i
+ nelt
))
37391 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
37392 decision should be extracted elsewhere, so that we only try that
37393 sequence once all budget==3 options have been tried. */
37394 target
= d
->target
;
37407 for (i
= 0; i
< nelt
; ++i
)
37408 mask
|= (d
->perm
[i
] >= nelt
) << i
;
37412 for (i
= 0; i
< 2; ++i
)
37413 mask
|= (d
->perm
[i
] >= 2 ? 15 : 0) << (i
* 4);
37418 for (i
= 0; i
< 4; ++i
)
37419 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
37424 /* See if bytes move in pairs so we can use pblendw with
37425 an immediate argument, rather than pblendvb with a vector
37427 for (i
= 0; i
< 16; i
+= 2)
37428 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
37431 for (i
= 0; i
< nelt
; ++i
)
37432 rperm
[i
] = (d
->perm
[i
] < nelt
? const0_rtx
: constm1_rtx
);
37435 vperm
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
37436 vperm
= force_reg (vmode
, vperm
);
37438 if (GET_MODE_SIZE (vmode
) == 16)
37439 emit_insn (gen_sse4_1_pblendvb (target
, op0
, op1
, vperm
));
37441 emit_insn (gen_avx2_pblendvb (target
, op0
, op1
, vperm
));
37445 for (i
= 0; i
< 8; ++i
)
37446 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
37451 target
= gen_lowpart (vmode
, target
);
37452 op0
= gen_lowpart (vmode
, op0
);
37453 op1
= gen_lowpart (vmode
, op1
);
37457 /* See if bytes move in pairs. If not, vpblendvb must be used. */
37458 for (i
= 0; i
< 32; i
+= 2)
37459 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
37461 /* See if bytes move in quadruplets. If yes, vpblendd
37462 with immediate can be used. */
37463 for (i
= 0; i
< 32; i
+= 4)
37464 if (d
->perm
[i
] + 2 != d
->perm
[i
+ 2])
37468 /* See if bytes move the same in both lanes. If yes,
37469 vpblendw with immediate can be used. */
37470 for (i
= 0; i
< 16; i
+= 2)
37471 if (d
->perm
[i
] + 16 != d
->perm
[i
+ 16])
37474 /* Use vpblendw. */
37475 for (i
= 0; i
< 16; ++i
)
37476 mask
|= (d
->perm
[i
* 2] >= 32) << i
;
37481 /* Use vpblendd. */
37482 for (i
= 0; i
< 8; ++i
)
37483 mask
|= (d
->perm
[i
* 4] >= 32) << i
;
37488 /* See if words move in pairs. If yes, vpblendd can be used. */
37489 for (i
= 0; i
< 16; i
+= 2)
37490 if (d
->perm
[i
] + 1 != d
->perm
[i
+ 1])
37494 /* See if words move the same in both lanes. If not,
37495 vpblendvb must be used. */
37496 for (i
= 0; i
< 8; i
++)
37497 if (d
->perm
[i
] + 8 != d
->perm
[i
+ 8])
37499 /* Use vpblendvb. */
37500 for (i
= 0; i
< 32; ++i
)
37501 rperm
[i
] = (d
->perm
[i
/ 2] < 16 ? const0_rtx
: constm1_rtx
);
37505 target
= gen_lowpart (vmode
, target
);
37506 op0
= gen_lowpart (vmode
, op0
);
37507 op1
= gen_lowpart (vmode
, op1
);
37508 goto finish_pblendvb
;
37511 /* Use vpblendw. */
37512 for (i
= 0; i
< 16; ++i
)
37513 mask
|= (d
->perm
[i
] >= 16) << i
;
37517 /* Use vpblendd. */
37518 for (i
= 0; i
< 8; ++i
)
37519 mask
|= (d
->perm
[i
* 2] >= 16) << i
;
37524 /* Use vpblendd. */
37525 for (i
= 0; i
< 4; ++i
)
37526 mask
|= (d
->perm
[i
] >= 4 ? 3 : 0) << (i
* 2);
37531 gcc_unreachable ();
37534 /* This matches five different patterns with the different modes. */
37535 x
= gen_rtx_VEC_MERGE (vmode
, op1
, op0
, GEN_INT (mask
));
37536 x
= gen_rtx_SET (VOIDmode
, target
, x
);
37542 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
37543 in terms of the variable form of vpermilps.
37545 Note that we will have already failed the immediate input vpermilps,
37546 which requires that the high and low part shuffle be identical; the
37547 variable form doesn't require that. */
37550 expand_vec_perm_vpermil (struct expand_vec_perm_d
*d
)
37552 rtx rperm
[8], vperm
;
37555 if (!TARGET_AVX
|| d
->vmode
!= V8SFmode
|| !d
->one_operand_p
)
37558 /* We can only permute within the 128-bit lane. */
37559 for (i
= 0; i
< 8; ++i
)
37561 unsigned e
= d
->perm
[i
];
37562 if (i
< 4 ? e
>= 4 : e
< 4)
37569 for (i
= 0; i
< 8; ++i
)
37571 unsigned e
= d
->perm
[i
];
37573 /* Within each 128-bit lane, the elements of op0 are numbered
37574 from 0 and the elements of op1 are numbered from 4. */
37580 rperm
[i
] = GEN_INT (e
);
37583 vperm
= gen_rtx_CONST_VECTOR (V8SImode
, gen_rtvec_v (8, rperm
));
37584 vperm
= force_reg (V8SImode
, vperm
);
37585 emit_insn (gen_avx_vpermilvarv8sf3 (d
->target
, d
->op0
, vperm
));
37590 /* Return true if permutation D can be performed as VMODE permutation
37594 valid_perm_using_mode_p (enum machine_mode vmode
, struct expand_vec_perm_d
*d
)
37596 unsigned int i
, j
, chunk
;
37598 if (GET_MODE_CLASS (vmode
) != MODE_VECTOR_INT
37599 || GET_MODE_CLASS (d
->vmode
) != MODE_VECTOR_INT
37600 || GET_MODE_SIZE (vmode
) != GET_MODE_SIZE (d
->vmode
))
37603 if (GET_MODE_NUNITS (vmode
) >= d
->nelt
)
37606 chunk
= d
->nelt
/ GET_MODE_NUNITS (vmode
);
37607 for (i
= 0; i
< d
->nelt
; i
+= chunk
)
37608 if (d
->perm
[i
] & (chunk
- 1))
37611 for (j
= 1; j
< chunk
; ++j
)
37612 if (d
->perm
[i
] + j
!= d
->perm
[i
+ j
])
37618 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
37619 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
37622 expand_vec_perm_pshufb (struct expand_vec_perm_d
*d
)
37624 unsigned i
, nelt
, eltsz
, mask
;
37625 unsigned char perm
[32];
37626 enum machine_mode vmode
= V16QImode
;
37627 rtx rperm
[32], vperm
, target
, op0
, op1
;
37631 if (!d
->one_operand_p
)
37633 if (!TARGET_XOP
|| GET_MODE_SIZE (d
->vmode
) != 16)
37636 && valid_perm_using_mode_p (V2TImode
, d
))
37641 /* Use vperm2i128 insn. The pattern uses
37642 V4DImode instead of V2TImode. */
37643 target
= gen_lowpart (V4DImode
, d
->target
);
37644 op0
= gen_lowpart (V4DImode
, d
->op0
);
37645 op1
= gen_lowpart (V4DImode
, d
->op1
);
37647 = GEN_INT (((d
->perm
[0] & (nelt
/ 2)) ? 1 : 0)
37648 || ((d
->perm
[nelt
/ 2] & (nelt
/ 2)) ? 2 : 0));
37649 emit_insn (gen_avx2_permv2ti (target
, op0
, op1
, rperm
[0]));
37657 if (GET_MODE_SIZE (d
->vmode
) == 16)
37662 else if (GET_MODE_SIZE (d
->vmode
) == 32)
37667 /* V4DImode should be already handled through
37668 expand_vselect by vpermq instruction. */
37669 gcc_assert (d
->vmode
!= V4DImode
);
37672 if (d
->vmode
== V8SImode
37673 || d
->vmode
== V16HImode
37674 || d
->vmode
== V32QImode
)
37676 /* First see if vpermq can be used for
37677 V8SImode/V16HImode/V32QImode. */
37678 if (valid_perm_using_mode_p (V4DImode
, d
))
37680 for (i
= 0; i
< 4; i
++)
37681 perm
[i
] = (d
->perm
[i
* nelt
/ 4] * 4 / nelt
) & 3;
37684 return expand_vselect (gen_lowpart (V4DImode
, d
->target
),
37685 gen_lowpart (V4DImode
, d
->op0
),
37689 /* Next see if vpermd can be used. */
37690 if (valid_perm_using_mode_p (V8SImode
, d
))
37693 /* Or if vpermps can be used. */
37694 else if (d
->vmode
== V8SFmode
)
37697 if (vmode
== V32QImode
)
37699 /* vpshufb only works intra lanes, it is not
37700 possible to shuffle bytes in between the lanes. */
37701 for (i
= 0; i
< nelt
; ++i
)
37702 if ((d
->perm
[i
] ^ i
) & (nelt
/ 2))
37713 if (vmode
== V8SImode
)
37714 for (i
= 0; i
< 8; ++i
)
37715 rperm
[i
] = GEN_INT ((d
->perm
[i
* nelt
/ 8] * 8 / nelt
) & 7);
37718 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
37719 if (!d
->one_operand_p
)
37720 mask
= 2 * nelt
- 1;
37721 else if (vmode
== V16QImode
)
37724 mask
= nelt
/ 2 - 1;
37726 for (i
= 0; i
< nelt
; ++i
)
37728 unsigned j
, e
= d
->perm
[i
] & mask
;
37729 for (j
= 0; j
< eltsz
; ++j
)
37730 rperm
[i
* eltsz
+ j
] = GEN_INT (e
* eltsz
+ j
);
37734 vperm
= gen_rtx_CONST_VECTOR (vmode
,
37735 gen_rtvec_v (GET_MODE_NUNITS (vmode
), rperm
));
37736 vperm
= force_reg (vmode
, vperm
);
37738 target
= gen_lowpart (vmode
, d
->target
);
37739 op0
= gen_lowpart (vmode
, d
->op0
);
37740 if (d
->one_operand_p
)
37742 if (vmode
== V16QImode
)
37743 emit_insn (gen_ssse3_pshufbv16qi3 (target
, op0
, vperm
));
37744 else if (vmode
== V32QImode
)
37745 emit_insn (gen_avx2_pshufbv32qi3 (target
, op0
, vperm
));
37746 else if (vmode
== V8SFmode
)
37747 emit_insn (gen_avx2_permvarv8sf (target
, op0
, vperm
));
37749 emit_insn (gen_avx2_permvarv8si (target
, op0
, vperm
));
37753 op1
= gen_lowpart (vmode
, d
->op1
);
37754 emit_insn (gen_xop_pperm (target
, op0
, op1
, vperm
));
37760 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
37761 in a single instruction. */
37764 expand_vec_perm_1 (struct expand_vec_perm_d
*d
)
37766 unsigned i
, nelt
= d
->nelt
;
37767 unsigned char perm2
[MAX_VECT_LEN
];
37769 /* Check plain VEC_SELECT first, because AVX has instructions that could
37770 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
37771 input where SEL+CONCAT may not. */
37772 if (d
->one_operand_p
)
37774 int mask
= nelt
- 1;
37775 bool identity_perm
= true;
37776 bool broadcast_perm
= true;
37778 for (i
= 0; i
< nelt
; i
++)
37780 perm2
[i
] = d
->perm
[i
] & mask
;
37782 identity_perm
= false;
37784 broadcast_perm
= false;
37790 emit_move_insn (d
->target
, d
->op0
);
37793 else if (broadcast_perm
&& TARGET_AVX2
)
37795 /* Use vpbroadcast{b,w,d}. */
37796 rtx (*gen
) (rtx
, rtx
) = NULL
;
37800 gen
= gen_avx2_pbroadcastv32qi_1
;
37803 gen
= gen_avx2_pbroadcastv16hi_1
;
37806 gen
= gen_avx2_pbroadcastv8si_1
;
37809 gen
= gen_avx2_pbroadcastv16qi
;
37812 gen
= gen_avx2_pbroadcastv8hi
;
37815 gen
= gen_avx2_vec_dupv8sf_1
;
37817 /* For other modes prefer other shuffles this function creates. */
37823 emit_insn (gen (d
->target
, d
->op0
));
37828 if (expand_vselect (d
->target
, d
->op0
, perm2
, nelt
, d
->testing_p
))
37831 /* There are plenty of patterns in sse.md that are written for
37832 SEL+CONCAT and are not replicated for a single op. Perhaps
37833 that should be changed, to avoid the nastiness here. */
37835 /* Recognize interleave style patterns, which means incrementing
37836 every other permutation operand. */
37837 for (i
= 0; i
< nelt
; i
+= 2)
37839 perm2
[i
] = d
->perm
[i
] & mask
;
37840 perm2
[i
+ 1] = (d
->perm
[i
+ 1] & mask
) + nelt
;
37842 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
37846 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
37849 for (i
= 0; i
< nelt
; i
+= 4)
37851 perm2
[i
+ 0] = d
->perm
[i
+ 0] & mask
;
37852 perm2
[i
+ 1] = d
->perm
[i
+ 1] & mask
;
37853 perm2
[i
+ 2] = (d
->perm
[i
+ 2] & mask
) + nelt
;
37854 perm2
[i
+ 3] = (d
->perm
[i
+ 3] & mask
) + nelt
;
37857 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op0
, perm2
, nelt
,
37863 /* Finally, try the fully general two operand permute. */
37864 if (expand_vselect_vconcat (d
->target
, d
->op0
, d
->op1
, d
->perm
, nelt
,
37868 /* Recognize interleave style patterns with reversed operands. */
37869 if (!d
->one_operand_p
)
37871 for (i
= 0; i
< nelt
; ++i
)
37873 unsigned e
= d
->perm
[i
];
37881 if (expand_vselect_vconcat (d
->target
, d
->op1
, d
->op0
, perm2
, nelt
,
37886 /* Try the SSE4.1 blend variable merge instructions. */
37887 if (expand_vec_perm_blend (d
))
37890 /* Try one of the AVX vpermil variable permutations. */
37891 if (expand_vec_perm_vpermil (d
))
37894 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
37895 vpshufb, vpermd, vpermps or vpermq variable permutation. */
37896 if (expand_vec_perm_pshufb (d
))
37902 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
37903 in terms of a pair of pshuflw + pshufhw instructions. */
37906 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d
*d
)
37908 unsigned char perm2
[MAX_VECT_LEN
];
37912 if (d
->vmode
!= V8HImode
|| !d
->one_operand_p
)
37915 /* The two permutations only operate in 64-bit lanes. */
37916 for (i
= 0; i
< 4; ++i
)
37917 if (d
->perm
[i
] >= 4)
37919 for (i
= 4; i
< 8; ++i
)
37920 if (d
->perm
[i
] < 4)
37926 /* Emit the pshuflw. */
37927 memcpy (perm2
, d
->perm
, 4);
37928 for (i
= 4; i
< 8; ++i
)
37930 ok
= expand_vselect (d
->target
, d
->op0
, perm2
, 8, d
->testing_p
);
37933 /* Emit the pshufhw. */
37934 memcpy (perm2
+ 4, d
->perm
+ 4, 4);
37935 for (i
= 0; i
< 4; ++i
)
37937 ok
= expand_vselect (d
->target
, d
->target
, perm2
, 8, d
->testing_p
);
37943 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
37944 the permutation using the SSSE3 palignr instruction. This succeeds
37945 when all of the elements in PERM fit within one vector and we merely
37946 need to shift them down so that a single vector permutation has a
37947 chance to succeed. */
37950 expand_vec_perm_palignr (struct expand_vec_perm_d
*d
)
37952 unsigned i
, nelt
= d
->nelt
;
37957 /* Even with AVX, palignr only operates on 128-bit vectors. */
37958 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
37961 min
= nelt
, max
= 0;
37962 for (i
= 0; i
< nelt
; ++i
)
37964 unsigned e
= d
->perm
[i
];
37970 if (min
== 0 || max
- min
>= nelt
)
37973 /* Given that we have SSSE3, we know we'll be able to implement the
37974 single operand permutation after the palignr with pshufb. */
37978 shift
= GEN_INT (min
* GET_MODE_BITSIZE (GET_MODE_INNER (d
->vmode
)));
37979 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode
, d
->target
),
37980 gen_lowpart (TImode
, d
->op1
),
37981 gen_lowpart (TImode
, d
->op0
), shift
));
37983 d
->op0
= d
->op1
= d
->target
;
37984 d
->one_operand_p
= true;
37987 for (i
= 0; i
< nelt
; ++i
)
37989 unsigned e
= d
->perm
[i
] - min
;
37995 /* Test for the degenerate case where the alignment by itself
37996 produces the desired permutation. */
38000 ok
= expand_vec_perm_1 (d
);
38006 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
);
38008 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38009 a two vector permutation into a single vector permutation by using
38010 an interleave operation to merge the vectors. */
38013 expand_vec_perm_interleave2 (struct expand_vec_perm_d
*d
)
38015 struct expand_vec_perm_d dremap
, dfinal
;
38016 unsigned i
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
38017 unsigned HOST_WIDE_INT contents
;
38018 unsigned char remap
[2 * MAX_VECT_LEN
];
38020 bool ok
, same_halves
= false;
38022 if (GET_MODE_SIZE (d
->vmode
) == 16)
38024 if (d
->one_operand_p
)
38027 else if (GET_MODE_SIZE (d
->vmode
) == 32)
38031 /* For 32-byte modes allow even d->one_operand_p.
38032 The lack of cross-lane shuffling in some instructions
38033 might prevent a single insn shuffle. */
38035 dfinal
.testing_p
= true;
38036 /* If expand_vec_perm_interleave3 can expand this into
38037 a 3 insn sequence, give up and let it be expanded as
38038 3 insn sequence. While that is one insn longer,
38039 it doesn't need a memory operand and in the common
38040 case that both interleave low and high permutations
38041 with the same operands are adjacent needs 4 insns
38042 for both after CSE. */
38043 if (expand_vec_perm_interleave3 (&dfinal
))
38049 /* Examine from whence the elements come. */
38051 for (i
= 0; i
< nelt
; ++i
)
38052 contents
|= ((unsigned HOST_WIDE_INT
) 1) << d
->perm
[i
];
38054 memset (remap
, 0xff, sizeof (remap
));
38057 if (GET_MODE_SIZE (d
->vmode
) == 16)
38059 unsigned HOST_WIDE_INT h1
, h2
, h3
, h4
;
38061 /* Split the two input vectors into 4 halves. */
38062 h1
= (((unsigned HOST_WIDE_INT
) 1) << nelt2
) - 1;
38067 /* If the elements from the low halves use interleave low, and similarly
38068 for interleave high. If the elements are from mis-matched halves, we
38069 can use shufps for V4SF/V4SI or do a DImode shuffle. */
38070 if ((contents
& (h1
| h3
)) == contents
)
38073 for (i
= 0; i
< nelt2
; ++i
)
38076 remap
[i
+ nelt
] = i
* 2 + 1;
38077 dremap
.perm
[i
* 2] = i
;
38078 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
38080 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
38081 dremap
.vmode
= V4SFmode
;
38083 else if ((contents
& (h2
| h4
)) == contents
)
38086 for (i
= 0; i
< nelt2
; ++i
)
38088 remap
[i
+ nelt2
] = i
* 2;
38089 remap
[i
+ nelt
+ nelt2
] = i
* 2 + 1;
38090 dremap
.perm
[i
* 2] = i
+ nelt2
;
38091 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt2
;
38093 if (!TARGET_SSE2
&& d
->vmode
== V4SImode
)
38094 dremap
.vmode
= V4SFmode
;
38096 else if ((contents
& (h1
| h4
)) == contents
)
38099 for (i
= 0; i
< nelt2
; ++i
)
38102 remap
[i
+ nelt
+ nelt2
] = i
+ nelt2
;
38103 dremap
.perm
[i
] = i
;
38104 dremap
.perm
[i
+ nelt2
] = i
+ nelt
+ nelt2
;
38109 dremap
.vmode
= V2DImode
;
38111 dremap
.perm
[0] = 0;
38112 dremap
.perm
[1] = 3;
38115 else if ((contents
& (h2
| h3
)) == contents
)
38118 for (i
= 0; i
< nelt2
; ++i
)
38120 remap
[i
+ nelt2
] = i
;
38121 remap
[i
+ nelt
] = i
+ nelt2
;
38122 dremap
.perm
[i
] = i
+ nelt2
;
38123 dremap
.perm
[i
+ nelt2
] = i
+ nelt
;
38128 dremap
.vmode
= V2DImode
;
38130 dremap
.perm
[0] = 1;
38131 dremap
.perm
[1] = 2;
38139 unsigned int nelt4
= nelt
/ 4, nzcnt
= 0;
38140 unsigned HOST_WIDE_INT q
[8];
38141 unsigned int nonzero_halves
[4];
38143 /* Split the two input vectors into 8 quarters. */
38144 q
[0] = (((unsigned HOST_WIDE_INT
) 1) << nelt4
) - 1;
38145 for (i
= 1; i
< 8; ++i
)
38146 q
[i
] = q
[0] << (nelt4
* i
);
38147 for (i
= 0; i
< 4; ++i
)
38148 if (((q
[2 * i
] | q
[2 * i
+ 1]) & contents
) != 0)
38150 nonzero_halves
[nzcnt
] = i
;
38156 gcc_assert (d
->one_operand_p
);
38157 nonzero_halves
[1] = nonzero_halves
[0];
38158 same_halves
= true;
38160 else if (d
->one_operand_p
)
38162 gcc_assert (nonzero_halves
[0] == 0);
38163 gcc_assert (nonzero_halves
[1] == 1);
38168 if (d
->perm
[0] / nelt2
== nonzero_halves
[1])
38170 /* Attempt to increase the likelihood that dfinal
38171 shuffle will be intra-lane. */
38172 char tmph
= nonzero_halves
[0];
38173 nonzero_halves
[0] = nonzero_halves
[1];
38174 nonzero_halves
[1] = tmph
;
38177 /* vperm2f128 or vperm2i128. */
38178 for (i
= 0; i
< nelt2
; ++i
)
38180 remap
[i
+ nonzero_halves
[1] * nelt2
] = i
+ nelt2
;
38181 remap
[i
+ nonzero_halves
[0] * nelt2
] = i
;
38182 dremap
.perm
[i
+ nelt2
] = i
+ nonzero_halves
[1] * nelt2
;
38183 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * nelt2
;
38186 if (d
->vmode
!= V8SFmode
38187 && d
->vmode
!= V4DFmode
38188 && d
->vmode
!= V8SImode
)
38190 dremap
.vmode
= V8SImode
;
38192 for (i
= 0; i
< 4; ++i
)
38194 dremap
.perm
[i
] = i
+ nonzero_halves
[0] * 4;
38195 dremap
.perm
[i
+ 4] = i
+ nonzero_halves
[1] * 4;
38199 else if (d
->one_operand_p
)
38201 else if (TARGET_AVX2
38202 && (contents
& (q
[0] | q
[2] | q
[4] | q
[6])) == contents
)
38205 for (i
= 0; i
< nelt4
; ++i
)
38208 remap
[i
+ nelt
] = i
* 2 + 1;
38209 remap
[i
+ nelt2
] = i
* 2 + nelt2
;
38210 remap
[i
+ nelt
+ nelt2
] = i
* 2 + nelt2
+ 1;
38211 dremap
.perm
[i
* 2] = i
;
38212 dremap
.perm
[i
* 2 + 1] = i
+ nelt
;
38213 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
;
38214 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
;
38217 else if (TARGET_AVX2
38218 && (contents
& (q
[1] | q
[3] | q
[5] | q
[7])) == contents
)
38221 for (i
= 0; i
< nelt4
; ++i
)
38223 remap
[i
+ nelt4
] = i
* 2;
38224 remap
[i
+ nelt
+ nelt4
] = i
* 2 + 1;
38225 remap
[i
+ nelt2
+ nelt4
] = i
* 2 + nelt2
;
38226 remap
[i
+ nelt
+ nelt2
+ nelt4
] = i
* 2 + nelt2
+ 1;
38227 dremap
.perm
[i
* 2] = i
+ nelt4
;
38228 dremap
.perm
[i
* 2 + 1] = i
+ nelt
+ nelt4
;
38229 dremap
.perm
[i
* 2 + nelt2
] = i
+ nelt2
+ nelt4
;
38230 dremap
.perm
[i
* 2 + nelt2
+ 1] = i
+ nelt
+ nelt2
+ nelt4
;
38237 /* Use the remapping array set up above to move the elements from their
38238 swizzled locations into their final destinations. */
38240 for (i
= 0; i
< nelt
; ++i
)
38242 unsigned e
= remap
[d
->perm
[i
]];
38243 gcc_assert (e
< nelt
);
38244 /* If same_halves is true, both halves of the remapped vector are the
38245 same. Avoid cross-lane accesses if possible. */
38246 if (same_halves
&& i
>= nelt2
)
38248 gcc_assert (e
< nelt2
);
38249 dfinal
.perm
[i
] = e
+ nelt2
;
38252 dfinal
.perm
[i
] = e
;
38254 dfinal
.op0
= gen_reg_rtx (dfinal
.vmode
);
38255 dfinal
.op1
= dfinal
.op0
;
38256 dfinal
.one_operand_p
= true;
38257 dremap
.target
= dfinal
.op0
;
38259 /* Test if the final remap can be done with a single insn. For V4SFmode or
38260 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
38262 ok
= expand_vec_perm_1 (&dfinal
);
38263 seq
= get_insns ();
38272 if (dremap
.vmode
!= dfinal
.vmode
)
38274 dremap
.target
= gen_lowpart (dremap
.vmode
, dremap
.target
);
38275 dremap
.op0
= gen_lowpart (dremap
.vmode
, dremap
.op0
);
38276 dremap
.op1
= gen_lowpart (dremap
.vmode
, dremap
.op1
);
38279 ok
= expand_vec_perm_1 (&dremap
);
38286 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38287 a single vector cross-lane permutation into vpermq followed
38288 by any of the single insn permutations. */
38291 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d
*d
)
38293 struct expand_vec_perm_d dremap
, dfinal
;
38294 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, nelt4
= nelt
/ 4;
38295 unsigned contents
[2];
38299 && (d
->vmode
== V32QImode
|| d
->vmode
== V16HImode
)
38300 && d
->one_operand_p
))
38305 for (i
= 0; i
< nelt2
; ++i
)
38307 contents
[0] |= 1u << (d
->perm
[i
] / nelt4
);
38308 contents
[1] |= 1u << (d
->perm
[i
+ nelt2
] / nelt4
);
38311 for (i
= 0; i
< 2; ++i
)
38313 unsigned int cnt
= 0;
38314 for (j
= 0; j
< 4; ++j
)
38315 if ((contents
[i
] & (1u << j
)) != 0 && ++cnt
> 2)
38323 dremap
.vmode
= V4DImode
;
38325 dremap
.target
= gen_reg_rtx (V4DImode
);
38326 dremap
.op0
= gen_lowpart (V4DImode
, d
->op0
);
38327 dremap
.op1
= dremap
.op0
;
38328 dremap
.one_operand_p
= true;
38329 for (i
= 0; i
< 2; ++i
)
38331 unsigned int cnt
= 0;
38332 for (j
= 0; j
< 4; ++j
)
38333 if ((contents
[i
] & (1u << j
)) != 0)
38334 dremap
.perm
[2 * i
+ cnt
++] = j
;
38335 for (; cnt
< 2; ++cnt
)
38336 dremap
.perm
[2 * i
+ cnt
] = 0;
38340 dfinal
.op0
= gen_lowpart (dfinal
.vmode
, dremap
.target
);
38341 dfinal
.op1
= dfinal
.op0
;
38342 dfinal
.one_operand_p
= true;
38343 for (i
= 0, j
= 0; i
< nelt
; ++i
)
38347 dfinal
.perm
[i
] = (d
->perm
[i
] & (nelt4
- 1)) | (j
? nelt2
: 0);
38348 if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
])
38350 else if ((d
->perm
[i
] / nelt4
) == dremap
.perm
[j
+ 1])
38351 dfinal
.perm
[i
] |= nelt4
;
38353 gcc_unreachable ();
38356 ok
= expand_vec_perm_1 (&dremap
);
38359 ok
= expand_vec_perm_1 (&dfinal
);
38365 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
38366 a vector permutation using two instructions, vperm2f128 resp.
38367 vperm2i128 followed by any single in-lane permutation. */
38370 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d
*d
)
38372 struct expand_vec_perm_d dfirst
, dsecond
;
38373 unsigned i
, j
, nelt
= d
->nelt
, nelt2
= nelt
/ 2, perm
;
38377 || GET_MODE_SIZE (d
->vmode
) != 32
38378 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
&& !TARGET_AVX2
))
38382 dsecond
.one_operand_p
= false;
38383 dsecond
.testing_p
= true;
38385 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
38386 immediate. For perm < 16 the second permutation uses
38387 d->op0 as first operand, for perm >= 16 it uses d->op1
38388 as first operand. The second operand is the result of
38390 for (perm
= 0; perm
< 32; perm
++)
38392 /* Ignore permutations which do not move anything cross-lane. */
38395 /* The second shuffle for e.g. V4DFmode has
38396 0123 and ABCD operands.
38397 Ignore AB23, as 23 is already in the second lane
38398 of the first operand. */
38399 if ((perm
& 0xc) == (1 << 2)) continue;
38400 /* And 01CD, as 01 is in the first lane of the first
38402 if ((perm
& 3) == 0) continue;
38403 /* And 4567, as then the vperm2[fi]128 doesn't change
38404 anything on the original 4567 second operand. */
38405 if ((perm
& 0xf) == ((3 << 2) | 2)) continue;
38409 /* The second shuffle for e.g. V4DFmode has
38410 4567 and ABCD operands.
38411 Ignore AB67, as 67 is already in the second lane
38412 of the first operand. */
38413 if ((perm
& 0xc) == (3 << 2)) continue;
38414 /* And 45CD, as 45 is in the first lane of the first
38416 if ((perm
& 3) == 2) continue;
38417 /* And 0123, as then the vperm2[fi]128 doesn't change
38418 anything on the original 0123 first operand. */
38419 if ((perm
& 0xf) == (1 << 2)) continue;
38422 for (i
= 0; i
< nelt
; i
++)
38424 j
= d
->perm
[i
] / nelt2
;
38425 if (j
== ((perm
>> (2 * (i
>= nelt2
))) & 3))
38426 dsecond
.perm
[i
] = nelt
+ (i
& nelt2
) + (d
->perm
[i
] & (nelt2
- 1));
38427 else if (j
== (unsigned) (i
>= nelt2
) + 2 * (perm
>= 16))
38428 dsecond
.perm
[i
] = d
->perm
[i
] & (nelt
- 1);
38436 ok
= expand_vec_perm_1 (&dsecond
);
38447 /* Found a usable second shuffle. dfirst will be
38448 vperm2f128 on d->op0 and d->op1. */
38449 dsecond
.testing_p
= false;
38451 dfirst
.target
= gen_reg_rtx (d
->vmode
);
38452 for (i
= 0; i
< nelt
; i
++)
38453 dfirst
.perm
[i
] = (i
& (nelt2
- 1))
38454 + ((perm
>> (2 * (i
>= nelt2
))) & 3) * nelt2
;
38456 ok
= expand_vec_perm_1 (&dfirst
);
38459 /* And dsecond is some single insn shuffle, taking
38460 d->op0 and result of vperm2f128 (if perm < 16) or
38461 d->op1 and result of vperm2f128 (otherwise). */
38462 dsecond
.op1
= dfirst
.target
;
38464 dsecond
.op0
= dfirst
.op1
;
38466 ok
= expand_vec_perm_1 (&dsecond
);
38472 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
38473 if (d
->one_operand_p
)
38480 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
38481 a two vector permutation using 2 intra-lane interleave insns
38482 and cross-lane shuffle for 32-byte vectors. */
38485 expand_vec_perm_interleave3 (struct expand_vec_perm_d
*d
)
38488 rtx (*gen
) (rtx
, rtx
, rtx
);
38490 if (d
->one_operand_p
)
38492 if (TARGET_AVX2
&& GET_MODE_SIZE (d
->vmode
) == 32)
38494 else if (TARGET_AVX
&& (d
->vmode
== V8SFmode
|| d
->vmode
== V4DFmode
))
38500 if (d
->perm
[0] != 0 && d
->perm
[0] != nelt
/ 2)
38502 for (i
= 0; i
< nelt
; i
+= 2)
38503 if (d
->perm
[i
] != d
->perm
[0] + i
/ 2
38504 || d
->perm
[i
+ 1] != d
->perm
[0] + i
/ 2 + nelt
)
38514 gen
= gen_vec_interleave_highv32qi
;
38516 gen
= gen_vec_interleave_lowv32qi
;
38520 gen
= gen_vec_interleave_highv16hi
;
38522 gen
= gen_vec_interleave_lowv16hi
;
38526 gen
= gen_vec_interleave_highv8si
;
38528 gen
= gen_vec_interleave_lowv8si
;
38532 gen
= gen_vec_interleave_highv4di
;
38534 gen
= gen_vec_interleave_lowv4di
;
38538 gen
= gen_vec_interleave_highv8sf
;
38540 gen
= gen_vec_interleave_lowv8sf
;
38544 gen
= gen_vec_interleave_highv4df
;
38546 gen
= gen_vec_interleave_lowv4df
;
38549 gcc_unreachable ();
38552 emit_insn (gen (d
->target
, d
->op0
, d
->op1
));
38556 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
38557 a single vector permutation using a single intra-lane vector
38558 permutation, vperm2f128 swapping the lanes and vblend* insn blending
38559 the non-swapped and swapped vectors together. */
38562 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d
*d
)
38564 struct expand_vec_perm_d dfirst
, dsecond
;
38565 unsigned i
, j
, msk
, nelt
= d
->nelt
, nelt2
= nelt
/ 2;
38568 rtx (*blend
) (rtx
, rtx
, rtx
, rtx
) = NULL
;
38572 || (d
->vmode
!= V8SFmode
&& d
->vmode
!= V4DFmode
)
38573 || !d
->one_operand_p
)
38577 for (i
= 0; i
< nelt
; i
++)
38578 dfirst
.perm
[i
] = 0xff;
38579 for (i
= 0, msk
= 0; i
< nelt
; i
++)
38581 j
= (d
->perm
[i
] & nelt2
) ? i
| nelt2
: i
& ~nelt2
;
38582 if (dfirst
.perm
[j
] != 0xff && dfirst
.perm
[j
] != d
->perm
[i
])
38584 dfirst
.perm
[j
] = d
->perm
[i
];
38588 for (i
= 0; i
< nelt
; i
++)
38589 if (dfirst
.perm
[i
] == 0xff)
38590 dfirst
.perm
[i
] = i
;
38593 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
38596 ok
= expand_vec_perm_1 (&dfirst
);
38597 seq
= get_insns ();
38609 dsecond
.op0
= dfirst
.target
;
38610 dsecond
.op1
= dfirst
.target
;
38611 dsecond
.one_operand_p
= true;
38612 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
38613 for (i
= 0; i
< nelt
; i
++)
38614 dsecond
.perm
[i
] = i
^ nelt2
;
38616 ok
= expand_vec_perm_1 (&dsecond
);
38619 blend
= d
->vmode
== V8SFmode
? gen_avx_blendps256
: gen_avx_blendpd256
;
38620 emit_insn (blend (d
->target
, dfirst
.target
, dsecond
.target
, GEN_INT (msk
)));
38624 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
38625 permutation using two vperm2f128, followed by a vshufpd insn blending
38626 the two vectors together. */
38629 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d
*d
)
38631 struct expand_vec_perm_d dfirst
, dsecond
, dthird
;
38634 if (!TARGET_AVX
|| (d
->vmode
!= V4DFmode
))
38644 dfirst
.perm
[0] = (d
->perm
[0] & ~1);
38645 dfirst
.perm
[1] = (d
->perm
[0] & ~1) + 1;
38646 dfirst
.perm
[2] = (d
->perm
[2] & ~1);
38647 dfirst
.perm
[3] = (d
->perm
[2] & ~1) + 1;
38648 dsecond
.perm
[0] = (d
->perm
[1] & ~1);
38649 dsecond
.perm
[1] = (d
->perm
[1] & ~1) + 1;
38650 dsecond
.perm
[2] = (d
->perm
[3] & ~1);
38651 dsecond
.perm
[3] = (d
->perm
[3] & ~1) + 1;
38652 dthird
.perm
[0] = (d
->perm
[0] % 2);
38653 dthird
.perm
[1] = (d
->perm
[1] % 2) + 4;
38654 dthird
.perm
[2] = (d
->perm
[2] % 2) + 2;
38655 dthird
.perm
[3] = (d
->perm
[3] % 2) + 6;
38657 dfirst
.target
= gen_reg_rtx (dfirst
.vmode
);
38658 dsecond
.target
= gen_reg_rtx (dsecond
.vmode
);
38659 dthird
.op0
= dfirst
.target
;
38660 dthird
.op1
= dsecond
.target
;
38661 dthird
.one_operand_p
= false;
38663 canonicalize_perm (&dfirst
);
38664 canonicalize_perm (&dsecond
);
38666 ok
= expand_vec_perm_1 (&dfirst
)
38667 && expand_vec_perm_1 (&dsecond
)
38668 && expand_vec_perm_1 (&dthird
);
38675 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
38676 permutation with two pshufb insns and an ior. We should have already
38677 failed all two instruction sequences. */
38680 expand_vec_perm_pshufb2 (struct expand_vec_perm_d
*d
)
38682 rtx rperm
[2][16], vperm
, l
, h
, op
, m128
;
38683 unsigned int i
, nelt
, eltsz
;
38685 if (!TARGET_SSSE3
|| GET_MODE_SIZE (d
->vmode
) != 16)
38687 gcc_assert (!d
->one_operand_p
);
38690 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38692 /* Generate two permutation masks. If the required element is within
38693 the given vector it is shuffled into the proper lane. If the required
38694 element is in the other vector, force a zero into the lane by setting
38695 bit 7 in the permutation mask. */
38696 m128
= GEN_INT (-128);
38697 for (i
= 0; i
< nelt
; ++i
)
38699 unsigned j
, e
= d
->perm
[i
];
38700 unsigned which
= (e
>= nelt
);
38704 for (j
= 0; j
< eltsz
; ++j
)
38706 rperm
[which
][i
*eltsz
+ j
] = GEN_INT (e
*eltsz
+ j
);
38707 rperm
[1-which
][i
*eltsz
+ j
] = m128
;
38711 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[0]));
38712 vperm
= force_reg (V16QImode
, vperm
);
38714 l
= gen_reg_rtx (V16QImode
);
38715 op
= gen_lowpart (V16QImode
, d
->op0
);
38716 emit_insn (gen_ssse3_pshufbv16qi3 (l
, op
, vperm
));
38718 vperm
= gen_rtx_CONST_VECTOR (V16QImode
, gen_rtvec_v (16, rperm
[1]));
38719 vperm
= force_reg (V16QImode
, vperm
);
38721 h
= gen_reg_rtx (V16QImode
);
38722 op
= gen_lowpart (V16QImode
, d
->op1
);
38723 emit_insn (gen_ssse3_pshufbv16qi3 (h
, op
, vperm
));
38725 op
= gen_lowpart (V16QImode
, d
->target
);
38726 emit_insn (gen_iorv16qi3 (op
, l
, h
));
38731 /* Implement arbitrary permutation of one V32QImode and V16QImode operand
38732 with two vpshufb insns, vpermq and vpor. We should have already failed
38733 all two or three instruction sequences. */
38736 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d
*d
)
38738 rtx rperm
[2][32], vperm
, l
, h
, hp
, op
, m128
;
38739 unsigned int i
, nelt
, eltsz
;
38742 || !d
->one_operand_p
38743 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
38750 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38752 /* Generate two permutation masks. If the required element is within
38753 the same lane, it is shuffled in. If the required element from the
38754 other lane, force a zero by setting bit 7 in the permutation mask.
38755 In the other mask the mask has non-negative elements if element
38756 is requested from the other lane, but also moved to the other lane,
38757 so that the result of vpshufb can have the two V2TImode halves
38759 m128
= GEN_INT (-128);
38760 for (i
= 0; i
< nelt
; ++i
)
38762 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
38763 unsigned which
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
38765 for (j
= 0; j
< eltsz
; ++j
)
38767 rperm
[!!which
][(i
* eltsz
+ j
) ^ which
] = GEN_INT (e
* eltsz
+ j
);
38768 rperm
[!which
][(i
* eltsz
+ j
) ^ (which
^ 16)] = m128
;
38772 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
38773 vperm
= force_reg (V32QImode
, vperm
);
38775 h
= gen_reg_rtx (V32QImode
);
38776 op
= gen_lowpart (V32QImode
, d
->op0
);
38777 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
38779 /* Swap the 128-byte lanes of h into hp. */
38780 hp
= gen_reg_rtx (V4DImode
);
38781 op
= gen_lowpart (V4DImode
, h
);
38782 emit_insn (gen_avx2_permv4di_1 (hp
, op
, const2_rtx
, GEN_INT (3), const0_rtx
,
38785 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
38786 vperm
= force_reg (V32QImode
, vperm
);
38788 l
= gen_reg_rtx (V32QImode
);
38789 op
= gen_lowpart (V32QImode
, d
->op0
);
38790 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
38792 op
= gen_lowpart (V32QImode
, d
->target
);
38793 emit_insn (gen_iorv32qi3 (op
, l
, gen_lowpart (V32QImode
, hp
)));
38798 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
38799 and extract-odd permutations of two V32QImode and V16QImode operand
38800 with two vpshufb insns, vpor and vpermq. We should have already
38801 failed all two or three instruction sequences. */
38804 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d
*d
)
38806 rtx rperm
[2][32], vperm
, l
, h
, ior
, op
, m128
;
38807 unsigned int i
, nelt
, eltsz
;
38810 || d
->one_operand_p
38811 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
38814 for (i
= 0; i
< d
->nelt
; ++i
)
38815 if ((d
->perm
[i
] ^ (i
* 2)) & (3 * d
->nelt
/ 2))
38822 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
38824 /* Generate two permutation masks. In the first permutation mask
38825 the first quarter will contain indexes for the first half
38826 of the op0, the second quarter will contain bit 7 set, third quarter
38827 will contain indexes for the second half of the op0 and the
38828 last quarter bit 7 set. In the second permutation mask
38829 the first quarter will contain bit 7 set, the second quarter
38830 indexes for the first half of the op1, the third quarter bit 7 set
38831 and last quarter indexes for the second half of the op1.
38832 I.e. the first mask e.g. for V32QImode extract even will be:
38833 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
38834 (all values masked with 0xf except for -128) and second mask
38835 for extract even will be
38836 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
38837 m128
= GEN_INT (-128);
38838 for (i
= 0; i
< nelt
; ++i
)
38840 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
38841 unsigned which
= d
->perm
[i
] >= nelt
;
38842 unsigned xorv
= (i
>= nelt
/ 4 && i
< 3 * nelt
/ 4) ? 24 : 0;
38844 for (j
= 0; j
< eltsz
; ++j
)
38846 rperm
[which
][(i
* eltsz
+ j
) ^ xorv
] = GEN_INT (e
* eltsz
+ j
);
38847 rperm
[1 - which
][(i
* eltsz
+ j
) ^ xorv
] = m128
;
38851 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[0]));
38852 vperm
= force_reg (V32QImode
, vperm
);
38854 l
= gen_reg_rtx (V32QImode
);
38855 op
= gen_lowpart (V32QImode
, d
->op0
);
38856 emit_insn (gen_avx2_pshufbv32qi3 (l
, op
, vperm
));
38858 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[1]));
38859 vperm
= force_reg (V32QImode
, vperm
);
38861 h
= gen_reg_rtx (V32QImode
);
38862 op
= gen_lowpart (V32QImode
, d
->op1
);
38863 emit_insn (gen_avx2_pshufbv32qi3 (h
, op
, vperm
));
38865 ior
= gen_reg_rtx (V32QImode
);
38866 emit_insn (gen_iorv32qi3 (ior
, l
, h
));
38868 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
38869 op
= gen_lowpart (V4DImode
, d
->target
);
38870 ior
= gen_lowpart (V4DImode
, ior
);
38871 emit_insn (gen_avx2_permv4di_1 (op
, ior
, const0_rtx
, const2_rtx
,
38872 const1_rtx
, GEN_INT (3)));
38877 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
38878 and extract-odd permutations. */
38881 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d
*d
, unsigned odd
)
38888 t1
= gen_reg_rtx (V4DFmode
);
38889 t2
= gen_reg_rtx (V4DFmode
);
38891 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
38892 emit_insn (gen_avx_vperm2f128v4df3 (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
38893 emit_insn (gen_avx_vperm2f128v4df3 (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
38895 /* Now an unpck[lh]pd will produce the result required. */
38897 t3
= gen_avx_unpckhpd256 (d
->target
, t1
, t2
);
38899 t3
= gen_avx_unpcklpd256 (d
->target
, t1
, t2
);
38905 int mask
= odd
? 0xdd : 0x88;
38907 t1
= gen_reg_rtx (V8SFmode
);
38908 t2
= gen_reg_rtx (V8SFmode
);
38909 t3
= gen_reg_rtx (V8SFmode
);
38911 /* Shuffle within the 128-bit lanes to produce:
38912 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
38913 emit_insn (gen_avx_shufps256 (t1
, d
->op0
, d
->op1
,
38916 /* Shuffle the lanes around to produce:
38917 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
38918 emit_insn (gen_avx_vperm2f128v8sf3 (t2
, t1
, t1
,
38921 /* Shuffle within the 128-bit lanes to produce:
38922 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
38923 emit_insn (gen_avx_shufps256 (t3
, t1
, t2
, GEN_INT (0x44)));
38925 /* Shuffle within the 128-bit lanes to produce:
38926 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
38927 emit_insn (gen_avx_shufps256 (t2
, t1
, t2
, GEN_INT (0xee)));
38929 /* Shuffle the lanes around to produce:
38930 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
38931 emit_insn (gen_avx_vperm2f128v8sf3 (d
->target
, t3
, t2
,
38940 /* These are always directly implementable by expand_vec_perm_1. */
38941 gcc_unreachable ();
38945 return expand_vec_perm_pshufb2 (d
);
38948 /* We need 2*log2(N)-1 operations to achieve odd/even
38949 with interleave. */
38950 t1
= gen_reg_rtx (V8HImode
);
38951 t2
= gen_reg_rtx (V8HImode
);
38952 emit_insn (gen_vec_interleave_highv8hi (t1
, d
->op0
, d
->op1
));
38953 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->op0
, d
->op1
));
38954 emit_insn (gen_vec_interleave_highv8hi (t2
, d
->target
, t1
));
38955 emit_insn (gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t1
));
38957 t3
= gen_vec_interleave_highv8hi (d
->target
, d
->target
, t2
);
38959 t3
= gen_vec_interleave_lowv8hi (d
->target
, d
->target
, t2
);
38966 return expand_vec_perm_pshufb2 (d
);
38969 t1
= gen_reg_rtx (V16QImode
);
38970 t2
= gen_reg_rtx (V16QImode
);
38971 t3
= gen_reg_rtx (V16QImode
);
38972 emit_insn (gen_vec_interleave_highv16qi (t1
, d
->op0
, d
->op1
));
38973 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->op0
, d
->op1
));
38974 emit_insn (gen_vec_interleave_highv16qi (t2
, d
->target
, t1
));
38975 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t1
));
38976 emit_insn (gen_vec_interleave_highv16qi (t3
, d
->target
, t2
));
38977 emit_insn (gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t2
));
38979 t3
= gen_vec_interleave_highv16qi (d
->target
, d
->target
, t3
);
38981 t3
= gen_vec_interleave_lowv16qi (d
->target
, d
->target
, t3
);
38988 return expand_vec_perm_vpshufb2_vpermq_even_odd (d
);
38993 struct expand_vec_perm_d d_copy
= *d
;
38994 d_copy
.vmode
= V4DFmode
;
38995 d_copy
.target
= gen_lowpart (V4DFmode
, d
->target
);
38996 d_copy
.op0
= gen_lowpart (V4DFmode
, d
->op0
);
38997 d_copy
.op1
= gen_lowpart (V4DFmode
, d
->op1
);
38998 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
39001 t1
= gen_reg_rtx (V4DImode
);
39002 t2
= gen_reg_rtx (V4DImode
);
39004 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
39005 emit_insn (gen_avx2_permv2ti (t1
, d
->op0
, d
->op1
, GEN_INT (0x20)));
39006 emit_insn (gen_avx2_permv2ti (t2
, d
->op0
, d
->op1
, GEN_INT (0x31)));
39008 /* Now an vpunpck[lh]qdq will produce the result required. */
39010 t3
= gen_avx2_interleave_highv4di (d
->target
, t1
, t2
);
39012 t3
= gen_avx2_interleave_lowv4di (d
->target
, t1
, t2
);
39019 struct expand_vec_perm_d d_copy
= *d
;
39020 d_copy
.vmode
= V8SFmode
;
39021 d_copy
.target
= gen_lowpart (V8SFmode
, d
->target
);
39022 d_copy
.op0
= gen_lowpart (V8SFmode
, d
->op0
);
39023 d_copy
.op1
= gen_lowpart (V8SFmode
, d
->op1
);
39024 return expand_vec_perm_even_odd_1 (&d_copy
, odd
);
39027 t1
= gen_reg_rtx (V8SImode
);
39028 t2
= gen_reg_rtx (V8SImode
);
39030 /* Shuffle the lanes around into
39031 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
39032 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t1
),
39033 gen_lowpart (V4DImode
, d
->op0
),
39034 gen_lowpart (V4DImode
, d
->op1
),
39036 emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode
, t2
),
39037 gen_lowpart (V4DImode
, d
->op0
),
39038 gen_lowpart (V4DImode
, d
->op1
),
39041 /* Swap the 2nd and 3rd position in each lane into
39042 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
39043 emit_insn (gen_avx2_pshufdv3 (t1
, t1
,
39044 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
39045 emit_insn (gen_avx2_pshufdv3 (t2
, t2
,
39046 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
39048 /* Now an vpunpck[lh]qdq will produce
39049 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
39051 t3
= gen_avx2_interleave_highv4di (gen_lowpart (V4DImode
, d
->target
),
39052 gen_lowpart (V4DImode
, t1
),
39053 gen_lowpart (V4DImode
, t2
));
39055 t3
= gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode
, d
->target
),
39056 gen_lowpart (V4DImode
, t1
),
39057 gen_lowpart (V4DImode
, t2
));
39062 gcc_unreachable ();
39068 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
39069 extract-even and extract-odd permutations. */
39072 expand_vec_perm_even_odd (struct expand_vec_perm_d
*d
)
39074 unsigned i
, odd
, nelt
= d
->nelt
;
39077 if (odd
!= 0 && odd
!= 1)
39080 for (i
= 1; i
< nelt
; ++i
)
39081 if (d
->perm
[i
] != 2 * i
+ odd
)
39084 return expand_vec_perm_even_odd_1 (d
, odd
);
39087 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
39088 permutations. We assume that expand_vec_perm_1 has already failed. */
39091 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d
*d
)
39093 unsigned elt
= d
->perm
[0], nelt2
= d
->nelt
/ 2;
39094 enum machine_mode vmode
= d
->vmode
;
39095 unsigned char perm2
[4];
39103 /* These are special-cased in sse.md so that we can optionally
39104 use the vbroadcast instruction. They expand to two insns
39105 if the input happens to be in a register. */
39106 gcc_unreachable ();
39112 /* These are always implementable using standard shuffle patterns. */
39113 gcc_unreachable ();
39117 /* These can be implemented via interleave. We save one insn by
39118 stopping once we have promoted to V4SImode and then use pshufd. */
39122 rtx (*gen
) (rtx
, rtx
, rtx
)
39123 = vmode
== V16QImode
? gen_vec_interleave_lowv16qi
39124 : gen_vec_interleave_lowv8hi
;
39128 gen
= vmode
== V16QImode
? gen_vec_interleave_highv16qi
39129 : gen_vec_interleave_highv8hi
;
39134 dest
= gen_reg_rtx (vmode
);
39135 emit_insn (gen (dest
, op0
, op0
));
39136 vmode
= get_mode_wider_vector (vmode
);
39137 op0
= gen_lowpart (vmode
, dest
);
39139 while (vmode
!= V4SImode
);
39141 memset (perm2
, elt
, 4);
39142 ok
= expand_vselect (gen_lowpart (V4SImode
, d
->target
), op0
, perm2
, 4,
39151 /* For AVX2 broadcasts of the first element vpbroadcast* or
39152 vpermq should be used by expand_vec_perm_1. */
39153 gcc_assert (!TARGET_AVX2
|| d
->perm
[0]);
39157 gcc_unreachable ();
39161 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
39162 broadcast permutations. */
39165 expand_vec_perm_broadcast (struct expand_vec_perm_d
*d
)
39167 unsigned i
, elt
, nelt
= d
->nelt
;
39169 if (!d
->one_operand_p
)
39173 for (i
= 1; i
< nelt
; ++i
)
39174 if (d
->perm
[i
] != elt
)
39177 return expand_vec_perm_broadcast_1 (d
);
39180 /* Implement arbitrary permutation of two V32QImode and V16QImode operands
39181 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
39182 all the shorter instruction sequences. */
39185 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d
*d
)
39187 rtx rperm
[4][32], vperm
, l
[2], h
[2], op
, m128
;
39188 unsigned int i
, nelt
, eltsz
;
39192 || d
->one_operand_p
39193 || (d
->vmode
!= V32QImode
&& d
->vmode
!= V16HImode
))
39200 eltsz
= GET_MODE_SIZE (GET_MODE_INNER (d
->vmode
));
39202 /* Generate 4 permutation masks. If the required element is within
39203 the same lane, it is shuffled in. If the required element from the
39204 other lane, force a zero by setting bit 7 in the permutation mask.
39205 In the other mask the mask has non-negative elements if element
39206 is requested from the other lane, but also moved to the other lane,
39207 so that the result of vpshufb can have the two V2TImode halves
39209 m128
= GEN_INT (-128);
39210 for (i
= 0; i
< 32; ++i
)
39212 rperm
[0][i
] = m128
;
39213 rperm
[1][i
] = m128
;
39214 rperm
[2][i
] = m128
;
39215 rperm
[3][i
] = m128
;
39221 for (i
= 0; i
< nelt
; ++i
)
39223 unsigned j
, e
= d
->perm
[i
] & (nelt
/ 2 - 1);
39224 unsigned xlane
= ((d
->perm
[i
] ^ i
) & (nelt
/ 2)) * eltsz
;
39225 unsigned int which
= ((d
->perm
[i
] & nelt
) ? 2 : 0) + (xlane
? 1 : 0);
39227 for (j
= 0; j
< eltsz
; ++j
)
39228 rperm
[which
][(i
* eltsz
+ j
) ^ xlane
] = GEN_INT (e
* eltsz
+ j
);
39229 used
[which
] = true;
39232 for (i
= 0; i
< 2; ++i
)
39234 if (!used
[2 * i
+ 1])
39239 vperm
= gen_rtx_CONST_VECTOR (V32QImode
,
39240 gen_rtvec_v (32, rperm
[2 * i
+ 1]));
39241 vperm
= force_reg (V32QImode
, vperm
);
39242 h
[i
] = gen_reg_rtx (V32QImode
);
39243 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
39244 emit_insn (gen_avx2_pshufbv32qi3 (h
[i
], op
, vperm
));
39247 /* Swap the 128-byte lanes of h[X]. */
39248 for (i
= 0; i
< 2; ++i
)
39250 if (h
[i
] == NULL_RTX
)
39252 op
= gen_reg_rtx (V4DImode
);
39253 emit_insn (gen_avx2_permv4di_1 (op
, gen_lowpart (V4DImode
, h
[i
]),
39254 const2_rtx
, GEN_INT (3), const0_rtx
,
39256 h
[i
] = gen_lowpart (V32QImode
, op
);
39259 for (i
= 0; i
< 2; ++i
)
39266 vperm
= gen_rtx_CONST_VECTOR (V32QImode
, gen_rtvec_v (32, rperm
[2 * i
]));
39267 vperm
= force_reg (V32QImode
, vperm
);
39268 l
[i
] = gen_reg_rtx (V32QImode
);
39269 op
= gen_lowpart (V32QImode
, i
? d
->op1
: d
->op0
);
39270 emit_insn (gen_avx2_pshufbv32qi3 (l
[i
], op
, vperm
));
39273 for (i
= 0; i
< 2; ++i
)
39277 op
= gen_reg_rtx (V32QImode
);
39278 emit_insn (gen_iorv32qi3 (op
, l
[i
], h
[i
]));
39285 gcc_assert (l
[0] && l
[1]);
39286 op
= gen_lowpart (V32QImode
, d
->target
);
39287 emit_insn (gen_iorv32qi3 (op
, l
[0], l
[1]));
39291 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
39292 With all of the interface bits taken care of, perform the expansion
39293 in D and return true on success. */
39296 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
39298 /* Try a single instruction expansion. */
39299 if (expand_vec_perm_1 (d
))
39302 /* Try sequences of two instructions. */
39304 if (expand_vec_perm_pshuflw_pshufhw (d
))
39307 if (expand_vec_perm_palignr (d
))
39310 if (expand_vec_perm_interleave2 (d
))
39313 if (expand_vec_perm_broadcast (d
))
39316 if (expand_vec_perm_vpermq_perm_1 (d
))
39319 if (expand_vec_perm_vperm2f128 (d
))
39322 /* Try sequences of three instructions. */
39324 if (expand_vec_perm_2vperm2f128_vshuf (d
))
39327 if (expand_vec_perm_pshufb2 (d
))
39330 if (expand_vec_perm_interleave3 (d
))
39333 if (expand_vec_perm_vperm2f128_vblend (d
))
39336 /* Try sequences of four instructions. */
39338 if (expand_vec_perm_vpshufb2_vpermq (d
))
39341 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d
))
39344 /* ??? Look for narrow permutations whose element orderings would
39345 allow the promotion to a wider mode. */
39347 /* ??? Look for sequences of interleave or a wider permute that place
39348 the data into the correct lanes for a half-vector shuffle like
39349 pshuf[lh]w or vpermilps. */
39351 /* ??? Look for sequences of interleave that produce the desired results.
39352 The combinatorics of punpck[lh] get pretty ugly... */
39354 if (expand_vec_perm_even_odd (d
))
39357 /* Even longer sequences. */
39358 if (expand_vec_perm_vpshufb4_vpermq2 (d
))
39364 /* If a permutation only uses one operand, make it clear. Returns true
39365 if the permutation references both operands. */
39368 canonicalize_perm (struct expand_vec_perm_d
*d
)
39370 int i
, which
, nelt
= d
->nelt
;
39372 for (i
= which
= 0; i
< nelt
; ++i
)
39373 which
|= (d
->perm
[i
] < nelt
? 1 : 2);
39375 d
->one_operand_p
= true;
39382 if (!rtx_equal_p (d
->op0
, d
->op1
))
39384 d
->one_operand_p
= false;
39387 /* The elements of PERM do not suggest that only the first operand
39388 is used, but both operands are identical. Allow easier matching
39389 of the permutation by folding the permutation into the single
39394 for (i
= 0; i
< nelt
; ++i
)
39395 d
->perm
[i
] &= nelt
- 1;
39404 return (which
== 3);
39408 ix86_expand_vec_perm_const (rtx operands
[4])
39410 struct expand_vec_perm_d d
;
39411 unsigned char perm
[MAX_VECT_LEN
];
39416 d
.target
= operands
[0];
39417 d
.op0
= operands
[1];
39418 d
.op1
= operands
[2];
39421 d
.vmode
= GET_MODE (d
.target
);
39422 gcc_assert (VECTOR_MODE_P (d
.vmode
));
39423 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
39424 d
.testing_p
= false;
39426 gcc_assert (GET_CODE (sel
) == CONST_VECTOR
);
39427 gcc_assert (XVECLEN (sel
, 0) == nelt
);
39428 gcc_checking_assert (sizeof (d
.perm
) == sizeof (perm
));
39430 for (i
= 0; i
< nelt
; ++i
)
39432 rtx e
= XVECEXP (sel
, 0, i
);
39433 int ei
= INTVAL (e
) & (2 * nelt
- 1);
39438 two_args
= canonicalize_perm (&d
);
39440 if (ix86_expand_vec_perm_const_1 (&d
))
39443 /* If the selector says both arguments are needed, but the operands are the
39444 same, the above tried to expand with one_operand_p and flattened selector.
39445 If that didn't work, retry without one_operand_p; we succeeded with that
39447 if (two_args
&& d
.one_operand_p
)
39449 d
.one_operand_p
= false;
39450 memcpy (d
.perm
, perm
, sizeof (perm
));
39451 return ix86_expand_vec_perm_const_1 (&d
);
39457 /* Implement targetm.vectorize.vec_perm_const_ok. */
39460 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
39461 const unsigned char *sel
)
39463 struct expand_vec_perm_d d
;
39464 unsigned int i
, nelt
, which
;
39468 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
39469 d
.testing_p
= true;
39471 /* Given sufficient ISA support we can just return true here
39472 for selected vector modes. */
39473 if (GET_MODE_SIZE (d
.vmode
) == 16)
39475 /* All implementable with a single vpperm insn. */
39478 /* All implementable with 2 pshufb + 1 ior. */
39481 /* All implementable with shufpd or unpck[lh]pd. */
39486 /* Extract the values from the vector CST into the permutation
39488 memcpy (d
.perm
, sel
, nelt
);
39489 for (i
= which
= 0; i
< nelt
; ++i
)
39491 unsigned char e
= d
.perm
[i
];
39492 gcc_assert (e
< 2 * nelt
);
39493 which
|= (e
< nelt
? 1 : 2);
39496 /* For all elements from second vector, fold the elements to first. */
39498 for (i
= 0; i
< nelt
; ++i
)
39501 /* Check whether the mask can be applied to the vector type. */
39502 d
.one_operand_p
= (which
!= 3);
39504 /* Implementable with shufps or pshufd. */
39505 if (d
.one_operand_p
&& (d
.vmode
== V4SFmode
|| d
.vmode
== V4SImode
))
39508 /* Otherwise we have to go through the motions and see if we can
39509 figure out how to generate the requested permutation. */
39510 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
39511 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
39512 if (!d
.one_operand_p
)
39513 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
39516 ret
= ix86_expand_vec_perm_const_1 (&d
);
39523 ix86_expand_vec_extract_even_odd (rtx targ
, rtx op0
, rtx op1
, unsigned odd
)
39525 struct expand_vec_perm_d d
;
39531 d
.vmode
= GET_MODE (targ
);
39532 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
39533 d
.one_operand_p
= false;
39534 d
.testing_p
= false;
39536 for (i
= 0; i
< nelt
; ++i
)
39537 d
.perm
[i
] = i
* 2 + odd
;
39539 /* We'll either be able to implement the permutation directly... */
39540 if (expand_vec_perm_1 (&d
))
39543 /* ... or we use the special-case patterns. */
39544 expand_vec_perm_even_odd_1 (&d
, odd
);
39548 ix86_expand_vec_interleave (rtx targ
, rtx op0
, rtx op1
, bool high_p
)
39550 struct expand_vec_perm_d d
;
39551 unsigned i
, nelt
, base
;
39557 d
.vmode
= GET_MODE (targ
);
39558 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
39559 d
.one_operand_p
= false;
39560 d
.testing_p
= false;
39562 base
= high_p
? nelt
/ 2 : 0;
39563 for (i
= 0; i
< nelt
/ 2; ++i
)
39565 d
.perm
[i
* 2] = i
+ base
;
39566 d
.perm
[i
* 2 + 1] = i
+ base
+ nelt
;
39569 /* Note that for AVX this isn't one instruction. */
39570 ok
= ix86_expand_vec_perm_const_1 (&d
);
39575 /* Expand a vector operation CODE for a V*QImode in terms of the
39576 same operation on V*HImode. */
39579 ix86_expand_vecop_qihi (enum rtx_code code
, rtx dest
, rtx op1
, rtx op2
)
39581 enum machine_mode qimode
= GET_MODE (dest
);
39582 enum machine_mode himode
;
39583 rtx (*gen_il
) (rtx
, rtx
, rtx
);
39584 rtx (*gen_ih
) (rtx
, rtx
, rtx
);
39585 rtx op1_l
, op1_h
, op2_l
, op2_h
, res_l
, res_h
;
39586 struct expand_vec_perm_d d
;
39587 bool ok
, full_interleave
;
39588 bool uns_p
= false;
39595 gen_il
= gen_vec_interleave_lowv16qi
;
39596 gen_ih
= gen_vec_interleave_highv16qi
;
39599 himode
= V16HImode
;
39600 gen_il
= gen_avx2_interleave_lowv32qi
;
39601 gen_ih
= gen_avx2_interleave_highv32qi
;
39604 gcc_unreachable ();
39607 op2_l
= op2_h
= op2
;
39611 /* Unpack data such that we've got a source byte in each low byte of
39612 each word. We don't care what goes into the high byte of each word.
39613 Rather than trying to get zero in there, most convenient is to let
39614 it be a copy of the low byte. */
39615 op2_l
= gen_reg_rtx (qimode
);
39616 op2_h
= gen_reg_rtx (qimode
);
39617 emit_insn (gen_il (op2_l
, op2
, op2
));
39618 emit_insn (gen_ih (op2_h
, op2
, op2
));
39621 op1_l
= gen_reg_rtx (qimode
);
39622 op1_h
= gen_reg_rtx (qimode
);
39623 emit_insn (gen_il (op1_l
, op1
, op1
));
39624 emit_insn (gen_ih (op1_h
, op1
, op1
));
39625 full_interleave
= qimode
== V16QImode
;
39633 op1_l
= gen_reg_rtx (himode
);
39634 op1_h
= gen_reg_rtx (himode
);
39635 ix86_expand_sse_unpack (op1_l
, op1
, uns_p
, false);
39636 ix86_expand_sse_unpack (op1_h
, op1
, uns_p
, true);
39637 full_interleave
= true;
39640 gcc_unreachable ();
39643 /* Perform the operation. */
39644 res_l
= expand_simple_binop (himode
, code
, op1_l
, op2_l
, NULL_RTX
,
39646 res_h
= expand_simple_binop (himode
, code
, op1_h
, op2_h
, NULL_RTX
,
39648 gcc_assert (res_l
&& res_h
);
39650 /* Merge the data back into the right place. */
39652 d
.op0
= gen_lowpart (qimode
, res_l
);
39653 d
.op1
= gen_lowpart (qimode
, res_h
);
39655 d
.nelt
= GET_MODE_NUNITS (qimode
);
39656 d
.one_operand_p
= false;
39657 d
.testing_p
= false;
39659 if (full_interleave
)
39661 /* For SSE2, we used an full interleave, so the desired
39662 results are in the even elements. */
39663 for (i
= 0; i
< 32; ++i
)
39668 /* For AVX, the interleave used above was not cross-lane. So the
39669 extraction is evens but with the second and third quarter swapped.
39670 Happily, that is even one insn shorter than even extraction. */
39671 for (i
= 0; i
< 32; ++i
)
39672 d
.perm
[i
] = i
* 2 + ((i
& 24) == 8 ? 16 : (i
& 24) == 16 ? -16 : 0);
39675 ok
= ix86_expand_vec_perm_const_1 (&d
);
39678 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
39679 gen_rtx_fmt_ee (code
, qimode
, op1
, op2
));
39683 ix86_expand_mul_widen_evenodd (rtx dest
, rtx op1
, rtx op2
,
39684 bool uns_p
, bool odd_p
)
39686 enum machine_mode mode
= GET_MODE (op1
);
39687 enum machine_mode wmode
= GET_MODE (dest
);
39690 /* We only play even/odd games with vectors of SImode. */
39691 gcc_assert (mode
== V4SImode
|| mode
== V8SImode
);
39693 /* If we're looking for the odd results, shift those members down to
39694 the even slots. For some cpus this is faster than a PSHUFD. */
39697 if (TARGET_XOP
&& mode
== V4SImode
)
39699 x
= force_reg (wmode
, CONST0_RTX (wmode
));
39700 emit_insn (gen_xop_pmacsdqh (dest
, op1
, op2
, x
));
39704 x
= GEN_INT (GET_MODE_UNIT_BITSIZE (mode
));
39705 op1
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op1
),
39706 x
, NULL
, 1, OPTAB_DIRECT
);
39707 op2
= expand_binop (wmode
, lshr_optab
, gen_lowpart (wmode
, op2
),
39708 x
, NULL
, 1, OPTAB_DIRECT
);
39709 op1
= gen_lowpart (mode
, op1
);
39710 op2
= gen_lowpart (mode
, op2
);
39713 if (mode
== V8SImode
)
39716 x
= gen_vec_widen_umult_even_v8si (dest
, op1
, op2
);
39718 x
= gen_vec_widen_smult_even_v8si (dest
, op1
, op2
);
39721 x
= gen_vec_widen_umult_even_v4si (dest
, op1
, op2
);
39722 else if (TARGET_SSE4_1
)
39723 x
= gen_sse4_1_mulv2siv2di3 (dest
, op1
, op2
);
39726 rtx s1
, s2
, t0
, t1
, t2
;
39728 /* The easiest way to implement this without PMULDQ is to go through
39729 the motions as if we are performing a full 64-bit multiply. With
39730 the exception that we need to do less shuffling of the elements. */
39732 /* Compute the sign-extension, aka highparts, of the two operands. */
39733 s1
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
39734 op1
, pc_rtx
, pc_rtx
);
39735 s2
= ix86_expand_sse_cmp (gen_reg_rtx (mode
), GT
, CONST0_RTX (mode
),
39736 op2
, pc_rtx
, pc_rtx
);
39738 /* Multiply LO(A) * HI(B), and vice-versa. */
39739 t1
= gen_reg_rtx (wmode
);
39740 t2
= gen_reg_rtx (wmode
);
39741 emit_insn (gen_vec_widen_umult_even_v4si (t1
, s1
, op2
));
39742 emit_insn (gen_vec_widen_umult_even_v4si (t2
, s2
, op1
));
39744 /* Multiply LO(A) * LO(B). */
39745 t0
= gen_reg_rtx (wmode
);
39746 emit_insn (gen_vec_widen_umult_even_v4si (t0
, op1
, op2
));
39748 /* Combine and shift the highparts into place. */
39749 t1
= expand_binop (wmode
, add_optab
, t1
, t2
, t1
, 1, OPTAB_DIRECT
);
39750 t1
= expand_binop (wmode
, ashl_optab
, t1
, GEN_INT (32), t1
,
39753 /* Combine high and low parts. */
39754 force_expand_binop (wmode
, add_optab
, t0
, t1
, dest
, 1, OPTAB_DIRECT
);
39761 ix86_expand_mul_widen_hilo (rtx dest
, rtx op1
, rtx op2
,
39762 bool uns_p
, bool high_p
)
39764 enum machine_mode wmode
= GET_MODE (dest
);
39765 enum machine_mode mode
= GET_MODE (op1
);
39766 rtx t1
, t2
, t3
, t4
, mask
;
39771 t1
= gen_reg_rtx (mode
);
39772 t2
= gen_reg_rtx (mode
);
39773 if (TARGET_XOP
&& !uns_p
)
39775 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
39776 shuffle the elements once so that all elements are in the right
39777 place for immediate use: { A C B D }. */
39778 emit_insn (gen_sse2_pshufd_1 (t1
, op1
, const0_rtx
, const2_rtx
,
39779 const1_rtx
, GEN_INT (3)));
39780 emit_insn (gen_sse2_pshufd_1 (t2
, op2
, const0_rtx
, const2_rtx
,
39781 const1_rtx
, GEN_INT (3)));
39785 /* Put the elements into place for the multiply. */
39786 ix86_expand_vec_interleave (t1
, op1
, op1
, high_p
);
39787 ix86_expand_vec_interleave (t2
, op2
, op2
, high_p
);
39790 ix86_expand_mul_widen_evenodd (dest
, t1
, t2
, uns_p
, high_p
);
39794 /* Shuffle the elements between the lanes. After this we
39795 have { A B E F | C D G H } for each operand. */
39796 t1
= gen_reg_rtx (V4DImode
);
39797 t2
= gen_reg_rtx (V4DImode
);
39798 emit_insn (gen_avx2_permv4di_1 (t1
, gen_lowpart (V4DImode
, op1
),
39799 const0_rtx
, const2_rtx
,
39800 const1_rtx
, GEN_INT (3)));
39801 emit_insn (gen_avx2_permv4di_1 (t2
, gen_lowpart (V4DImode
, op2
),
39802 const0_rtx
, const2_rtx
,
39803 const1_rtx
, GEN_INT (3)));
39805 /* Shuffle the elements within the lanes. After this we
39806 have { A A B B | C C D D } or { E E F F | G G H H }. */
39807 t3
= gen_reg_rtx (V8SImode
);
39808 t4
= gen_reg_rtx (V8SImode
);
39809 mask
= GEN_INT (high_p
39810 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
39811 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
39812 emit_insn (gen_avx2_pshufdv3 (t3
, gen_lowpart (V8SImode
, t1
), mask
));
39813 emit_insn (gen_avx2_pshufdv3 (t4
, gen_lowpart (V8SImode
, t2
), mask
));
39815 ix86_expand_mul_widen_evenodd (dest
, t3
, t4
, uns_p
, false);
39820 t1
= expand_binop (mode
, smul_optab
, op1
, op2
, NULL_RTX
,
39821 uns_p
, OPTAB_DIRECT
);
39822 t2
= expand_binop (mode
,
39823 uns_p
? umul_highpart_optab
: smul_highpart_optab
,
39824 op1
, op2
, NULL_RTX
, uns_p
, OPTAB_DIRECT
);
39825 gcc_assert (t1
&& t2
);
39827 ix86_expand_vec_interleave (gen_lowpart (mode
, dest
), t1
, t2
, high_p
);
39832 t1
= gen_reg_rtx (wmode
);
39833 t2
= gen_reg_rtx (wmode
);
39834 ix86_expand_sse_unpack (t1
, op1
, uns_p
, high_p
);
39835 ix86_expand_sse_unpack (t2
, op2
, uns_p
, high_p
);
39837 emit_insn (gen_rtx_SET (VOIDmode
, dest
, gen_rtx_MULT (wmode
, t1
, t2
)));
39841 gcc_unreachable ();
39846 ix86_expand_sse2_mulv4si3 (rtx op0
, rtx op1
, rtx op2
)
39850 res_1
= gen_reg_rtx (V4SImode
);
39851 res_2
= gen_reg_rtx (V4SImode
);
39852 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_1
),
39853 op1
, op2
, true, false);
39854 ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode
, res_2
),
39855 op1
, op2
, true, true);
39857 /* Move the results in element 2 down to element 1; we don't care
39858 what goes in elements 2 and 3. Then we can merge the parts
39859 back together with an interleave.
39861 Note that two other sequences were tried:
39862 (1) Use interleaves at the start instead of psrldq, which allows
39863 us to use a single shufps to merge things back at the end.
39864 (2) Use shufps here to combine the two vectors, then pshufd to
39865 put the elements in the correct order.
39866 In both cases the cost of the reformatting stall was too high
39867 and the overall sequence slower. */
39869 emit_insn (gen_sse2_pshufd_1 (res_1
, res_1
, const0_rtx
, const2_rtx
,
39870 const0_rtx
, const0_rtx
));
39871 emit_insn (gen_sse2_pshufd_1 (res_2
, res_2
, const0_rtx
, const2_rtx
,
39872 const0_rtx
, const0_rtx
));
39873 res_1
= emit_insn (gen_vec_interleave_lowv4si (op0
, res_1
, res_2
));
39875 set_unique_reg_note (res_1
, REG_EQUAL
, gen_rtx_MULT (V4SImode
, op1
, op2
));
39879 ix86_expand_sse2_mulvxdi3 (rtx op0
, rtx op1
, rtx op2
)
39881 enum machine_mode mode
= GET_MODE (op0
);
39882 rtx t1
, t2
, t3
, t4
, t5
, t6
;
39884 if (TARGET_XOP
&& mode
== V2DImode
)
39886 /* op1: A,B,C,D, op2: E,F,G,H */
39887 op1
= gen_lowpart (V4SImode
, op1
);
39888 op2
= gen_lowpart (V4SImode
, op2
);
39890 t1
= gen_reg_rtx (V4SImode
);
39891 t2
= gen_reg_rtx (V4SImode
);
39892 t3
= gen_reg_rtx (V2DImode
);
39893 t4
= gen_reg_rtx (V2DImode
);
39896 emit_insn (gen_sse2_pshufd_1 (t1
, op1
,
39902 /* t2: (B*E),(A*F),(D*G),(C*H) */
39903 emit_insn (gen_mulv4si3 (t2
, t1
, op2
));
39905 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
39906 emit_insn (gen_xop_phadddq (t3
, t2
));
39908 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
39909 emit_insn (gen_ashlv2di3 (t4
, t3
, GEN_INT (32)));
39911 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
39912 emit_insn (gen_xop_pmacsdql (op0
, op1
, op2
, t4
));
39916 enum machine_mode nmode
;
39917 rtx (*umul
) (rtx
, rtx
, rtx
);
39919 if (mode
== V2DImode
)
39921 umul
= gen_vec_widen_umult_even_v4si
;
39924 else if (mode
== V4DImode
)
39926 umul
= gen_vec_widen_umult_even_v8si
;
39930 gcc_unreachable ();
39933 /* Multiply low parts. */
39934 t1
= gen_reg_rtx (mode
);
39935 emit_insn (umul (t1
, gen_lowpart (nmode
, op1
), gen_lowpart (nmode
, op2
)));
39937 /* Shift input vectors right 32 bits so we can multiply high parts. */
39939 t2
= expand_binop (mode
, lshr_optab
, op1
, t6
, NULL
, 1, OPTAB_DIRECT
);
39940 t3
= expand_binop (mode
, lshr_optab
, op2
, t6
, NULL
, 1, OPTAB_DIRECT
);
39942 /* Multiply high parts by low parts. */
39943 t4
= gen_reg_rtx (mode
);
39944 t5
= gen_reg_rtx (mode
);
39945 emit_insn (umul (t4
, gen_lowpart (nmode
, t2
), gen_lowpart (nmode
, op2
)));
39946 emit_insn (umul (t5
, gen_lowpart (nmode
, t3
), gen_lowpart (nmode
, op1
)));
39948 /* Combine and shift the highparts back. */
39949 t4
= expand_binop (mode
, add_optab
, t4
, t5
, t4
, 1, OPTAB_DIRECT
);
39950 t4
= expand_binop (mode
, ashl_optab
, t4
, t6
, t4
, 1, OPTAB_DIRECT
);
39952 /* Combine high and low parts. */
39953 force_expand_binop (mode
, add_optab
, t1
, t4
, op0
, 1, OPTAB_DIRECT
);
39956 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
39957 gen_rtx_MULT (mode
, op1
, op2
));
39960 /* Expand an insert into a vector register through pinsr insn.
39961 Return true if successful. */
39964 ix86_expand_pinsr (rtx
*operands
)
39966 rtx dst
= operands
[0];
39967 rtx src
= operands
[3];
39969 unsigned int size
= INTVAL (operands
[1]);
39970 unsigned int pos
= INTVAL (operands
[2]);
39972 if (GET_CODE (dst
) == SUBREG
)
39974 pos
+= SUBREG_BYTE (dst
) * BITS_PER_UNIT
;
39975 dst
= SUBREG_REG (dst
);
39978 if (GET_CODE (src
) == SUBREG
)
39979 src
= SUBREG_REG (src
);
39981 switch (GET_MODE (dst
))
39988 enum machine_mode srcmode
, dstmode
;
39989 rtx (*pinsr
)(rtx
, rtx
, rtx
, rtx
);
39991 srcmode
= mode_for_size (size
, MODE_INT
, 0);
39996 if (!TARGET_SSE4_1
)
39998 dstmode
= V16QImode
;
39999 pinsr
= gen_sse4_1_pinsrb
;
40005 dstmode
= V8HImode
;
40006 pinsr
= gen_sse2_pinsrw
;
40010 if (!TARGET_SSE4_1
)
40012 dstmode
= V4SImode
;
40013 pinsr
= gen_sse4_1_pinsrd
;
40017 gcc_assert (TARGET_64BIT
);
40018 if (!TARGET_SSE4_1
)
40020 dstmode
= V2DImode
;
40021 pinsr
= gen_sse4_1_pinsrq
;
40028 dst
= gen_lowpart (dstmode
, dst
);
40029 src
= gen_lowpart (srcmode
, src
);
40033 emit_insn (pinsr (dst
, dst
, src
, GEN_INT (1 << pos
)));
40042 /* This function returns the calling abi specific va_list type node.
40043 It returns the FNDECL specific va_list type. */
/* Selects ms_va_list_type_node for functions using the MS_ABI calling
   convention and sysv_va_list_type_node otherwise.
   NOTE(review): the guard preceding the early "return va_list_type_node;"
   (original line ~40048, likely a !TARGET_64BIT check) was dropped by the
   extraction of this chunk -- confirm against the complete source.  */
40046 ix86_fn_abi_va_list (tree fndecl
)
40049 return va_list_type_node
;
/* Past this point the decl is required to query its calling ABI.  */
40050 gcc_assert (fndecl
!= NULL_TREE
);
40052 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
40053 return ms_va_list_type_node
;
40055 return sysv_va_list_type_node
;
40058 /* Returns the canonical va_list type specified by TYPE. If there
40059 is no valid TYPE provided, it return NULL_TREE. */
40062 ix86_canonical_va_list_type (tree type
)
40066 /* Resolve references and pointers to va_list type. */
40067 if (TREE_CODE (type
) == MEM_REF
)
40068 type
= TREE_TYPE (type
);
40069 else if (POINTER_TYPE_P (type
) && POINTER_TYPE_P (TREE_TYPE(type
)))
40070 type
= TREE_TYPE (type
);
40071 else if (POINTER_TYPE_P (type
) && TREE_CODE (TREE_TYPE (type
)) == ARRAY_TYPE
)
40072 type
= TREE_TYPE (type
);
40074 if (TARGET_64BIT
&& va_list_type_node
!= NULL_TREE
)
40076 wtype
= va_list_type_node
;
40077 gcc_assert (wtype
!= NULL_TREE
);
40079 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
40081 /* If va_list is an array type, the argument may have decayed
40082 to a pointer type, e.g. by being passed to another function.
40083 In that case, unwrap both types so that we can compare the
40084 underlying records. */
40085 if (TREE_CODE (htype
) == ARRAY_TYPE
40086 || POINTER_TYPE_P (htype
))
40088 wtype
= TREE_TYPE (wtype
);
40089 htype
= TREE_TYPE (htype
);
40092 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
40093 return va_list_type_node
;
40094 wtype
= sysv_va_list_type_node
;
40095 gcc_assert (wtype
!= NULL_TREE
);
40097 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
40099 /* If va_list is an array type, the argument may have decayed
40100 to a pointer type, e.g. by being passed to another function.
40101 In that case, unwrap both types so that we can compare the
40102 underlying records. */
40103 if (TREE_CODE (htype
) == ARRAY_TYPE
40104 || POINTER_TYPE_P (htype
))
40106 wtype
= TREE_TYPE (wtype
);
40107 htype
= TREE_TYPE (htype
);
40110 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
40111 return sysv_va_list_type_node
;
40112 wtype
= ms_va_list_type_node
;
40113 gcc_assert (wtype
!= NULL_TREE
);
40115 if (TREE_CODE (wtype
) == ARRAY_TYPE
)
40117 /* If va_list is an array type, the argument may have decayed
40118 to a pointer type, e.g. by being passed to another function.
40119 In that case, unwrap both types so that we can compare the
40120 underlying records. */
40121 if (TREE_CODE (htype
) == ARRAY_TYPE
40122 || POINTER_TYPE_P (htype
))
40124 wtype
= TREE_TYPE (wtype
);
40125 htype
= TREE_TYPE (htype
);
40128 if (TYPE_MAIN_VARIANT (wtype
) == TYPE_MAIN_VARIANT (htype
))
40129 return ms_va_list_type_node
;
40132 return std_canonical_va_list_type (type
);
40135 /* Iterate through the target-specific builtin types for va_list.
40136 IDX denotes the iterator, *PTREE is set to the result type of
40137 the va_list builtin, and *PNAME to its internal type.
40138 Returns zero if there is no element for this index, otherwise
40139 IDX should be increased upon the next call.
40140 Note, do not iterate a base builtin's name like __builtin_va_list.
40141 Used from c_common_nodes_and_builtins. */
/* NOTE(review): the dispatch on IDX (and the surrounding return
   statements, original lines ~40146-40163) was dropped by the
   extraction; only the two result-assignment pairs survive.  Confirm
   against the complete source.  */
40144 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
/* Element 0: the MS-ABI va_list builtin.  */
40154 *ptree
= ms_va_list_type_node
;
40155 *pname
= "__builtin_ms_va_list";
/* Element 1: the SysV-ABI va_list builtin.  */
40159 *ptree
= sysv_va_list_type_node
;
40160 *pname
= "__builtin_sysv_va_list";
40168 #undef TARGET_SCHED_DISPATCH
40169 #define TARGET_SCHED_DISPATCH has_dispatch
40170 #undef TARGET_SCHED_DISPATCH_DO
40171 #define TARGET_SCHED_DISPATCH_DO do_dispatch
40172 #undef TARGET_SCHED_REASSOCIATION_WIDTH
40173 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
40174 #undef TARGET_SCHED_REORDER
40175 #define TARGET_SCHED_REORDER ix86_sched_reorder
40176 #undef TARGET_SCHED_ADJUST_PRIORITY
40177 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
40178 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
40179 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ix86_dependencies_evaluation_hook
40181 /* The size of the dispatch window is the total number of bytes of
40182 object code allowed in a window. */
40183 #define DISPATCH_WINDOW_SIZE 16
40185 /* Number of dispatch windows considered for scheduling. */
40186 #define MAX_DISPATCH_WINDOWS 3
40188 /* Maximum number of instructions in a window. */
40191 /* Maximum number of immediate operands in a window. */
40194 /* Maximum number of immediate bits allowed in a window. */
40195 #define MAX_IMM_SIZE 128
40197 /* Maximum number of 32 bit immediates allowed in a window. */
40198 #define MAX_IMM_32 4
40200 /* Maximum number of 64 bit immediates allowed in a window. */
40201 #define MAX_IMM_64 2
40203 /* Maximum total of loads or prefetches allowed in a window. */
40206 /* Maximum total of stores allowed in a window. */
40207 #define MAX_STORE 1
40213 /* Dispatch groups. Istructions that affect the mix in a dispatch window. */
40214 enum dispatch_group
{
40229 /* Number of allowable groups in a dispatch window. It is an array
40230 indexed by dispatch_group enum. 100 is used as a big number,
40231 because the number of these kind of operations does not have any
40232 effect in dispatch window, but we need them for other reasons in
40234 static unsigned int num_allowable_groups
[disp_last
] = {
40235 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG
, BIG
40238 char group_name
[disp_last
+ 1][16] = {
40239 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
40240 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
40241 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
40244 /* Instruction path. */
40247 path_single
, /* Single micro op. */
40248 path_double
, /* Double micro op. */
40249 path_multi
, /* Instructions with more than 2 micro op.. */
40253 /* sched_insn_info defines a window to the instructions scheduled in
40254 the basic block. It contains a pointer to the insn_info table and
40255 the instruction scheduled.
40257 Windows are allocated for each basic block and are linked
40259 typedef struct sched_insn_info_s
{
40261 enum dispatch_group group
;
40262 enum insn_path path
;
40267 /* Linked list of dispatch windows. This is a two way list of
40268 dispatch windows of a basic block. It contains information about
40269 the number of uops in the window and the total number of
40270 instructions and of bytes in the object code for this dispatch
40272 typedef struct dispatch_windows_s
{
40273 int num_insn
; /* Number of insn in the window. */
40274 int num_uops
; /* Number of uops in the window. */
40275 int window_size
; /* Number of bytes in the window. */
40276 int window_num
; /* Window number between 0 or 1. */
40277 int num_imm
; /* Number of immediates in an insn. */
40278 int num_imm_32
; /* Number of 32 bit immediates in an insn. */
40279 int num_imm_64
; /* Number of 64 bit immediates in an insn. */
40280 int imm_size
; /* Total immediates in the window. */
40281 int num_loads
; /* Total memory loads in the window. */
40282 int num_stores
; /* Total memory stores in the window. */
40283 int violation
; /* Violation exists in window. */
40284 sched_insn_info
*window
; /* Pointer to the window. */
40285 struct dispatch_windows_s
*next
;
40286 struct dispatch_windows_s
*prev
;
40287 } dispatch_windows
;
40289 /* Immediate valuse used in an insn. */
40290 typedef struct imm_info_s
40297 static dispatch_windows
*dispatch_window_list
;
40298 static dispatch_windows
*dispatch_window_list1
;
40300 /* Get dispatch group of insn. */
/* Classify INSN by its "memory" insn attribute: store, load, both, or
   no memory group.  Unrecognized insns (INSN_CODE < 0) are placed in
   disp_no_group.
   NOTE(review): the return statements for the MEMORY_STORE and
   MEMORY_LOAD branches (original lines ~40311 and ~40314) were dropped
   by the extraction of this chunk -- confirm against the complete
   source.  */
40302 static enum dispatch_group
40303 get_mem_group (rtx insn
)
40305 enum attr_memory memory
;
40307 if (INSN_CODE (insn
) < 0)
40308 return disp_no_group
;
40309 memory
= get_attr_memory (insn
);
40310 if (memory
== MEMORY_STORE
)
40313 if (memory
== MEMORY_LOAD
)
40316 if (memory
== MEMORY_BOTH
)
40317 return disp_load_store
;
40319 return disp_no_group
;
40322 /* Return true if insn is a compare instruction. */
40327 enum attr_type type
;
40329 type
= get_attr_type (insn
);
40330 return (type
== TYPE_TEST
40331 || type
== TYPE_ICMP
40332 || type
== TYPE_FCMP
40333 || GET_CODE (PATTERN (insn
)) == COMPARE
);
40336 /* Return true if a dispatch violation encountered. */
/* Reads the `violation' flag of the most recently filled window: when a
   second window is linked (dispatch_window_list->next), its flag takes
   priority; otherwise the first window's flag is returned.  */
40339 dispatch_violation (void)
40341 if (dispatch_window_list
->next
)
40342 return dispatch_window_list
->next
->violation
;
40343 return dispatch_window_list
->violation
;
40346 /* Return true if insn is a branch instruction. */
/* For dispatch-window purposes both calls and jumps count as branches
   (either ends the current basic block).  */
40349 is_branch (rtx insn
)
40351 return (CALL_P (insn
) || JUMP_P (insn
));
40354 /* Return true if insn is a prefetch instruction. */
/* A prefetch is a non-jump insn whose whole pattern is a PREFETCH rtx.  */
40357 is_prefetch (rtx insn
)
40359 return NONJUMP_INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PREFETCH
;
40362 /* This function initializes a dispatch window and the list container holding a
40363 pointer to the window. */
40366 init_window (int window_num
)
40369 dispatch_windows
*new_list
;
40371 if (window_num
== 0)
40372 new_list
= dispatch_window_list
;
40374 new_list
= dispatch_window_list1
;
40376 new_list
->num_insn
= 0;
40377 new_list
->num_uops
= 0;
40378 new_list
->window_size
= 0;
40379 new_list
->next
= NULL
;
40380 new_list
->prev
= NULL
;
40381 new_list
->window_num
= window_num
;
40382 new_list
->num_imm
= 0;
40383 new_list
->num_imm_32
= 0;
40384 new_list
->num_imm_64
= 0;
40385 new_list
->imm_size
= 0;
40386 new_list
->num_loads
= 0;
40387 new_list
->num_stores
= 0;
40388 new_list
->violation
= false;
40390 for (i
= 0; i
< MAX_INSN
; i
++)
40392 new_list
->window
[i
].insn
= NULL
;
40393 new_list
->window
[i
].group
= disp_no_group
;
40394 new_list
->window
[i
].path
= no_path
;
40395 new_list
->window
[i
].byte_len
= 0;
40396 new_list
->window
[i
].imm_bytes
= 0;
40401 /* This function allocates and initializes a dispatch window and the
40402 list container holding a pointer to the window. */
/* Heap-allocates one dispatch_windows node plus its MAX_INSN + 1 entry
   sched_insn_info array (one spare slot beyond MAX_INSN).
   NOTE(review): the trailing "return new_list;" (original line ~40410)
   was dropped by the extraction -- confirm against the complete
   source.  */
40404 static dispatch_windows
*
40405 allocate_window (void)
40407 dispatch_windows
*new_list
= XNEW (struct dispatch_windows_s
);
40408 new_list
->window
= XNEWVEC (struct sched_insn_info_s
, MAX_INSN
+ 1);
40413 /* This routine initializes the dispatch scheduling information. It
40414 initiates building dispatch scheduler tables and constructs the
40415 first dispatch window. */
/* NOTE(review): the init_window calls that reset both freshly allocated
   windows (original lines ~40423-40424) were dropped by the extraction
   -- confirm against the complete source.  */
40418 init_dispatch_sched (void)
40420 /* Allocate a dispatch list and a window. */
40421 dispatch_window_list
= allocate_window ();
40422 dispatch_window_list1
= allocate_window ();
40427 /* This function returns true if a branch is detected. End of a basic block
40428 does not have to be a branch, but here we assume only branches end a
basic block (the tail of this sentence was lost in extraction). */
/* The dispatch scheduler treats only the disp_branch group as a
   block terminator.  */
40432 is_end_basic_block (enum dispatch_group group
)
40434 return group
== disp_branch
;
40437 /* This function is called when the end of a window processing is reached. */
/* Sanity-checks window sizes when the pair of windows is closed: neither
   window may exceed MAX_INSN insns, and their combined byte size is
   capped at 48 bytes.
   NOTE(review): the init_window calls that reset both windows after the
   checks (original lines ~40448-40450) were dropped by the extraction
   -- confirm against the complete source.  */
40440 process_end_window (void)
40442 gcc_assert (dispatch_window_list
->num_insn
<= MAX_INSN
);
40443 if (dispatch_window_list
->next
)
40445 gcc_assert (dispatch_window_list1
->num_insn
<= MAX_INSN
);
40446 gcc_assert (dispatch_window_list
->window_size
40447 + dispatch_window_list1
->window_size
<= 48);
40453 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
40454 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
40455 for 48 bytes of instructions. Note that these windows are not dispatch
40456 windows that their sizes are DISPATCH_WINDOW_SIZE. */
/* Both windows are statically pre-allocated (dispatch_window_list and
   dispatch_window_list1); this routine only (re)links and returns them.
   NOTE(review): some interior lines (the init_window calls and the
   branch structure around them, e.g. original lines ~40464-40468) were
   dropped by the extraction -- confirm against the complete source.  */
40458 static dispatch_windows
*
40459 allocate_next_window (int window_num
)
40461 if (window_num
== 0)
40463 if (dispatch_window_list
->next
)
40466 return dispatch_window_list
;
/* Link window 1 after window 0 and return it.  */
40469 dispatch_window_list
->next
= dispatch_window_list1
;
40470 dispatch_window_list1
->prev
= dispatch_window_list
;
40472 return dispatch_window_list1
;
40475 /* Increment the number of immediate operands of an instruction. */
40478 find_constant_1 (rtx
*in_rtx
, imm_info
*imm_values
)
40483 switch ( GET_CODE (*in_rtx
))
40488 (imm_values
->imm
)++;
40489 if (x86_64_immediate_operand (*in_rtx
, SImode
))
40490 (imm_values
->imm32
)++;
40492 (imm_values
->imm64
)++;
40496 (imm_values
->imm
)++;
40497 (imm_values
->imm64
)++;
40501 if (LABEL_KIND (*in_rtx
) == LABEL_NORMAL
)
40503 (imm_values
->imm
)++;
40504 (imm_values
->imm32
)++;
40515 /* Compute number of immediate operands of an instruction. */
/* Walks every sub-rtx of IN_RTX (its PATTERN when it is an insn, the
   rtx itself otherwise) with find_constant_1, accumulating immediate
   counts into *IMM_VALUES.  */
40518 find_constant (rtx in_rtx
, imm_info
*imm_values
)
40520 for_each_rtx (INSN_P (in_rtx
) ? &PATTERN (in_rtx
) : &in_rtx
,
40521 (rtx_function
) find_constant_1
, (void *) imm_values
);
40524 /* Return total size of immediate operands of an instruction along with number
40525 of corresponding immediate-operands. It initializes its parameters to zero
40526 before calling FIND_CONSTANT.
40527 INSN is the input instruction. IMM is the total of immediates.
40528 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
bit immediates (tail of this sentence was lost in extraction). */
/* Returns the total immediate size in bytes: 4 bytes per 32-bit
   immediate plus 8 bytes per 64-bit immediate.  */
40532 get_num_immediates (rtx insn
, int *imm
, int *imm32
, int *imm64
)
40534 imm_info imm_values
= {0, 0, 0};
40536 find_constant (insn
, &imm_values
);
40537 *imm
= imm_values
.imm
;
40538 *imm32
= imm_values
.imm32
;
40539 *imm64
= imm_values
.imm64
;
40540 return imm_values
.imm32
* 4 + imm_values
.imm64
* 8;
40543 /* This function indicates if an operand of an instruction is an
immediate (tail of this sentence was lost in extraction). */
/* Returns get_num_immediates's byte total, i.e. nonzero iff INSN has at
   least one 32- or 64-bit immediate operand; the individual counts are
   discarded.  */
40547 has_immediate (rtx insn
)
40549 int num_imm_operand
;
40550 int num_imm32_operand
;
40551 int num_imm64_operand
;
40554 return get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
40555 &num_imm64_operand
);
40559 /* Return single or double path for instructions. */
/* Maps the amdfam10_decode insn attribute onto the insn_path enum:
   attribute value 0 -> path_single, 1 -> path_double.
   NOTE(review): the fall-through "return path_multi;" for all other
   attribute values (original line ~40572) was dropped by the extraction
   -- confirm against the complete source.  */
40561 static enum insn_path
40562 get_insn_path (rtx insn
)
40564 enum attr_amdfam10_decode path
= get_attr_amdfam10_decode (insn
);
40566 if ((int)path
== 0)
40567 return path_single
;
40569 if ((int)path
== 1)
40570 return path_double
;
40575 /* Return insn dispatch group. */
40577 static enum dispatch_group
40578 get_insn_group (rtx insn
)
40580 enum dispatch_group group
= get_mem_group (insn
);
40584 if (is_branch (insn
))
40585 return disp_branch
;
40590 if (has_immediate (insn
))
40593 if (is_prefetch (insn
))
40594 return disp_prefetch
;
40596 return disp_no_group
;
40599 /* Count number of GROUP restricted instructions in a dispatch
40600 window WINDOW_LIST. */
40603 count_num_restricted (rtx insn
, dispatch_windows
*window_list
)
40605 enum dispatch_group group
= get_insn_group (insn
);
40607 int num_imm_operand
;
40608 int num_imm32_operand
;
40609 int num_imm64_operand
;
40611 if (group
== disp_no_group
)
40614 if (group
== disp_imm
)
40616 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
40617 &num_imm64_operand
);
40618 if (window_list
->imm_size
+ imm_size
> MAX_IMM_SIZE
40619 || num_imm_operand
+ window_list
->num_imm
> MAX_IMM
40620 || (num_imm32_operand
> 0
40621 && (window_list
->num_imm_32
+ num_imm32_operand
> MAX_IMM_32
40622 || window_list
->num_imm_64
* 2 + num_imm32_operand
> MAX_IMM_32
))
40623 || (num_imm64_operand
> 0
40624 && (window_list
->num_imm_64
+ num_imm64_operand
> MAX_IMM_64
40625 || window_list
->num_imm_32
+ num_imm64_operand
* 2 > MAX_IMM_32
))
40626 || (window_list
->imm_size
+ imm_size
== MAX_IMM_SIZE
40627 && num_imm64_operand
> 0
40628 && ((window_list
->num_imm_64
> 0
40629 && window_list
->num_insn
>= 2)
40630 || window_list
->num_insn
>= 3)))
40636 if ((group
== disp_load_store
40637 && (window_list
->num_loads
>= MAX_LOAD
40638 || window_list
->num_stores
>= MAX_STORE
))
40639 || ((group
== disp_load
40640 || group
== disp_prefetch
)
40641 && window_list
->num_loads
>= MAX_LOAD
)
40642 || (group
== disp_store
40643 && window_list
->num_stores
>= MAX_STORE
))
40649 /* This function returns true if insn satisfies dispatch rules on the
40650 last window scheduled. */
40653 fits_dispatch_window (rtx insn
)
40655 dispatch_windows
*window_list
= dispatch_window_list
;
40656 dispatch_windows
*window_list_next
= dispatch_window_list
->next
;
40657 unsigned int num_restrict
;
40658 enum dispatch_group group
= get_insn_group (insn
);
40659 enum insn_path path
= get_insn_path (insn
);
40662 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
40663 instructions should be given the lowest priority in the
40664 scheduling process in Haifa scheduler to make sure they will be
40665 scheduled in the same dispatch window as the reference to them. */
40666 if (group
== disp_jcc
|| group
== disp_cmp
)
40669 /* Check nonrestricted. */
40670 if (group
== disp_no_group
|| group
== disp_branch
)
40673 /* Get last dispatch window. */
40674 if (window_list_next
)
40675 window_list
= window_list_next
;
40677 if (window_list
->window_num
== 1)
40679 sum
= window_list
->prev
->window_size
+ window_list
->window_size
;
40682 || (min_insn_size (insn
) + sum
) >= 48)
40683 /* Window 1 is full. Go for next window. */
40687 num_restrict
= count_num_restricted (insn
, window_list
);
40689 if (num_restrict
> num_allowable_groups
[group
])
40692 /* See if it fits in the first window. */
40693 if (window_list
->window_num
== 0)
40695 /* The first widow should have only single and double path
40697 if (path
== path_double
40698 && (window_list
->num_uops
+ 2) > MAX_INSN
)
40700 else if (path
!= path_single
)
40706 /* Add an instruction INSN with NUM_UOPS micro-operations to the
40707 dispatch window WINDOW_LIST. */
40710 add_insn_window (rtx insn
, dispatch_windows
*window_list
, int num_uops
)
40712 int byte_len
= min_insn_size (insn
);
40713 int num_insn
= window_list
->num_insn
;
40715 sched_insn_info
*window
= window_list
->window
;
40716 enum dispatch_group group
= get_insn_group (insn
);
40717 enum insn_path path
= get_insn_path (insn
);
40718 int num_imm_operand
;
40719 int num_imm32_operand
;
40720 int num_imm64_operand
;
40722 if (!window_list
->violation
&& group
!= disp_cmp
40723 && !fits_dispatch_window (insn
))
40724 window_list
->violation
= true;
40726 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
40727 &num_imm64_operand
);
40729 /* Initialize window with new instruction. */
40730 window
[num_insn
].insn
= insn
;
40731 window
[num_insn
].byte_len
= byte_len
;
40732 window
[num_insn
].group
= group
;
40733 window
[num_insn
].path
= path
;
40734 window
[num_insn
].imm_bytes
= imm_size
;
40736 window_list
->window_size
+= byte_len
;
40737 window_list
->num_insn
= num_insn
+ 1;
40738 window_list
->num_uops
= window_list
->num_uops
+ num_uops
;
40739 window_list
->imm_size
+= imm_size
;
40740 window_list
->num_imm
+= num_imm_operand
;
40741 window_list
->num_imm_32
+= num_imm32_operand
;
40742 window_list
->num_imm_64
+= num_imm64_operand
;
40744 if (group
== disp_store
)
40745 window_list
->num_stores
+= 1;
40746 else if (group
== disp_load
40747 || group
== disp_prefetch
)
40748 window_list
->num_loads
+= 1;
40749 else if (group
== disp_load_store
)
40751 window_list
->num_stores
+= 1;
40752 window_list
->num_loads
+= 1;
40756 /* Adds a scheduled instruction, INSN, to the current dispatch window.
40757 If the total bytes of instructions or the number of instructions in
40758 the window exceed allowable, it allocates a new window. */
40761 add_to_dispatch_window (rtx insn
)
40764 dispatch_windows
*window_list
;
40765 dispatch_windows
*next_list
;
40766 dispatch_windows
*window0_list
;
40767 enum insn_path path
;
40768 enum dispatch_group insn_group
;
40776 if (INSN_CODE (insn
) < 0)
40779 byte_len
= min_insn_size (insn
);
40780 window_list
= dispatch_window_list
;
40781 next_list
= window_list
->next
;
40782 path
= get_insn_path (insn
);
40783 insn_group
= get_insn_group (insn
);
40785 /* Get the last dispatch window. */
40787 window_list
= dispatch_window_list
->next
;
40789 if (path
== path_single
)
40791 else if (path
== path_double
)
40794 insn_num_uops
= (int) path
;
40796 /* If current window is full, get a new window.
40797 Window number zero is full, if MAX_INSN uops are scheduled in it.
40798 Window number one is full, if window zero's bytes plus window
40799 one's bytes is 32, or if the bytes of the new instruction added
40800 to the total makes it greater than 48, or it has already MAX_INSN
40801 instructions in it. */
40802 num_insn
= window_list
->num_insn
;
40803 num_uops
= window_list
->num_uops
;
40804 window_num
= window_list
->window_num
;
40805 insn_fits
= fits_dispatch_window (insn
);
40807 if (num_insn
>= MAX_INSN
40808 || num_uops
+ insn_num_uops
> MAX_INSN
40811 window_num
= ~window_num
& 1;
40812 window_list
= allocate_next_window (window_num
);
40815 if (window_num
== 0)
40817 add_insn_window (insn
, window_list
, insn_num_uops
);
40818 if (window_list
->num_insn
>= MAX_INSN
40819 && insn_group
== disp_branch
)
40821 process_end_window ();
40825 else if (window_num
== 1)
40827 window0_list
= window_list
->prev
;
40828 sum
= window0_list
->window_size
+ window_list
->window_size
;
40830 || (byte_len
+ sum
) >= 48)
40832 process_end_window ();
40833 window_list
= dispatch_window_list
;
40836 add_insn_window (insn
, window_list
, insn_num_uops
);
40839 gcc_unreachable ();
40841 if (is_end_basic_block (insn_group
))
40843 /* End of basic block is reached do end-basic-block process. */
40844 process_end_window ();
40849 /* Print the dispatch window, WINDOW_NUM, to FILE. */
40851 DEBUG_FUNCTION
static void
40852 debug_dispatch_window_file (FILE *file
, int window_num
)
40854 dispatch_windows
*list
;
40857 if (window_num
== 0)
40858 list
= dispatch_window_list
;
40860 list
= dispatch_window_list1
;
40862 fprintf (file
, "Window #%d:\n", list
->window_num
);
40863 fprintf (file
, " num_insn = %d, num_uops = %d, window_size = %d\n",
40864 list
->num_insn
, list
->num_uops
, list
->window_size
);
40865 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
40866 list
->num_imm
, list
->num_imm_32
, list
->num_imm_64
, list
->imm_size
);
40868 fprintf (file
, " num_loads = %d, num_stores = %d\n", list
->num_loads
,
40870 fprintf (file
, " insn info:\n");
40872 for (i
= 0; i
< MAX_INSN
; i
++)
40874 if (!list
->window
[i
].insn
)
40876 fprintf (file
, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
40877 i
, group_name
[list
->window
[i
].group
],
40878 i
, (void *)list
->window
[i
].insn
,
40879 i
, list
->window
[i
].path
,
40880 i
, list
->window
[i
].byte_len
,
40881 i
, list
->window
[i
].imm_bytes
);
40885 /* Print to stdout a dispatch window. */
40887 DEBUG_FUNCTION
void
40888 debug_dispatch_window (int window_num
)
40890 debug_dispatch_window_file (stdout
, window_num
);
40893 /* Print INSN dispatch information to FILE. */
40895 DEBUG_FUNCTION
static void
40896 debug_insn_dispatch_info_file (FILE *file
, rtx insn
)
40899 enum insn_path path
;
40900 enum dispatch_group group
;
40902 int num_imm_operand
;
40903 int num_imm32_operand
;
40904 int num_imm64_operand
;
40906 if (INSN_CODE (insn
) < 0)
40909 byte_len
= min_insn_size (insn
);
40910 path
= get_insn_path (insn
);
40911 group
= get_insn_group (insn
);
40912 imm_size
= get_num_immediates (insn
, &num_imm_operand
, &num_imm32_operand
,
40913 &num_imm64_operand
);
40915 fprintf (file
, " insn info:\n");
40916 fprintf (file
, " group = %s, path = %d, byte_len = %d\n",
40917 group_name
[group
], path
, byte_len
);
40918 fprintf (file
, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
40919 num_imm_operand
, num_imm32_operand
, num_imm64_operand
, imm_size
);
40922 /* Print to STDERR the status of the ready list with respect to
40923 dispatch windows. */
40925 DEBUG_FUNCTION
void
40926 debug_ready_dispatch (void)
40929 int no_ready
= number_in_ready ();
40931 fprintf (stdout
, "Number of ready: %d\n", no_ready
);
40933 for (i
= 0; i
< no_ready
; i
++)
40934 debug_insn_dispatch_info_file (stdout
, get_ready_element (i
));
40937 /* This routine is the driver of the dispatch scheduler. */
40940 do_dispatch (rtx insn
, int mode
)
40942 if (mode
== DISPATCH_INIT
)
40943 init_dispatch_sched ();
40944 else if (mode
== ADD_TO_DISPATCH_WINDOW
)
40945 add_to_dispatch_window (insn
);
40948 /* Return TRUE if Dispatch Scheduling is supported. */
40951 has_dispatch (rtx insn
, int action
)
40953 if ((TARGET_BDVER1
|| TARGET_BDVER2
)
40954 && flag_dispatch_scheduler
)
40960 case IS_DISPATCH_ON
:
40965 return is_cmp (insn
);
40967 case DISPATCH_VIOLATION
:
40968 return dispatch_violation ();
40970 case FITS_DISPATCH_WINDOW
:
40971 return fits_dispatch_window (insn
);
40977 /* Implementation of reassociation_width target hook used by
40978 reassoc phase to identify parallelism level in reassociated
40979 tree. Statements tree_code is passed in OPC. Arguments type
40982 Currently parallel reassociation is enabled for Atom
40983 processors only and we set reassociation width to be 2
40984 because Atom may issue up to 2 instructions per cycle.
40986 Return value should be fixed if parallel reassociation is
40987 enabled for other processors. */
40990 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED
,
40991 enum machine_mode mode
)
40995 if (INTEGRAL_MODE_P (mode
) && TARGET_REASSOC_INT_TO_PARALLEL
)
40997 else if (FLOAT_MODE_P (mode
) && TARGET_REASSOC_FP_TO_PARALLEL
)
41003 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
41004 place emms and femms instructions. */
41006 static enum machine_mode
41007 ix86_preferred_simd_mode (enum machine_mode mode
)
41015 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V32QImode
: V16QImode
;
41017 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V16HImode
: V8HImode
;
41019 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V8SImode
: V4SImode
;
41021 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? V4DImode
: V2DImode
;
41024 if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
41030 if (!TARGET_VECTORIZE_DOUBLE
)
41032 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
41034 else if (TARGET_SSE2
)
41043 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
41046 static unsigned int
41047 ix86_autovectorize_vector_sizes (void)
41049 return (TARGET_AVX
&& !TARGET_PREFER_AVX128
) ? 32 | 16 : 0;
41054 /* Return class of registers which could be used for pseudo of MODE
41055 and of class RCLASS for spilling instead of memory. Return NO_REGS
41056 if it is not possible or non-profitable. */
41058 ix86_spill_class (reg_class_t rclass
, enum machine_mode mode
)
41060 if (TARGET_SSE
&& TARGET_GENERAL_REGS_SSE_SPILL
&& ! TARGET_MMX
41061 && hard_reg_set_subset_p (reg_class_contents
[rclass
],
41062 reg_class_contents
[GENERAL_REGS
])
41063 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
)))
41068 /* Implement targetm.vectorize.init_cost. */
41071 ix86_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
41073 unsigned *cost
= XNEWVEC (unsigned, 3);
41074 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
41078 /* Implement targetm.vectorize.add_stmt_cost. */
41081 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
41082 struct _stmt_vec_info
*stmt_info
, int misalign
,
41083 enum vect_cost_model_location where
)
41085 unsigned *cost
= (unsigned *) data
;
41086 unsigned retval
= 0;
41088 if (flag_vect_cost_model
)
41090 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
41091 int stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
41093 /* Statements in an inner loop relative to the loop being
41094 vectorized are weighted more heavily. The value here is
41095 arbitrary and could potentially be improved with analysis. */
41096 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
41097 count
*= 50; /* FIXME. */
41099 retval
= (unsigned) (count
* stmt_cost
);
41100 cost
[where
] += retval
;
41106 /* Implement targetm.vectorize.finish_cost. */
41109 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
41110 unsigned *body_cost
, unsigned *epilogue_cost
)
41112 unsigned *cost
= (unsigned *) data
;
41113 *prologue_cost
= cost
[vect_prologue
];
41114 *body_cost
= cost
[vect_body
];
41115 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Release the cost
   accumulator allocated by ix86_init_cost.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
41126 /* Validate target specific memory model bits in VAL. */
41128 static unsigned HOST_WIDE_INT
41129 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
41131 unsigned HOST_WIDE_INT model
= val
& MEMMODEL_MASK
;
41132 unsigned HOST_WIDE_INT strong
;
41134 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
41136 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
41138 warning (OPT_Winvalid_memory_model
,
41139 "Unknown architecture specific memory model");
41140 return MEMMODEL_SEQ_CST
;
41142 strong
= (model
== MEMMODEL_ACQ_REL
|| model
== MEMMODEL_SEQ_CST
);
41143 if (val
& IX86_HLE_ACQUIRE
&& !(model
== MEMMODEL_ACQUIRE
|| strong
))
41145 warning (OPT_Winvalid_memory_model
,
41146 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
41147 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
41149 if (val
& IX86_HLE_RELEASE
&& !(model
== MEMMODEL_RELEASE
|| strong
))
41151 warning (OPT_Winvalid_memory_model
,
41152 "HLE_RELEASE not used with RELEASE or stronger memory model");
41153 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
41158 /* Initialize the GCC target structure. */
41159 #undef TARGET_RETURN_IN_MEMORY
41160 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
41162 #undef TARGET_LEGITIMIZE_ADDRESS
41163 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
41165 #undef TARGET_ATTRIBUTE_TABLE
41166 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
41167 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
41168 # undef TARGET_MERGE_DECL_ATTRIBUTES
41169 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
41172 #undef TARGET_COMP_TYPE_ATTRIBUTES
41173 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
41175 #undef TARGET_INIT_BUILTINS
41176 #define TARGET_INIT_BUILTINS ix86_init_builtins
41177 #undef TARGET_BUILTIN_DECL
41178 #define TARGET_BUILTIN_DECL ix86_builtin_decl
41179 #undef TARGET_EXPAND_BUILTIN
41180 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
41182 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
41183 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
41184 ix86_builtin_vectorized_function
41186 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
41187 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
41189 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
41190 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
41192 #undef TARGET_VECTORIZE_BUILTIN_GATHER
41193 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
41195 #undef TARGET_BUILTIN_RECIPROCAL
41196 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
41198 #undef TARGET_ASM_FUNCTION_EPILOGUE
41199 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
41201 #undef TARGET_ENCODE_SECTION_INFO
41202 #ifndef SUBTARGET_ENCODE_SECTION_INFO
41203 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
41205 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
41208 #undef TARGET_ASM_OPEN_PAREN
41209 #define TARGET_ASM_OPEN_PAREN ""
41210 #undef TARGET_ASM_CLOSE_PAREN
41211 #define TARGET_ASM_CLOSE_PAREN ""
41213 #undef TARGET_ASM_BYTE_OP
41214 #define TARGET_ASM_BYTE_OP ASM_BYTE
41216 #undef TARGET_ASM_ALIGNED_HI_OP
41217 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
41218 #undef TARGET_ASM_ALIGNED_SI_OP
41219 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
41221 #undef TARGET_ASM_ALIGNED_DI_OP
41222 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
41225 #undef TARGET_PROFILE_BEFORE_PROLOGUE
41226 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
41228 #undef TARGET_ASM_UNALIGNED_HI_OP
41229 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
41230 #undef TARGET_ASM_UNALIGNED_SI_OP
41231 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
41232 #undef TARGET_ASM_UNALIGNED_DI_OP
41233 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
41235 #undef TARGET_PRINT_OPERAND
41236 #define TARGET_PRINT_OPERAND ix86_print_operand
41237 #undef TARGET_PRINT_OPERAND_ADDRESS
41238 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
41239 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
41240 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
41241 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
41242 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
41244 #undef TARGET_SCHED_INIT_GLOBAL
41245 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
41246 #undef TARGET_SCHED_ADJUST_COST
41247 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
41248 #undef TARGET_SCHED_ISSUE_RATE
41249 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
41250 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
41251 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
41252 ia32_multipass_dfa_lookahead
41254 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
41255 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
41257 #undef TARGET_MEMMODEL_CHECK
41258 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
41261 #undef TARGET_HAVE_TLS
41262 #define TARGET_HAVE_TLS true
41264 #undef TARGET_CANNOT_FORCE_CONST_MEM
41265 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
41266 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
41267 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
41269 #undef TARGET_DELEGITIMIZE_ADDRESS
41270 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
41272 #undef TARGET_MS_BITFIELD_LAYOUT_P
41273 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
41276 #undef TARGET_BINDS_LOCAL_P
41277 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
41279 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
41280 #undef TARGET_BINDS_LOCAL_P
41281 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
41284 #undef TARGET_ASM_OUTPUT_MI_THUNK
41285 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
41286 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
41287 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
41289 #undef TARGET_ASM_FILE_START
41290 #define TARGET_ASM_FILE_START x86_file_start
41292 #undef TARGET_OPTION_OVERRIDE
41293 #define TARGET_OPTION_OVERRIDE ix86_option_override
41295 #undef TARGET_REGISTER_MOVE_COST
41296 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
41297 #undef TARGET_MEMORY_MOVE_COST
41298 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
41299 #undef TARGET_RTX_COSTS
41300 #define TARGET_RTX_COSTS ix86_rtx_costs
41301 #undef TARGET_ADDRESS_COST
41302 #define TARGET_ADDRESS_COST ix86_address_cost
41304 #undef TARGET_FIXED_CONDITION_CODE_REGS
41305 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
41306 #undef TARGET_CC_MODES_COMPATIBLE
41307 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
41309 #undef TARGET_MACHINE_DEPENDENT_REORG
41310 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
41312 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
41313 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
41315 #undef TARGET_BUILD_BUILTIN_VA_LIST
41316 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
41318 #undef TARGET_FOLD_BUILTIN
41319 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
41321 #undef TARGET_ENUM_VA_LIST_P
41322 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
41324 #undef TARGET_FN_ABI_VA_LIST
41325 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
41327 #undef TARGET_CANONICAL_VA_LIST_TYPE
41328 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
41330 #undef TARGET_EXPAND_BUILTIN_VA_START
41331 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
41333 #undef TARGET_MD_ASM_CLOBBERS
41334 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
41336 #undef TARGET_PROMOTE_PROTOTYPES
41337 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
41338 #undef TARGET_STRUCT_VALUE_RTX
41339 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
41340 #undef TARGET_SETUP_INCOMING_VARARGS
41341 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
41342 #undef TARGET_MUST_PASS_IN_STACK
41343 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
41344 #undef TARGET_FUNCTION_ARG_ADVANCE
41345 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
41346 #undef TARGET_FUNCTION_ARG
41347 #define TARGET_FUNCTION_ARG ix86_function_arg
41348 #undef TARGET_FUNCTION_ARG_BOUNDARY
41349 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
41350 #undef TARGET_PASS_BY_REFERENCE
41351 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
41352 #undef TARGET_INTERNAL_ARG_POINTER
41353 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
41354 #undef TARGET_UPDATE_STACK_BOUNDARY
41355 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
41356 #undef TARGET_GET_DRAP_RTX
41357 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
41358 #undef TARGET_STRICT_ARGUMENT_NAMING
41359 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
41360 #undef TARGET_STATIC_CHAIN
41361 #define TARGET_STATIC_CHAIN ix86_static_chain
41362 #undef TARGET_TRAMPOLINE_INIT
41363 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
41364 #undef TARGET_RETURN_POPS_ARGS
41365 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
41367 #undef TARGET_LEGITIMATE_COMBINED_INSN
41368 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
41370 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
41371 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
41373 #undef TARGET_SCALAR_MODE_SUPPORTED_P
41374 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
41376 #undef TARGET_VECTOR_MODE_SUPPORTED_P
41377 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
41379 #undef TARGET_C_MODE_FOR_SUFFIX
41380 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
41383 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
41384 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
41387 #ifdef SUBTARGET_INSERT_ATTRIBUTES
41388 #undef TARGET_INSERT_ATTRIBUTES
41389 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
41392 #undef TARGET_MANGLE_TYPE
41393 #define TARGET_MANGLE_TYPE ix86_mangle_type
41396 #undef TARGET_STACK_PROTECT_FAIL
41397 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
41400 #undef TARGET_FUNCTION_VALUE
41401 #define TARGET_FUNCTION_VALUE ix86_function_value
41403 #undef TARGET_FUNCTION_VALUE_REGNO_P
41404 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
41406 #undef TARGET_PROMOTE_FUNCTION_MODE
41407 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
41409 #undef TARGET_MEMBER_TYPE_FORCES_BLK
41410 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
41412 #undef TARGET_SECONDARY_RELOAD
41413 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
41415 #undef TARGET_CLASS_MAX_NREGS
41416 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
41418 #undef TARGET_PREFERRED_RELOAD_CLASS
41419 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
41420 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
41421 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
41422 #undef TARGET_CLASS_LIKELY_SPILLED_P
41423 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
41425 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
41426 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
41427 ix86_builtin_vectorization_cost
41428 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
41429 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
41430 ix86_vectorize_vec_perm_const_ok
41431 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
41432 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
41433 ix86_preferred_simd_mode
41434 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
41435 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
41436 ix86_autovectorize_vector_sizes
41437 #undef TARGET_VECTORIZE_INIT_COST
41438 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
41439 #undef TARGET_VECTORIZE_ADD_STMT_COST
41440 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
41441 #undef TARGET_VECTORIZE_FINISH_COST
41442 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
41443 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
41444 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
41446 #undef TARGET_SET_CURRENT_FUNCTION
41447 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
41449 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
41450 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
41452 #undef TARGET_OPTION_SAVE
41453 #define TARGET_OPTION_SAVE ix86_function_specific_save
41455 #undef TARGET_OPTION_RESTORE
41456 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
41458 #undef TARGET_OPTION_PRINT
41459 #define TARGET_OPTION_PRINT ix86_function_specific_print
41461 #undef TARGET_CAN_INLINE_P
41462 #define TARGET_CAN_INLINE_P ix86_can_inline_p
41464 #undef TARGET_EXPAND_TO_RTL_HOOK
41465 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
41467 #undef TARGET_LEGITIMATE_ADDRESS_P
41468 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
41470 #undef TARGET_LRA_P
41471 #define TARGET_LRA_P ix86_lra_p
41473 #undef TARGET_REGISTER_PRIORITY
41474 #define TARGET_REGISTER_PRIORITY ix86_register_priority
41476 #undef TARGET_LEGITIMATE_CONSTANT_P
41477 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
41479 #undef TARGET_FRAME_POINTER_REQUIRED
41480 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
41482 #undef TARGET_CAN_ELIMINATE
41483 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
41485 #undef TARGET_EXTRA_LIVE_ON_ENTRY
41486 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
41488 #undef TARGET_ASM_CODE_END
41489 #define TARGET_ASM_CODE_END ix86_code_end
41491 #undef TARGET_CONDITIONAL_REGISTER_USAGE
41492 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
41495 #undef TARGET_INIT_LIBFUNCS
41496 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
41499 #undef TARGET_SPILL_CLASS
41500 #define TARGET_SPILL_CLASS ix86_spill_class
41502 struct gcc_target targetm
= TARGET_INITIALIZER
;
41504 #include "gt-i386.h"