/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "function.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B)   ((block_info) (B)->aux)
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee doesn't return nor pass 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
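
/* For example, the scan below reduces "does this insn store a 256bit
   AVX register?" to:

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   note_stores invokes the callback once per store destination in the
   pattern, and the callback only ever raises STATE to "used", so a
   single walk over the pattern is sufficient.  */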
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as USED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      if (e->src == block)
        continue;

      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
        case unused:
          break;
        case used:
          state = used;
          goto done;
        }
    }

  if (seen_unknown)
    state = unknown;

done:
  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
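
/* In effect the predecessor scan above computes a three-value join
   (summarized):

     any predecessor "used"                              -> used
     else any predecessor "unknown", and UNKNOWN_IS_UNUSED
       is false                                          -> unknown
     else                                                -> unused  */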
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              edge_iterator ei;

              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
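
/* Usage sketch ("mult_init" here stands for the five-entry multiply
   table of struct processor_costs; the exact field is defined
   elsewhere):

     cost->mult_init[MODE_INDEX (SImode)]

   selects index 2, the SImode column.  Any mode outside
   QI/HI/SI/DImode falls through to index 4, the "other" column of
   the tables below.  */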
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
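
/* A quick sanity check of the scale, assuming COSTS_N_INSNS (N) ==
   (N) * 4 as stated above: COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so a 2-byte add is worth exactly one insn unit and the byte-based
   size costs stay directly comparable with the insn-based tables that
   follow.  */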
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
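
/* A stringop_algs value reads {alg-for-unknown-size, {{max, alg}, ...}}:
   each {max, alg} pair selects ALG for blocks of up to MAX bytes, and
   max == -1 terminates the list.  The dummy entry above therefore means
   "always use a library call".  An entry such as

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   (used by several tables below) picks rep movsl for blocks up to 256
   bytes and a libcall beyond that.  Each cost table carries a pair of
   these for memcpy and a pair for memset, one element per 32-bit and
   64-bit operation; 32-bit-only processors use DUMMY_STRINGOP_ALGS as
   the 64-bit half.  */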
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),        /* cost of an add instruction */
  COSTS_N_BYTES (3),        /* cost of a lea instruction */
  COSTS_N_BYTES (2),        /* variable shift costs */
  COSTS_N_BYTES (3),        /* constant shift costs */
  {COSTS_N_BYTES (3),       /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),       /*                               HI */
   COSTS_N_BYTES (3),       /*                               SI */
   COSTS_N_BYTES (3),       /*                               DI */
   COSTS_N_BYTES (5)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),       /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),       /*                          HI */
   COSTS_N_BYTES (3),       /*                          SI */
   COSTS_N_BYTES (3),       /*                          DI */
   COSTS_N_BYTES (5)},      /*                       other */
  COSTS_N_BYTES (3),        /* cost of movsx */
  COSTS_N_BYTES (3),        /* cost of movzx */
  0,                        /* "large" insn */
  2,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {2, 2, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 2},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {2, 2, 2},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  3,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {3, 3},                   /* cost of storing MMX registers
                               in SImode and DImode */
  3,                        /* cost of moving SSE register */
  {3, 3, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {3, 3, 3},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of l1 cache */
  0,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_BYTES (2),        /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),        /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),        /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),        /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),        /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),        /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  1,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  1,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (3),        /* variable shift costs */
  COSTS_N_INSNS (2),        /* constant shift costs */
  {COSTS_N_INSNS (6),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),       /*                               HI */
   COSTS_N_INSNS (6),       /*                               SI */
   COSTS_N_INSNS (6),       /*                               DI */
   COSTS_N_INSNS (6)},      /*                            other */
  COSTS_N_INSNS (1),        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),      /*                          HI */
   COSTS_N_INSNS (23),      /*                          SI */
   COSTS_N_INSNS (23),      /*                          DI */
   COSTS_N_INSNS (23)},     /*                       other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  15,                       /* "large" insn */
  3,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of l1 cache */
  0,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (23),       /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),       /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),       /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),      /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (3),        /* variable shift costs */
  COSTS_N_INSNS (2),        /* constant shift costs */
  {COSTS_N_INSNS (12),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),      /*                               HI */
   COSTS_N_INSNS (12),      /*                               SI */
   COSTS_N_INSNS (12),      /*                               DI */
   COSTS_N_INSNS (12)},     /*                            other */
  1,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),      /*                          HI */
   COSTS_N_INSNS (40),      /*                          SI */
   COSTS_N_INSNS (40),      /*                          DI */
   COSTS_N_INSNS (40)},     /*                       other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  15,                       /* "large" insn */
  3,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  4,                        /* size of l1 cache.  486 has 8kB cache
                               shared for code and data, so 4kB is
                               not really precise.  */
  4,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),       /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (4),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (11),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),      /*                               HI */
   COSTS_N_INSNS (11),      /*                               SI */
   COSTS_N_INSNS (11),      /*                               DI */
   COSTS_N_INSNS (11)},     /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),      /*                          HI */
   COSTS_N_INSNS (25),      /*                          SI */
   COSTS_N_INSNS (25),      /*                          DI */
   COSTS_N_INSNS (25)},     /*                       other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* MOVE_RATIO */
  6,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  8,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  8,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (4),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (4),       /*                               SI */
   COSTS_N_INSNS (4),       /*                               DI */
   COSTS_N_INSNS (4)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),      /*                          HI */
   COSTS_N_INSNS (17),      /*                          SI */
   COSTS_N_INSNS (17),      /*                          DI */
   COSTS_N_INSNS (17)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  256,                      /* size of l2 cache */
  32,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),       /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (2),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (7),       /*                               SI */
   COSTS_N_INSNS (7),       /*                               DI */
   COSTS_N_INSNS (7)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),      /*                          HI */
   COSTS_N_INSNS (39),      /*                          SI */
   COSTS_N_INSNS (39),      /*                          DI */
   COSTS_N_INSNS (39)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  1,                        /* MOVE_RATIO */
  1,                        /* cost for loading QImode using movzbl */
  {1, 1, 1},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {1, 1, 1},                /* cost of storing integer registers */
  1,                        /* cost of reg,reg fld/fst */
  {1, 1, 1},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 6, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */

  1,                        /* cost of moving MMX register */
  {1, 1},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {1, 1},                   /* cost of storing MMX registers
                               in SImode and DImode */
  1,                        /* cost of moving SSE register */
  {1, 1, 1},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {1, 1, 1},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  1,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  128,                      /* size of l2 cache.  */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),       /*                               HI */
   COSTS_N_INSNS (3),       /*                               SI */
   COSTS_N_INSNS (3),       /*                               DI */
   COSTS_N_INSNS (3)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),      /*                          HI */
   COSTS_N_INSNS (18),      /*                          SI */
   COSTS_N_INSNS (18),      /*                          DI */
   COSTS_N_INSNS (18)},     /*                       other */
  COSTS_N_INSNS (2),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* MOVE_RATIO */
  3,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {6, 6, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  6,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  32,                       /* size of l2 cache.  Some models
                               have integrated l2 cache, but
                               optimizing for k6 is not important
                               enough to worry about that.  */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (2),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (5),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),       /*                               HI */
   COSTS_N_INSNS (5),       /*                               SI */
   COSTS_N_INSNS (5),       /*                               DI */
   COSTS_N_INSNS (5)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /*                          HI */
   COSTS_N_INSNS (42),      /*                          SI */
   COSTS_N_INSNS (74),      /*                          DI */
   COSTS_N_INSNS (74)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  5,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (3),       /*                               SI */
   COSTS_N_INSNS (4),       /*                               DI */
   COSTS_N_INSNS (5)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /*                          HI */
   COSTS_N_INSNS (42),      /*                          SI */
   COSTS_N_INSNS (74),      /*                          DI */
   COSTS_N_INSNS (74)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 3, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                      /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  5,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  3,                        /* vec_unalign_load_cost.  */
  3,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  2,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (3),       /*                               SI */
   COSTS_N_INSNS (4),       /*                               DI */
   COSTS_N_INSNS (5)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /*                          HI */
   COSTS_N_INSNS (51),      /*                          SI */
   COSTS_N_INSNS (83),      /*                          DI */
   COSTS_N_INSNS (83)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
                            /* On K8:
                                MOVD reg64, xmmreg Double FSTORE 4
                                MOVD reg32, xmmreg Double FSTORE 4
                               On AMDFAM10:
                                MOVD reg64, xmmreg Double FADD 3
                                                   1/1  1/1
                                MOVD reg32, xmmreg Double FADD 3
                                                   1/1  1/1 */
  64,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  2,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (4),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (4),       /*                               SI */
   COSTS_N_INSNS (6),       /*                               DI */
   COSTS_N_INSNS (6)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /*                          HI */
   COSTS_N_INSNS (51),      /*                          SI */
   COSTS_N_INSNS (83),      /*                          DI */
   COSTS_N_INSNS (83)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {5, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {5, 5, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 4},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 4},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  2,                        /* MMX or SSE register to integer */
                            /* On K8:
                                MOVD reg64, xmmreg Double FSTORE 4
                                MOVD reg32, xmmreg Double FSTORE 4
                               On AMDFAM10:
                                MOVD reg64, xmmreg Double FADD 3
                                                   1/1  1/1
                                MOVD reg32, xmmreg Double FADD 3
                                                   1/1  1/1 */
  16,                       /* size of l1 cache.  */
  2048,                     /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),       /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                        /* scalar_stmt_cost.  */
  4,                        /* scalar load_cost.  */
  4,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  4,                        /* vec_align_load_cost.  */
  4,                        /* vec_unalign_load_cost.  */
  4,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (4),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (4),       /*                               SI */
   COSTS_N_INSNS (6),       /*                               DI */
   COSTS_N_INSNS (6)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /*                          HI */
   COSTS_N_INSNS (51),      /*                          SI */
   COSTS_N_INSNS (83),      /*                          DI */
   COSTS_N_INSNS (83)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {5, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {5, 5, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 4},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 4},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  2,                        /* MMX or SSE register to integer */
                            /* On K8:
                                MOVD reg64, xmmreg Double FSTORE 4
                                MOVD reg32, xmmreg Double FSTORE 4
                               On AMDFAM10:
                                MOVD reg64, xmmreg Double FADD 3
                                                   1/1  1/1
                                MOVD reg32, xmmreg Double FADD 3
                                                   1/1  1/1 */
  16,                       /* size of l1 cache.  */
  2048,                     /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),       /* cost of FSQRT instruction.  */

  /* BDVER2 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                        /* scalar_stmt_cost.  */
  4,                        /* scalar load_cost.  */
  4,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  4,                        /* vec_align_load_cost.  */
  4,                        /* vec_unalign_load_cost.  */
  4,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (3),       /*                               SI */
   COSTS_N_INSNS (4),       /*                               DI */
   COSTS_N_INSNS (5)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /*                          HI */
   COSTS_N_INSNS (51),      /*                          SI */
   COSTS_N_INSNS (83),      /*                          DI */
   COSTS_N_INSNS (83)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
                            /* On K8:
                                MOVD reg64, xmmreg Double FSTORE 4
                                MOVD reg32, xmmreg Double FSTORE 4
                               On AMDFAM10:
                                MOVD reg64, xmmreg Double FADD 3
                                                   1/1  1/1
                                MOVD reg32, xmmreg Double FADD 3
                                                   1/1  1/1 */
  32,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */

  /* BTVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  2,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs btver2_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /*                               HI */
   COSTS_N_INSNS (3),       /*                               SI */
   COSTS_N_INSNS (4),       /*                               DI */
   COSTS_N_INSNS (5)},      /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /*                          HI */
   COSTS_N_INSNS (51),      /*                          SI */
   COSTS_N_INSNS (83),      /*                          DI */
   COSTS_N_INSNS (83)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
                            /* On K8:
                                MOVD reg64, xmmreg Double FSTORE 4
                                MOVD reg32, xmmreg Double FSTORE 4
                               On AMDFAM10:
                                MOVD reg64, xmmreg Double FADD 3
                                                   1/1  1/1
                                MOVD reg32, xmmreg Double FADD 3
                                                   1/1  1/1 */
  32,                       /* size of l1 cache.  */
  2048,                     /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */

  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  2,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (3),        /* cost of a lea instruction */
  COSTS_N_INSNS (4),        /* variable shift costs */
  COSTS_N_INSNS (4),        /* constant shift costs */
  {COSTS_N_INSNS (15),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),      /*                               HI */
   COSTS_N_INSNS (15),      /*                               SI */
   COSTS_N_INSNS (15),      /*                               DI */
   COSTS_N_INSNS (15)},     /*                            other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),      /*                          HI */
   COSTS_N_INSNS (56),      /*                          SI */
   COSTS_N_INSNS (56),      /*                          DI */
   COSTS_N_INSNS (56)},     /*                       other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  16,                       /* "large" insn */
  6,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  12,                       /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  10,                       /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (5),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),       /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),                  /*                               HI */
   COSTS_N_INSNS (10),                  /*                               SI */
   COSTS_N_INSNS (10),                  /*                               DI */
   COSTS_N_INSNS (10)},                 /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),                  /*                          HI */
   COSTS_N_INSNS (66),                  /*                          SI */
   COSTS_N_INSNS (66),                  /*                          DI */
   COSTS_N_INSNS (66)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  3,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  6,                                    /* cost of moving MMX register */
  {12, 12},                             /* cost of loading MMX registers
                                           in SImode and DImode */
  {12, 12},                             /* cost of storing MMX registers
                                           in SImode and DImode */
  6,                                    /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {12, 12, 12},                         /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  8,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  1024,                                 /* size of l2 cache.  */
  128,                                  /* size of prefetch block */
  8,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (2)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (2)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */

static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (2)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = &pentium_cost;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = &pentium_cost;
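/* Both pointers start at &pentium_cost only as a placeholder; option
   processing repoints them.  Roughly, a sketch of what
   ix86_option_override_internal does later in this file:

     ix86_tune_cost = processor_target_table[ix86_tune].cost;
     ix86_cost = optimize_size ? &ix86_size_cost : ix86_tune_cost;  */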
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_BTVER2 (1<<PROCESSOR_BTVER2)
#define m_BTVER (m_BTVER1 | m_BTVER2)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
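/* Each entry of ix86_tune_features is a boolean derived from the table
   below by masking with the bit of the CPU selected by -mtune.  Roughly,
   a sketch of the initialization done in ix86_option_override_internal:

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; i++)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);  */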
/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10
  | m_BDVER | m_BTVER | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well.  However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2I7 | m_GENERIC,
  /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
     on 16-bit immediate moves into memory on Core2 and Corei7.  */
  m_CORE2I7 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
     over esp addition.  */
  m_PENT,

  /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,
  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies.  */
  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
    | m_GENERIC),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves.  This knob promotes all store
     destinations to be 128bit to allow register renaming on 128bit SSE
     units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings
     over 20% SPECfp regression, while enabling it on K8 brings roughly
     2.4% regression that can be partly masked by careful scheduling of
     moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER
  | m_GENERIC,

  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER,

  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
  m_COREI7 | m_BDVER,

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  m_BDVER,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
  m_ATOM,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE2I7_64 | m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
     and SImode multiply, but 386 and 486 do HImode multiply faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC64,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
  m_AMDFAM10,

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */
  m_BDVER,

  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */
  m_ATOM,

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */
  ~m_ATOM,

  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */
  m_BDVER,

  /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
     during reassociation of integer computation.  */
  m_ATOM,

  /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
     during reassociation of fp computation.  */
  m_ATOM,
};
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
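/* ix86_arch_features is filled from this table the same way as
   ix86_tune_features above, but keyed off -march (ix86_arch) rather
   than -mtune, since instruction availability is an architecture
   property.  For instance, the TARGET_CMOVE macro in i386.h reduces to
   a test of ix86_arch_features[X86_ARCH_CMOV], so -march=i686 enables
   cmov while -march=i586 does not.  */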
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;
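/* Masks like these gate -mtune-dependent defaults; for example,
   unaligned 256-bit AVX loads are split on the selected CPU roughly as
   follows (a sketch of the logic in ix86_option_override_internal):

     if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
         && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
       target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;  */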
/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,         /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
        8  for %eip    (no gcc equivalent)
        9  for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to understand
   that it should say that a variable lives in %st(0) (when asked via an
   `=' command) if we said it was in DWARF regno 11, but SDB still
   prints garbage when asked for the value of the variable in question
   (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
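/* Example of the mapping at work: GCC's %ecx is regno 2, so DWARF
   location information emitted for SVR4 targets describes it as
   register svr4_dbx_register_map[2] == 1, matching the SVR4 numbering
   in the comment above; %st(0) (gcc regno 8) likewise becomes DWARF
   regno 11.  */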
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                        <- ARG_POINTER
   saved pc

   saved static chain                   if ix86_static_chain_on_stack

   saved frame pointer                  if frame_pointer_needed
                                        <- HARD_FRAME_POINTER
   [saved regs]
                                        <- reg_save_offset
   [padding0]

   [saved SSE regs]
                                        <- sse_regs_save_offset
   [padding1]          |
                       |                <- FRAME_POINTER
   [va_arg registers]  |
                       |
   [frame]             |
                       |
   [padding2]          | = to_allocate
                                        <- STACK_POINTER
  */
struct ix86_frame
{
  int nsseregs;
  int nregs;
  int va_arg_size;
  int red_zone_size;
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";
static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
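/* These hooks let word-size-independent code emit the right patterns;
   they are pointed at the DImode or SImode generators during option
   processing.  Roughly, a sketch of the assignments made in
   ix86_option_override_internal:

     if (TARGET_64BIT)
       {
         ix86_gen_leave = gen_leave_rex64;
         ix86_gen_add3 = gen_adddi3;
         ix86_gen_sub3 = gen_subdi3;
       }
     else
       {
         ix86_gen_leave = gen_leave;
         ix86_gen_add3 = gen_addsi3;
         ix86_gen_sub3 = gen_subsi3;
       }  */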
/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after loop using movnt.  */
tree x86_mfence;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4
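/* Classification example: a struct { double d; long l; } argument is
   split into two eightbytes, classified X86_64_SSEDF_CLASS and
   X86_64_INTEGER_CLASS respectively, and is therefore passed in one
   SSE register and one integer register.  Anything that cannot be
   described by at most MAX_CLASSES eightbyte classes is given
   X86_64_MEMORY_CLASS and passed on the stack.  (Illustrative; see
   classify_argument later in this file.)  */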
/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static struct machine_function * ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
                                                const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
                                                 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
                                 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
                                          struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
                                                 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);
#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;   /* Processor costs */
  const int align_loop;                 /* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  /* Core 2 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core 2 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  /* Core i7 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core i7 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 32, 24, 32, 7, 32},
  {&bdver2_cost, 32, 24, 32, 7, 32},
  {&btver1_cost, 32, 24, 32, 7, 32},
  {&btver2_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 15, 16, 7, 16}
};
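/* The alignment columns are consumed when the -falign-* options are
   left at their defaults; option handling later does, roughly (sketch):

     if (align_loops == 0)
       align_loops = processor_target_table[ix86_tune].align_loop;
     if (align_jumps == 0)
       align_jumps = processor_target_table[ix86_tune].align_jump;

   so, per the table above, -mtune=k6 yields 32-byte loop alignment.  */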
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
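/* The red zone is the 128-byte area below the stack pointer that the
   x86-64 SVR4 ABI guarantees will not be clobbered asynchronously
   (e.g. by signal handlers), letting leaf functions spill into it
   without adjusting %rsp; the 64-bit MS ABI makes no such guarantee,
   hence the !TARGET_64BIT_MS_ABI test above.  */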
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
                    const char *tune, enum fpmath_unit fpmath,
                    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;         /* option string */
    HOST_WIDE_INT mask;         /* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2 that imply
     preceding options are matched first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-mfma4",         OPTION_MASK_ISA_FMA4 },
    { "-mfma",          OPTION_MASK_ISA_FMA },
    { "-mxop",          OPTION_MASK_ISA_XOP },
    { "-mlwp",          OPTION_MASK_ISA_LWP },
    { "-msse4a",        OPTION_MASK_ISA_SSE4A },
    { "-msse4.2",       OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1",       OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3",        OPTION_MASK_ISA_SSSE3 },
    { "-msse3",         OPTION_MASK_ISA_SSE3 },
    { "-msse2",         OPTION_MASK_ISA_SSE2 },
    { "-msse",          OPTION_MASK_ISA_SSE },
    { "-m3dnow",        OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa",       OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx",          OPTION_MASK_ISA_MMX },
    { "-mabm",          OPTION_MASK_ISA_ABM },
    { "-mbmi",          OPTION_MASK_ISA_BMI },
    { "-mbmi2",         OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt",        OPTION_MASK_ISA_LZCNT },
    { "-mhle",          OPTION_MASK_ISA_HLE },
    { "-mtbm",          OPTION_MASK_ISA_TBM },
    { "-mpopcnt",       OPTION_MASK_ISA_POPCNT },
    { "-mmovbe",        OPTION_MASK_ISA_MOVBE },
    { "-mcrc32",        OPTION_MASK_ISA_CRC32 },
    { "-maes",          OPTION_MASK_ISA_AES },
    { "-mpclmul",       OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase",     OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd",        OPTION_MASK_ISA_RDRND },
    { "-mf16c",         OPTION_MASK_ISA_F16C },
    { "-mrtm",          OPTION_MASK_ISA_RTM },
  };

  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double",           MASK_128BIT_LONG_DOUBLE },
    { "-m80387",                        MASK_80387 },
    { "-maccumulate-outgoing-args",     MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double",                 MASK_ALIGN_DOUBLE },
    { "-mcld",                          MASK_CLD },
    { "-mfp-ret-in-387",                MASK_FLOAT_RETURNS },
    { "-mieee-fp",                      MASK_IEEE_FP },
    { "-minline-all-stringops",         MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields",                 MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops",           MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387",            MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args",                 MASK_NO_PUSH_ARGS },
    { "-mno-red-zone",                  MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer",      MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip",                        MASK_RECIP },
    { "-mrtd",                          MASK_RTD },
    { "-msseregparm",                   MASK_SSEREGPARM },
    { "-mstack-arg-probe",              MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs",          MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem",             MASK_VECT8_RETURNS },
    { "-m8bit-idiv",                    MASK_USE_8BIT_IDIV },
    { "-mvzeroupper",                   MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load",  MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128",                MASK_PREFER_AVX128 },
  };

  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
  const char *abi;

  memset (opts, '\0', sizeof (opts));

  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Add -m32/-m64/-mx32.  */
  if ((isa & OPTION_MASK_ISA_64BIT) != 0)
    {
      if ((isa & OPTION_MASK_ABI_64) != 0)
        abi = "-m64";
      else
        abi = "-mx32";
      isa &= ~ (OPTION_MASK_ISA_64BIT
                | OPTION_MASK_ABI_64
                | OPTION_MASK_ABI_X32);
    }
  else
    abi = "-m32";
  opts[num++][0] = abi;

  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
        {
          opts[num++][0] = isa_opts[i].option;
          isa &= ~ isa_opts[i].mask;
        }
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
               isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
        {
          opts[num++][0] = flag_opts[i].option;
          flags &= ~ flag_opts[i].mask;
        }
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }

  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
        {
        case FPMATH_387:
          opts[num++][1] = "387";
          break;

        case FPMATH_SSE:
          opts[num++][1] = "sse";
          break;

        case FPMATH_387 | FPMATH_SSE:
          opts[num++][1] = "sse+387";
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Any options?  */
  if (num == 0)
    return NULL;

  gcc_assert (num < ARRAY_SIZE (opts));

  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
        if (opts[i][j])
          len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];
      char *comma = "";
      size_t comma_len = 0;

      for (j = 0; j < 2; j++)
        len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
        {
          comma = ", ";
          comma_len = 2;
        }

      /* If adding this option would overflow the line, start a new
         line instead.  */
      if (add_nl_p && line_len + len2[0] + len2[1] > 70)
        {
          comma = ",\n";
          line_len = 0;
        }

      memcpy (ptr, comma, comma_len);
      ptr += comma_len;

      for (j = 0; j < 2; j++)
        if (opts[i][j])
          {
            memcpy (ptr, opts[i][j], len2[j]);
            ptr += len2[j];
            line_len += len2[j];
          }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
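/* For example (illustrative invocation; the exact output depends on the
   configuration and the flag values passed in):

     char *s = ix86_target_string (ix86_isa_flags, target_flags,
                                   "corei7", "generic", ix86_fpmath, true);

   might yield "-march=corei7 -mtune=generic -m64 -msse4.2 -mfpmath=sse";
   the caller must free the returned string.  */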
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.  Note: for x86 with "hotfix" patching, a sorry is
   issued.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}

/* Function that is callable from the debugger to print the current
   options.  */
static void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
                                   ix86_arch_string, ix86_tune_string,
                                   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);

  return;
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
#define PTA_3DNOW               (HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A             (HOST_WIDE_INT_1 << 1)
#define PTA_64BIT               (HOST_WIDE_INT_1 << 2)
#define PTA_ABM                 (HOST_WIDE_INT_1 << 3)
#define PTA_AES                 (HOST_WIDE_INT_1 << 4)
#define PTA_AVX                 (HOST_WIDE_INT_1 << 5)
#define PTA_BMI                 (HOST_WIDE_INT_1 << 6)
#define PTA_CX16                (HOST_WIDE_INT_1 << 7)
#define PTA_F16C                (HOST_WIDE_INT_1 << 8)
#define PTA_FMA                 (HOST_WIDE_INT_1 << 9)
#define PTA_FMA4                (HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE            (HOST_WIDE_INT_1 << 11)
#define PTA_LWP                 (HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT               (HOST_WIDE_INT_1 << 13)
#define PTA_MMX                 (HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE               (HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF             (HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL              (HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT              (HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE        (HOST_WIDE_INT_1 << 19)
#define PTA_RDRND               (HOST_WIDE_INT_1 << 20)
#define PTA_SSE                 (HOST_WIDE_INT_1 << 21)
#define PTA_SSE2                (HOST_WIDE_INT_1 << 22)
#define PTA_SSE3                (HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1              (HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2              (HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A               (HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3               (HOST_WIDE_INT_1 << 27)
#define PTA_TBM                 (HOST_WIDE_INT_1 << 28)
#define PTA_XOP                 (HOST_WIDE_INT_1 << 29)
#define PTA_AVX2                (HOST_WIDE_INT_1 << 30)
#define PTA_BMI2                (HOST_WIDE_INT_1 << 31)
#define PTA_RTM                 (HOST_WIDE_INT_1 << 32)
#define PTA_HLE                 (HOST_WIDE_INT_1 << 33)
/* if this reaches 64, need to widen struct pta flags below */
  static struct pta
    {
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, CPU_NONE, 0},
      {"i486", PROCESSOR_I486, CPU_NONE, 0},
      {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
      {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
        PTA_MMX | PTA_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
        PTA_MMX | PTA_SSE | PTA_SSE2},
      {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
        PTA_MMX | PTA_SSE | PTA_SSE2},
      {"prescott", PROCESSOR_NOCONA, CPU_NONE,
        PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
      {"nocona", PROCESSOR_NOCONA, CPU_NONE,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_CX16 | PTA_NO_SAHF},
      {"core2", PROCESSOR_CORE2_64, CPU_CORE2,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_CX16},
      {"corei7", PROCESSOR_COREI7_64, CPU_COREI7,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16},
      {"corei7-avx", PROCESSOR_COREI7_64, CPU_COREI7,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
        | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL},
      {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX
        | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
        | PTA_RDRND | PTA_F16C},
      {"core-avx2", PROCESSOR_COREI7_64, CPU_COREI7,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
        | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
        | PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
        | PTA_FMA | PTA_MOVBE | PTA_RTM | PTA_HLE},
      {"atom", PROCESSOR_ATOM, CPU_ATOM,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
      {"geode", PROCESSOR_GEODE, CPU_GEODE,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
      {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
        PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
      {"k8", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_NO_SAHF},
      {"k8-sse3", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
      {"opteron", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_NO_SAHF},
      {"opteron-sse3", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
      {"athlon64", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_NO_SAHF},
      {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
      {"athlon-fx", PROCESSOR_K8, CPU_K8,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_NO_SAHF},
      {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
      {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
        PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
        | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
      {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
        | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
        | PTA_XOP | PTA_LWP},
      {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
        | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
        | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
        | PTA_FMA},
      {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
      {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
        PTA_HLE /* flags are only used for -march switch.  */ },
      {"btver2", PROCESSOR_BTVER2, CPU_GENERIC64,
        PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
        | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
        | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
        | PTA_BMI | PTA_F16C | PTA_MOVBE},
      {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
        PTA_64BIT
        | PTA_HLE /* flags are only used for -march switch.  */ },
    };
  /* -mrecip options.  */
  static struct
    {
      const char *string;               /* option name */
      unsigned int mask;                /* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",       RECIP_MASK_ALL },
      { "none",      RECIP_MASK_NONE },
      { "div",       RECIP_MASK_DIV },
      { "sqrt",      RECIP_MASK_SQRT },
      { "vec-div",   RECIP_MASK_VEC_DIV },
      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
    };
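  /* This table is matched against each comma-separated name given to
     -mrecip=; a leading '!' on a name clears the mask bits instead of
     setting them.  A sketch of the parsing done later during option
     handling (the `invert' flag reflects the '!' prefix):

       if (!strcmp (opt, recip_options[i].string))
         {
           if (invert)
             recip_mask &= ~recip_options[i].mask;
           else
             recip_mask |= recip_options[i].mask;
         }  */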
  int const pta_size = ARRAY_SIZE (processor_alias_table);

  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }
  /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
     TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
  if (TARGET_64BIT_DEFAULT && !TARGET_64BIT)
    ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
#ifdef TARGET_BI_ARCH
  else
    {
#if TARGET_BI_ARCH == 1
      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
         is on and OPTION_MASK_ABI_X32 is off.  We turn off
         OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
         -mx32.  */
      if (TARGET_X32)
        ix86_isa_flags &= ~OPTION_MASK_ABI_64;
#else
      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
         on and OPTION_MASK_ABI_64 is off.  We turn off
         OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
         -m64.  */
      if (TARGET_LP64)
        ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
#endif
    }
#endif

  if (TARGET_X32)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
         OPTION_MASK_ABI_64 for TARGET_X32.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_64;
    }
  else if (TARGET_LP64)
    {
      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
         OPTION_MASK_ABI_X32 for TARGET_LP64.  */
      ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
      ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
    }
#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
          || !strcmp (ix86_tune_string, "i686")
          /* As special support for cross compilers we read -mtune=native
             as -mtune=generic.  With native compilers we won't see the
             -mtune=native, as it was changed by the driver.  */
          || !strcmp (ix86_tune_string, "native"))
        {
          if (TARGET_64BIT)
            ix86_tune_string = "generic64";
          else
            ix86_tune_string = "generic32";
        }
      /* If this call is for setting the option attribute, allow the
         generic32/generic64 that was previously set.  */
      else if (!main_args_p
               && (!strcmp (ix86_tune_string, "generic32")
                   || !strcmp (ix86_tune_string, "generic64")))
        ;
      else if (!strncmp (ix86_tune_string, "generic", 7))
        error ("bad value (%s) for %stune=%s %s",
               ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
        warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
                 "%stune=k8%s or %stune=generic%s instead as appropriate",
                 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
        ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
        {
          ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
          ix86_tune_defaulted = 1;
        }

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
         need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
          || !strcmp (ix86_tune_string, "x86-64")
          || !strcmp (ix86_tune_string, "i686"))
        {
          if (TARGET_64BIT)
            ix86_tune_string = "generic64";
          else
            ix86_tune_string = "generic32";
        }
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  if (global_options_set.x_ix86_pmode)
    {
      if ((TARGET_LP64 && ix86_pmode == PMODE_SI)
          || (!TARGET_64BIT && ix86_pmode == PMODE_DI))
        error ("address mode %qs not supported in the %s bit mode",
               TARGET_64BIT ? "short" : "long",
               TARGET_64BIT ? "64" : "32");
    }
  else
    ix86_pmode = TARGET_LP64 ? PMODE_DI : PMODE_SI;
  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
        {
        case CM_SMALL:
        case CM_SMALL_PIC:
          if (flag_pic)
            ix86_cmodel = CM_SMALL_PIC;
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "small", "32");
          break;

        case CM_MEDIUM:
        case CM_MEDIUM_PIC:
          if (flag_pic)
            ix86_cmodel = CM_MEDIUM_PIC;
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "medium", "32");
          else if (TARGET_X32)
            error ("code model %qs not supported in x32 mode",
                   "medium");
          break;

        case CM_LARGE:
        case CM_LARGE_PIC:
          if (flag_pic)
            ix86_cmodel = CM_LARGE_PIC;
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "large", "32");
          else if (TARGET_X32)
            error ("code model %qs not supported in x32 mode",
                   "large");
          break;

        case CM_32:
          if (flag_pic)
            error ("code model %s does not support PIC mode", "32");
          if (TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "32", "64");
          break;

        case CM_KERNEL:
          if (flag_pic)
            {
              error ("code model %s does not support PIC mode", "kernel");
              ix86_cmodel = CM_32;
            }
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "kernel", "32");
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
         use of rip-relative addressing.  This eliminates fixups that
         would otherwise be needed if this object is to be placed in a
         DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
        ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
        ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;

	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");

	if (processor_alias_table[i].flags & PTA_MMX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
	  ix86_isa_flags |= OPTION_MASK_ISA_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
	  ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & PTA_RTM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
	  ix86_isa_flags |= OPTION_MASK_ISA_RTM;
	if (processor_alias_table[i].flags & PTA_HLE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
	  ix86_isa_flags |= OPTION_MASK_ISA_HLE;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
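
  /* Note on the pattern above: each PTA_* capability implied by -march=CPU
     is merged into ix86_isa_flags only when the corresponding
     OPTION_MASK_ISA_* bit is absent from ix86_isa_flags_explicit, so an
     explicit -mno-* option always wins.  For example,
     "-march=corei7 -mno-sse4.2" leaves SSE4.2 disabled even though the
     selected CPU supports it.  */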
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }
  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);

  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  ix86_tune_cost = processor_target_table[ix86_tune].cost;
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = ix86_tune_cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;
  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;
  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;

  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
      /* The i386 ABI does not specify a red zone.  It still makes sense
	 to use one when the programmer takes care to keep the stack from
	 being destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }

  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;
  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? (TARGET_SSE ? 4 : 3) : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
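
  /* Worked example for the conversion above: the option argument is a
     log2 byte count, so -mpreferred-stack-boundary=4 gives
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte stack
     alignment.  */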
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;
  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When the scheduling description is not available, disable the
     scheduler passes so they won't slow down the compilation and won't
     make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_tune_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ix86_tune_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
			 ix86_tune_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
			 ix86_tune_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
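
  /* The calls above seed the generic --param defaults from the selected
     tuning's cost table; maybe_set_param_value leaves any value the user
     set explicitly (e.g. --param l2-cache-size=512) untouched.  */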
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      if (Pmode == DImode)
	{
	  ix86_gen_monitor = gen_sse3_monitor64_di;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_di;
	}
      else
	{
	  ix86_gen_monitor = gen_sse3_monitor64_si;
	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
	  ix86_gen_tls_local_dynamic_base_64
	    = gen_tls_local_dynamic_base_64_si;
	}
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_monitor = gen_sse3_monitor;
    }

  if (Pmode == DImode)
    {
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }
#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif

  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  if (TARGET_AVX)
    {
      /* When not optimizing for size, enable the vzeroupper optimization
	 for TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned loads/stores.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation
	     for the auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable the vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
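
  /* Example of the syntax parsed above: -mrecip=all,!sqrt enables every
     reciprocal approximation except the sqrt forms; a leading '!' inverts
     the named option, and "default" is treated like "all".  */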
  /* Save the initial options in case the user does function specific
     options.  */
  if (main_args_p)
    target_option_default_node = target_option_current_node
      = build_target_option_node ();
}
/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}

/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
/* Save the current options */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))): take one
   argument and set the current options from it.  If we have a list,
   recursively go over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
    IX86_ATTR_ISA ("rtm",	OPT_mrtm),
    IX86_ATTR_ISA ("hle",	OPT_mhle),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      bool ret = true;

      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
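
/* Usage sketch for the parser above (illustrative, matching the attrs[]
   table): a declaration such as

     int foo (void) __attribute__ ((target ("sse4.2,no-avx,arch=core2")));

   is split at the commas; a "no-" prefix turns an ISA flag off, "arch="
   and "tune=" are collected into p_strings[], and "fpmath=" goes through
   the enum path.  */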
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return NULL_TREE;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (!new_target)
    ret = false;

  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
	 function can inline a SSE2 function but a SSE2 function can't inline
	 a SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
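
/* Illustrative note: ix86_section_threshold is controlled by
   -mlarge-data-threshold=N (assuming this tree matches the upstream option
   name), so under the medium code models an object larger than N bytes --
   or an incomplete type of size 0, per the comment above -- counts as
   large data.  */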
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section *x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
     ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
   and "sseregparm" calling convention attributes;
   arguments as in struct attribute_spec.handler.  */

static tree
ix86_handle_cconv_attribute (tree *node, tree name,
			     tree args,
			     int flags ATTRIBUTE_UNUSED,
			     bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall, and thiscall.  */
  if (is_attribute_p ("regparm", name))
    {
      tree cst;

      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and regparm attributes are not compatible");

      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("regparam and thiscall attributes are not compatible");

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
	{
	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
		   name, REGPARM_MAX);
	  *no_add_attrs = true;
	}

      return NULL_TREE;
    }

  if (TARGET_64BIT)
    {
      /* Do not warn when emulating the MS ABI.  */
      if ((TREE_CODE (*node) != FUNCTION_TYPE
	   && TREE_CODE (*node) != METHOD_TYPE)
	  || ix86_function_type_abi (*node) != MS_ABI)
	warning (OPT_Wattributes, "%qE attribute ignored",
		 name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
  if (is_attribute_p ("fastcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and stdcall attributes are not compatible");
      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and regparm attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and thiscall attributes are not compatible");
    }

  /* Can combine stdcall with fastcall (redundant), regparm and
     sseregparm.  */
  else if (is_attribute_p ("stdcall", name))
    {
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and fastcall attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
    }

  /* Can combine cdecl with regparm and sseregparm.  */
  else if (is_attribute_p ("cdecl", name))
    {
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and cdecl attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and cdecl attributes are not compatible");
      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }
  else if (is_attribute_p ("thiscall", name))
    {
      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
		 name);
      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
	error ("stdcall and thiscall attributes are not compatible");
      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
	error ("fastcall and thiscall attributes are not compatible");
      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
	error ("cdecl and thiscall attributes are not compatible");
    }

  /* Can combine sseregparm with all attributes.  */

  return NULL_TREE;
}
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
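
/* Illustrative summary of the precedence implemented above: an explicit
   cdecl/stdcall/fastcall/thiscall attribute wins; otherwise -mrtd makes
   any non-variadic function default to stdcall; otherwise a plain method
   under the 32-bit MS ABI defaults to thiscall, and everything else to
   cdecl.  */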
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}
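
/* As an illustration (hypothetical declaration): for

       int __attribute__ ((regparm (2))) g (int a, int b);

   ix86_get_callcvt sets IX86_CALLCVT_REGPARM, the attribute argument 2
   is extracted through TREE_VALUE above, and A and B travel in EAX and
   EDX instead of on the stack.  For fastcall the answer is always 2
   (ECX, EDX) and for thiscall always 1 (ECX).  */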
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
     of args is fixed, but if the number is variable then the caller
     must pop them all.  RTD can't be used for library calls now
     because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
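
/* As an illustration (hypothetical declaration): for

       void __attribute__ ((stdcall)) h (int a, int b);

   SIZE is 8 and stdarg_p is false, so the callee pops its own
   arguments ("ret $8").  A cdecl function reaches the final return 0
   and leaves the popping to the caller.  */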
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
        return (regno < REGPARM_MAX
                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
        return (regno < REGPARM_MAX
	        || (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
	        || (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
        return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
          && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
        return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then. Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
/* Return true if we do not know how to pass TYPE solely in registers.  */

static bool
ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (must_pass_in_stack_var_size_or_pad (mode, type))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && mode == TImode
	  && type && TREE_CODE (type) != VECTOR_TYPE);
}
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */

int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */

enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
        return true;
    }
  return false;
}
static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (! fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
        fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
          /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */

void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
   Avoid expensive re-initialization of init_regs each time we switch
   function context, since this is needed only during RTL expansion.  */

static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;
  tree fnret_type;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
  if (caller)
    {
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;
    }

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
      if (fntype)
	fnret_type = TREE_TYPE (fntype);
      else
	fnret_type = NULL;
    }

  if (TARGET_VZEROUPPER && fnret_type)
    {
      rtx fnret_value = ix86_function_value (fnret_type, fntype,
					     false);
      if (function_pass_avx256_p (fnret_value))
	{
	  /* The return value of this function uses 256bit AVX modes.  */
	  if (caller)
	    cfun->machine->callee_return_avx256_p = true;
	  else
	    cfun->machine->caller_return_avx256_p = true;
	}
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum && !warnedavx && cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else if ((size == 8 || size == 16) && !TARGET_SSE)
		  {
		    static bool warnedsse;

		    if (cum && !warnedsse && cum->warn_sse)
		      {
			warnedsse = true;
			warning (0, "SSE vector argument without SSE "
				 "enabled changes the ABI");
		      }
		    return mode;
		  }
		else
		  return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
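
/* As an illustration: X86_64_INTEGERSI_CLASS merged with
   X86_64_SSESF_CLASS (an int and a float packed in one eightbyte)
   yields X86_64_INTEGERSI_CLASS by rule #4, while X86_64_X87_CLASS
   merged with X86_64_SSE_CLASS yields X86_64_MEMORY_CLASS by rule #5,
   pushing the whole argument onto the stack.  */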
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode
      && targetm.calls.must_pass_in_stack (mode, type))
    return 0;

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
      if (bytes > 32)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_low_cst (DECL_SIZE (field), 0)
				+ 63) / 8 / 8; i++)
			classes[i] =
			  merge_classes (X86_64_INTEGER_CLASS,
					 classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 256);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos] =
			  merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
		  if (!num)
		    return 0;
		  for (i = 0; i < num; i++)
		    classes[i] = merge_classes (subclasses[i], classes[i]);
		}
	    }
	  break;

	default:
	  gcc_unreachable ();
	}

      if (words > 2)
	{
	  /* When size > 16 bytes, if the first one isn't
	     X86_64_SSE_CLASS or any other ones aren't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
	  if (classes[0] != X86_64_SSE_CLASS)
	    return 0;

	  for (i = 1; i < words; i++)
	    if (classes[i] != X86_64_SSEUP_CLASS)
	      return 0;
	}

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
	{
	  /* If one class is MEMORY, everything should be passed in
	     memory.  */
	  if (classes[i] == X86_64_MEMORY_CLASS)
	    return 0;

	  /* The X86_64_SSEUP_CLASS should be always preceded by
	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
	  if (classes[i] == X86_64_SSEUP_CLASS
	      && classes[i - 1] != X86_64_SSE_CLASS
	      && classes[i - 1] != X86_64_SSEUP_CLASS)
	    {
	      /* The first one should never be X86_64_SSEUP_CLASS.  */
	      gcc_assert (i != 0);
	      classes[i] = X86_64_SSE_CLASS;
	    }

	  /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
	     everything should be passed in memory.  */
	  if (classes[i] == X86_64_X87UP_CLASS
	      && (classes[i - 1] != X86_64_X87_CLASS))
	    {
	      static bool warned;

	      /* The first one should never be X86_64_X87UP_CLASS.  */
	      gcc_assert (i != 0);
	      if (!warned && warn_psabi)
		{
		  warned = true;
		  inform (input_location,
			  "the ABI of passing union with long double"
			  " has changed in GCC 4.4");
		}
	      return 0;
	    }
	}
      return words;
    }

  /* Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
	mode_alignment = 128;
      else if (mode == XCmode)
	mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
	mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
	return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case SDmode:
    case DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case DImode:
    case SImode:
    case HImode:
    case QImode:
    case CSImode:
    case CHImode:
    case CQImode:
      {
	int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);

	if (size <= 32)
	  {
	    classes[0] = X86_64_INTEGERSI_CLASS;
	    return 1;
	  }
	else if (size <= 64)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    return 1;
	  }
	else if (size <= 64+32)
	  {
	    classes[0] = X86_64_INTEGER_CLASS;
	    classes[1] = X86_64_INTEGERSI_CLASS;
	    return 2;
	  }
	else if (size <= 64+64)
	  {
	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
	    return 2;
	  }
	else
	  gcc_unreachable ();
      }
    case CDImode:
    case TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case COImode:
    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case CTImode:
      return 0;
    case SFmode:
      if (!(bit_offset % 64))
	classes[0] = X86_64_SSESF_CLASS;
      else
	classes[0] = X86_64_SSE_CLASS;
      return 1;
    case DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
	return 1;
      else
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing structure with complex float"
		      " member has changed in GCC 4.4");
	    }
	  classes[1] = X86_64_SSESF_CLASS;
	  return 2;
	}
    case DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case V4SFmode:
    case V4SImode:
    case V16QImode:
    case V8HImode:
    case V2DFmode:
    case V2DImode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case V1TImode:
    case V1DImode:
    case V2SFmode:
    case V2SImode:
    case V4HImode:
    case V8QImode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case BLKmode:
    case VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
	return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
	classes[0] = X86_64_INTEGERSI_CLASS;
      else
	classes[0] = X86_64_INTEGER_CLASS;
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
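
/* Worked example (hypothetical type): on x86-64,

       struct s { double d; int a; int b; };

   is 16 bytes, i.e. two eightbytes.  D alone fills the first eightbyte
   and classifies as X86_64_SSEDF_CLASS; A contributes
   X86_64_INTEGERSI_CLASS and B, starting at bit 96, contributes
   X86_64_INTEGER_CLASS, which merge to X86_64_INTEGER_CLASS for the
   second eightbyte.  classify_argument returns 2, and the struct is
   passed in one SSE and one integer register.  */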
/* Examine the argument and return set number of register required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == TFmode)
      && intreg[0] + 1 == intreg[1])
    return gen_rtx_REG (mode, intreg[0]);

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    tmpmode
	      = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  /* We've requested 24 bytes we
	     don't have mode for.  Use DImode.  */
	  if (tmpmode == BLKmode)
	    tmpmode = DImode;
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp [nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];

  return ret;
}
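
/* Continuing the example above (hypothetical type): for
   struct s { double d; int a; int b; } passed as an argument, the
   classes { SSEDF, INTEGER } hit none of the simple cases, so the loop
   builds a PARALLEL roughly of the shape

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   recording which register carries which eightbyte of the value.  */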
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case DImode:
    case SImode:
    case HImode:
    case QImode:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;

      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cum->regno = 0;
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
    case TImode:
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
}
static void
function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 256bit vector mode parameters are passed on stack.  */
  if (!named && VALID_AVX256_REG_MODE (mode))
    return;

  if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = (cum->words + align - 1) & ~(align - 1);
      cum->words += words;
    }
}
static void
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
    }
}
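
/* As an illustration of the MS ABI rule above: argument slots are
   strictly positional, so after a double passed in XMM0 the next
   integer argument goes to RDX (the second slot), not RCX; this is why
   a single regno/nregs pair is advanced here rather than separate
   integer and SSE counters as in the SYSV case.  */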
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  switch (mode)
    {
    default:
      break;

    case BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case DImode:
    case SImode:
    case HImode:
    case QImode:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
	        break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case DFmode:
      if (cum->float_in_sse < 2)
	break;
    case SFmode:
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_SSE && !warnedsse && cum->warn_sse)
	    {
	      warnedsse = true;
	      warning (0, "SSE vector argument without SSE enabled "
		       "changes the ABI");
	    }
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case V8QImode:
    case V4HImode:
    case V2SImode:
    case V2SFmode:
    case V1TImode:
    case V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector argument without MMX enabled "
		       "changes the ABI");
	    }
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	regno = cum->regno + FIRST_SSE_REG;
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
    {
      /* This argument uses 256bit AVX modes.  */
      if (cum->caller)
	cfun->machine->callee_pass_avx256_p = true;
      else
	cfun->machine->caller_pass_avx256_p = true;
    }

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
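
/* As an illustration (hypothetical types): under the MS ABI a 12-byte
   struct fails the 1/2/4/8 size check above and is passed by
   reference, as is the 16-byte __m128; an 8-byte struct is passed by
   value in a single integer register.  */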
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages passes arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "The ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;

    case FIRST_FLOAT_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then. Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_abi == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

    case FIRST_SSE_REG:
      return TARGET_SSE;

    case FIRST_MMX_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if ((sse_level >= 1 && mode == SFmode)
	  || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case SFmode:
	case SCmode:
	case DFmode:
	case DCmode:
	case TFmode:
	case SDmode:
	case DDmode:
	case TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case XFmode:
	case XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool ATTRIBUTE_UNUSED
return_in_memory_32 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size;

  if (mode == BLKmode)
    return true;

  size = int_size_in_bytes (type);

  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    return false;

  if (VECTOR_MODE_P (mode) || mode == TImode)
    {
      /* User-created vectors small enough to fit in EAX.  */
      if (size < 8)
	return false;

      /* MMX/3dNow values are returned in MM0,
	 except when it doesn't exist or the ABI prescribes otherwise.  */
      if (size == 8)
	return !TARGET_MMX || TARGET_VECT8_RETURNS;

      /* SSE values are returned in XMM0, except when it doesn't exist.  */
      if (size == 16)
	return !TARGET_SSE;

      /* AVX values are returned in YMM0, except when it doesn't exist.  */
      if (size == 32)
	return !TARGET_AVX;
    }

  if (mode == XFmode)
    return false;

  if (size > 12)
    return true;

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return false;
}
static bool ATTRIBUTE_UNUSED
return_in_memory_64 (const_tree type, enum machine_mode mode)
{
  int needed_intregs, needed_sseregs;
  return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
}
static bool ATTRIBUTE_UNUSED
return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
{
  HOST_WIDE_INT size = int_size_in_bytes (type);

  /* __m128 is returned in xmm0.  */
  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
      && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
    return false;

  /* Otherwise, the size must be exactly in [1248].  */
  return size != 1 && size != 2 && size != 4 && size != 8;
}
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
#ifdef SUBTARGET_RETURN_IN_MEMORY
  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
#else
  const enum machine_mode mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return return_in_memory_ms_64 (type, mode);
      else
	return return_in_memory_64 (type, mode);
    }
  else
    return return_in_memory_32 (type, mode);
#endif
}
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */

static rtx
ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
{
  static bool warnedsse, warnedmmx;

  if (!TARGET_64BIT && type)
    {
      /* Look at the return type of the function, not the function type.  */
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));

      if (!TARGET_SSE && !warnedsse)
	{
	  if (mode == TImode
	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
	    {
	      warnedsse = true;
	      warning (0, "SSE vector return without SSE enabled "
		       "changes the ABI");
	    }
	}

      if (!TARGET_MMX && !warnedmmx)
	{
	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
	    {
	      warnedmmx = true;
	      warning (0, "MMX vector return without MMX enabled "
		       "changes the ABI");
	    }
	}
    }

  return NULL;
}
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */

static tree
ix86_build_builtin_va_list_abi (enum calling_abi abi)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* For i386 we use plain pointer to argument area.  */
  if (!TARGET_64BIT || abi == MS_ABI)
    return build_pointer_type (char_type_node);

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);

  /* Initialize abi specific va_list builtin types.  */
  if (TARGET_64BIT)
    {
      tree t;
      if (ix86_abi == MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (SYSV_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  sysv_va_list_type_node = t;
	}
      if (ix86_abi != MS_ABI)
	{
	  t = ix86_build_builtin_va_list_abi (MS_ABI);
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
      else
	{
	  t = ret;
	  if (TREE_CODE (t) != RECORD_TYPE)
	    t = build_variant_type_copy (t);
	  ms_va_list_type_node = t;
	}
    }

  return ret;
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      enum machine_mode smode;
      rtx label, test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (Pmode, virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
			     int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
			       true);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
/* Implement va_start.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
	{
	  rtx reg, seq;

	  reg = gen_reg_rtx (Pmode);
	  cfun->machine->split_stack_varargs_pointer = reg;

	  start_sequence ();
	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
	  seq = get_insns ();
	  end_sequence ();

	  push_topmost_sequence ();
	  emit_insn_after (seq, entry_of_function ());
	  pop_topmost_sequence ();
	}
    }

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
	std_expand_builtin_va_start (valist, nextarg);
      else
	{
	  rtx va_r, next;

	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
	  next = expand_binop (ptr_mode, add_optab,
			       cfun->machine->split_stack_varargs_pointer,
			       crtl->args.arg_offset_rtx,
			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
	  convert_move (va_r, next, 0);
	}
      return;
    }

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
		f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
		  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
	 The function prologue saves it right above the stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
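
/* A reading aid (not part of the source): the net effect of the
   expansion above, written as plain C against the SysV x86-64 va_list
   layout from the psABI.  N_GPR, N_FPR and WORDS stand for the counts
   computed from crtl->args above; the helper name is ours.

     typedef struct {
       unsigned int gp_offset;    // n_gpr * 8, in 0..48
       unsigned int fp_offset;    // 48 + n_fpr * 16, in 48..176
       void *overflow_arg_area;   // incoming stack args + words * 8
       void *reg_save_area;       // register save area in the frame
     } va_list_sysv[1];

     void example_va_start (va_list_sysv ap, int n_gpr, int n_fpr,
			    int words, char *arg_ptr, char *save_area)
     {
       ap->gp_offset = n_gpr * 8;
       ap->fp_offset = n_fpr * 16 + 8 * 6;  // 8 * X86_64_REGPARM_MAX
       ap->overflow_arg_area = arg_ptr + words * 8;
       ap->reg_save_area = save_area;
     }
*/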
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  enum machine_mode nat_mode;
  unsigned int arg_boundary;

  /* Only 64bit target needs something special.  */
  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
  valist = build_va_arg_indirect_ref (valist);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  nat_mode = type_natural_mode (type, NULL);
  switch (nat_mode)
    {
    case V8SFmode:
    case V8SImode:
    case V32QImode:
    case V16HImode:
    case V4DFmode:
    case V4DImode:
      /* Unnamed 256bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* In case we are passing structure, verify that it is a consecutive
	 block on the register save area.  If not we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      enum machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
		  if (mode == BLKmode)
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (fpr, t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
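
/* A reading aid (not part of the source): the control flow emitted
   above for a small integer argument, written as plain C.  The bound
   48 is (X86_64_REGPARM_MAX - needed_intregs + 1) * 8 with
   needed_intregs == 1; the helper and typedef names are ours.

     typedef struct {
       unsigned int gp_offset, fp_offset;
       void *overflow_arg_area, *reg_save_area;
     } va_list_sysv[1];

     int example_va_arg_int (va_list_sysv ap)
     {
       int *p;
       if (ap->gp_offset < 48)	// still fits in the register save area?
	 {
	   p = (int *) ((char *) ap->reg_save_area + ap->gp_offset);
	   ap->gp_offset += 8;
	 }
       else			// ... otherwise out of the overflow area
	 {
	   p = (int *) ap->overflow_arg_area;
	   ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
	 }
       return *p;
     }
*/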
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (&r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
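
/* A reading aid (not part of the source): the encoding returned above,
   for XFmode operands:

     standard_80387_constant_p (0.0)  == 1   -> fldz
     standard_80387_constant_p (1.0)  == 2   -> fld1
     standard_80387_constant_p (pi)   == 7   -> fldpi  (i + 3, i == 4)
     standard_80387_constant_p (-0.0) == 8   -> fldz; fchs
     standard_80387_constant_p (-1.0) == 9   -> fld1; fchs

   anything else yields 0 and must be loaded from memory.  */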
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
				       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
	if (TARGET_SSE2)
	  return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
	if (TARGET_AVX2)
	  return 2;
      default:
	break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  return "%vpxor\t%0, %d0";
	case MODE_V2DF:
	  return "%vxorpd\t%0, %d0";
	case MODE_V4SF:
	  return "%vxorps\t%0, %d0";

	case MODE_OI:
	  return "vpxor\t%x0, %x0, %x0";
	case MODE_V4DF:
	  return "vxorpd\t%x0, %x0, %x0";
	case MODE_V8SF:
	  return "vxorps\t%x0, %x0, %x0";

	default:
	  break;
	}

    case 2:
      if (TARGET_AVX)
	return "vpcmpeqd\t%0, %0, %0";
      else
	return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
/* Returns true if OP contains a symbol reference */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
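
/* A reading aid (not part of the source): the recursive walk above
   answers questions such as

     (plus:SI (reg:SI 0) (const_int 4))           -> false
     (mem:SI (symbol_ref:SI ("table")))           -> true
     (parallel [(set (reg) (label_ref 23)) ...])  -> true

   i.e. a SYMBOL_REF or LABEL_REF at any depth makes it true.  */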
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return 0;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return 0;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[text_coal_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
	 is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (VOIDmode,
		      arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
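
/* A reading aid (not part of the source): on a 64-bit target,
   gen_push applied to %rdi builds the RTL

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))

   and gen_pop the mirror image

     (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))

   which are intended to match the push/pop insn patterns in i386.md.  */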
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile
	  || crtl->calls_eh_return
	  || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_regs[regno]
	  && !fixed_regs[regno]
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues and leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
	   && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	cfun->machine->use_fast_prologue_epilogue = false;
      else
	cfun->machine->use_fast_prologue_epilogue
	  = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
	  the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
	 16-byte aligned default stack, and thus we don't need to be
	 within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !crtl->is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, returns
	 the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
	  && (diff > 240 || (diff & 15) != 0)
	  && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
    }
}
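
/* Worked example (a reading aid, not part of the source): x86-64,
   non-leaf, frame pointer in use, two call-saved GPRs, 40 bytes of
   locals, 16-byte stack_alignment_needed:

     offset =  8   return address skipped
     offset = 16   saved %rbp; hfp_save_offset =
		   hard_frame_pointer_offset = 16
     offset = 32   2 * 8 bytes of pushed GPRs; reg_save_offset = 32
		   no SSE saves, so sse_reg_save_offset = 32
     offset = 32   frame_pointer_offset (already 16-byte aligned)
     offset = 72   + 40 bytes of locals
     offset = 80   rounded up to preferred_alignment;
		   stack_pointer_offset = 80

   so the prologue must allocate to_allocate = 80 - 32 = 48 bytes.  */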
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
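
/* A reading aid (not part of the source) for the length computed above:

     choose_baseaddr_len (BP_REG, 0)   == 1   disp8 forced for %rbp
     choose_baseaddr_len (AX_REG, 0)   == 0   plain (%rax)
     choose_baseaddr_len (AX_REG, 8)   == 1   disp8
     choose_baseaddr_len (AX_REG, 512) == 4   disp32
     choose_baseaddr_len (SP_REG, 8)   == 2   disp8 plus mandatory SIB byte

   i.e. the return value counts the displacement bytes plus any SIB
   byte, not the whole instruction.  */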
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (m->fs.drap_valid)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (m->fs.drap_valid)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (m->fs.fp_valid)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (Pmode, base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION,
			gen_rtx_SET (VOIDmode, mem, reg));
	}
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}

/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}
/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (Pmode == SImode)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	tmp = gen_rtx_REG (DImode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  tmp = hard_frame_pointer_rtx;
	}
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
	add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (VOIDmode, dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in the prologue
   and used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise,
   we use DI for i386 and R13 for x86-64.  We chose R13 since it has
   a shorter encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line. */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && !TARGET_64BIT
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary. */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      unsigned int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it exceeds SIZE.  If only one probe is
	 needed, this will not generate any code.  Then adjust and probe
	 to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	{
	  if (first_probe)
	    {
	      adjust = 2 * PROBE_INTERVAL + dope;
	      first_probe = false;
	    }
	  else
	    adjust = PROBE_INTERVAL;

	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 -adjust)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      if (first_probe)
	adjust = size + PROBE_INTERVAL + dope;
      else
	adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      plus_constant (Pmode, stack_pointer_rtx,
					     - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
			      gen_rtx_PLUS (Pmode, sr.reg,
					    stack_pointer_rtx)));


      /* Step 3: the loop

	 while (SP != LAST_ADDR)
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				  plus_constant (Pmode, stack_pointer_rtx,
						 rounded_size - size)));
	  emit_stack_probe (stack_pointer_rtx);
	}

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		       plus_constant (Pmode, stack_pointer_rtx,
				      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;
    }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
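
/* Worked example (a reading aid, not part of the source): with the
   default PROBE_INTERVAL of 4096 and size = 10000 on a 64-bit target
   (dope = 4 * 8 = 32), the unrolled branch above is taken
   (10000 <= 5 * 4096) and emits, roughly:

     sub  $8224, %rsp	; 2 * 4096 + dope, first interval skipped
     or   $0, (%rsp)
     sub  $4096, %rsp	; i = 8192
     or   $0, (%rsp)
     sub  $1808, %rsp	; 10000 + 4096 - 12288, final probe
     or   $0, (%rsp)
     add  $4128, %rsp	; give back PROBE_INTERVAL + dope

   for a net adjustment of exactly 10000 bytes, touching every page
   on the way down.  */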
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                         -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

           while (TEST_ADDR != LAST_ADDR)
             {
               TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
               probe at TEST_ADDR
             }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (Pmode,
                                         gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       sr.reg),
                                         rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
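
/* Worked example (illustrative): with a 4096-byte PROBE_INTERVAL,
   FIRST = 4096 and SIZE = 10000 take the small-count path above and
   emit probes at SP - 8192, SP - 12288 and SP - 14096, i.e. at
   FIRST + N * PROBE_INTERVAL for N = 1, 2 followed by FIRST + SIZE.  */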
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Finalize the stack_realign_needed flag, which will guide the
   prologue/epilogue to be generated in correct form.  */

static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
                                < (crtl->is_leaf
                                   ? crtl->max_used_stack_slot_alignment
                                   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
         change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && !crtl->need_drap
      && frame_pointer_needed
      && crtl->is_leaf
      && flag_omit_frame_pointer
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
                           HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
        {
          rtx insn;
          FOR_BB_INSNS (bb, insn)
            if (NONDEBUG_INSN_P (insn)
                && requires_stack_frame_p (insn, prologue_used,
                                           set_up_by_prologue))
              {
                crtl->stack_realign_needed = stack_realign;
                crtl->stack_realign_finalized = true;
                return;
              }
        }

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
        crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
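
/* Illustration of the decision above (assumed numbers): with an incoming
   stack boundary of 128 bits but a stack slot needing 256-bit alignment,
   stack_realign is computed as true; if, after reload, no insn actually
   requires a stack frame, the clearing path instead downgrades the
   recorded alignments to the incoming boundary and releases the frame
   pointer.  */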
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
         ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
         prologue variant.  If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
        sorry ("ms_hook_prologue attribute isn%'t compatible "
               "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
         8b ff     movl.s %edi, %edi
         55        push   %ebp
         8b ec     movl.s %esp, %ebp

         This matches the hookable function prologue in Win32 API
         functions in Microsoft Windows XP Service Pack 2 and newer.
         Wine uses this to enable Windows apps to hook the Win32 API
         functions provided by Wine.

         What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
          && !(crtl->drap_reg && crtl->stack_realign_needed))
        {
          rtx push, mov;

          /* We've decided to use the frame pointer already set up.
             Describe this to the unwinder by pretending that both
             push and mov insns happen right here.

             Putting the unwind info here at the end of the ms_hook
             is done so that we can make absolutely certain we get
             the required byte sequence at the start of the function,
             rather than relying on an assembler that can produce
             the exact encoding required.

             However it does mean (in the unpatched case) that we have
             a 1 insn window where the asynchronous unwind info is
             incorrect.  However, if we placed the unwind info at
             its correct location we would have incorrect unwind info
             in the patched case.  Which is probably all moot since
             I don't expect Wine generates dwarf2 unwind info for the
             system libraries that use this feature.  */

          insn = emit_insn (gen_blockage ());

          push = gen_push (hard_frame_pointer_rtx);
          mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
                             stack_pointer_rtx);
          RTX_FRAME_RELATED_P (push) = 1;
          RTX_FRAME_RELATED_P (mov) = 1;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

          /* Note that gen_push incremented m->fs.cfa_offset, even
             though we didn't emit the push insn here.  */
          m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.cfa_offset;
          m->fs.fp_valid = true;
        }
      else
        {
          /* The frame pointer is not needed so pop %ebp again.
             This leaves us with a pristine state.  */
          emit_insn (gen_pop (hard_frame_pointer_rtx));
        }
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
         only as a stack adjustment.  The real copy of the register as
         a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        {
          /* Push arg pointer reg.  */
          insn = emit_insn (gen_push (crtl->drap_reg));
          RTX_FRAME_RELATED_P (insn) = 1;
        }

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
         address can be reached via (argp - 1) slot.  This is needed
         to implement macro RETURN_ADDR_RTX and intrinsic function
         expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
         we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }

  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
         slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push registers now, before setting the frame pointer
         on SEH target.  */
      if (!int_registers_saved
          && TARGET_SEH
          && !frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
        {
          insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
          RTX_FRAME_RELATED_P (insn) = 1;

          if (m->fs.cfa_reg == stack_pointer_rtx)
            m->fs.cfa_reg = hard_frame_pointer_rtx;
          m->fs.fp_offset = m->fs.sp_offset;
          m->fs.fp_valid = true;
        }
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
        {
          ix86_emit_save_regs ();
          int_registers_saved = true;
          gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
        }

      /* When using red zone we may start register saving before allocating
         the stack frame, saving one cycle of the prologue.  However, avoid
         doing this if we have to probe the stack; at least on x86_64 the
         stack probe can turn into a call that clobbers a red zone location.  */
      else if (ix86_using_red_zone ()
               && (! TARGET_STACK_PROBE
                   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
        {
          ix86_emit_save_regs_using_mov (frame.reg_save_offset);
          int_registers_saved = true;
        }
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
         that we must allocate the size of the register save area before
         performing the actual alignment.  Otherwise we cannot guarantee
         that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                   GEN_INT (m->fs.sp_offset
                                            - frame.sse_reg_save_offset),
                                   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
                                        stack_pointer_rtx,
                                        GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
         pointer is no longer valid.  As for the value of sp_offset,
         see ix86_compute_frame_layout, which we need to match in order
         to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;
  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
        {
          if (ix86_static_chain_on_stack)
            stack_size += UNITS_PER_WORD;

          if (!call_used_regs[REGNO (crtl->drap_reg)])
            stack_size += UNITS_PER_WORD;

          /* This over-estimates by 1 minimal-stack-alignment-unit but
             mitigates that by counting in the new return address slot.  */
          current_function_dynamic_stack_size
            += crtl->stack_alignment_needed / BITS_PER_UNIT;
        }

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size =
        frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
         written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-sse_size), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
        {
          ix86_adjust_stack_and_probe (allocate);
          allocate = 0;
        }
      else
        {
          HOST_WIDE_INT size = allocate;

          if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
            size = 0x80000000 - STACK_CHECK_PROTECT - 1;

          if (TARGET_STACK_PROBE)
            ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
          else
            ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
        }
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
           || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-allocate), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn) (rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
        eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
        {
          emit_insn (gen_push (eax));
          allocate -= UNITS_PER_WORD;
        }
      if (r10_live)
        {
          r10 = gen_rtx_REG (Pmode, R10_REG);
          emit_insn (gen_push (r10));
          allocate -= UNITS_PER_WORD;
        }

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (Pmode == DImode
                           ? gen_pro_epilogue_adjust_stack_di_sub
                           : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
                                           stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
         pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
        {
          if (m->fs.cfa_reg == stack_pointer_rtx)
            m->fs.cfa_offset += allocate;

          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                        gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    -allocate)));
        }
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                          gen_frame_mem (word_mode, t));
          t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
          emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
                          gen_frame_mem (word_mode, t));
        }
      else if (eax_live || r10_live)
        {
          t = choose_baseaddr (m->fs.sp_offset - allocate);
          emit_move_insn (gen_rtx_REG (word_mode,
                                       (eax_live ? AX_REG : R10_REG)),
                          gen_frame_mem (word_mode, t));
        }
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (frame.stack_pointer_offset
                                     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
        m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
        SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
        {
          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx label, tmp_reg;

              gcc_assert (Pmode == DImode);
              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              tmp_reg = gen_rtx_REG (Pmode, R11_REG);
              gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
              insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                                   label));
              insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
              insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
                                               pic_offset_table_rtx, tmp_reg));
            }
          else
            insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
        }
      else
        {
          insn = emit_insn (gen_set_got (pic_offset_table_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;
          add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
        }
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realign
         isn't necessary; here we will emit a prologue to set up DRAP
         without the stack realign adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into the register save push
     sequence when access to the redzone area is done through the frame
     pointer.  The offset between the frame pointer and the stack pointer
     is calculated relative to the value of the stack pointer at the end
     of the function prologue, and moving instructions that access the
     redzone area via the frame pointer inside the push sequence violates
     this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
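
/* Rough shape of a simple 32-bit prologue produced by the code above
   (illustrative only; the exact sequence depends on the frame layout
   and target flags):

       push    %ebp            # save and establish the frame pointer
       movl    %esp, %ebp
       push    %ebx            # saved integer registers, if any
       subl    $24, %esp       # allocate the local frame
*/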
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
         like *(%ebp - 8).  We've just popped that value from
         the stack, which means we need to reset the CFA to
         the drap register.  This will remain until we restore
         the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
        {
          m->fs.cfa_reg = stack_pointer_rtx;
          m->fs.cfa_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_DEF_CFA,
                        gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                      GEN_INT (m->fs.cfa_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (Pmode, stack_pointer_rtx,
                                   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
                             m->fs.fp_offset);
}
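
/* Note: LEAVE is architecturally equivalent to "mov %ebp, %esp; pop %ebp",
   which is why the bookkeeping above revalidates the stack pointer at
   fp_offset - UNITS_PER_WORD and invalidates the frame pointer in a
   single step.  */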
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (word_mode, regno);
        rtx insn, mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_frame_mem (word_mode, mem);
        insn = emit_move_insn (reg, mem);

        if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
          {
            /* Previously we'd represented the CFA as an expression
               like *(%ebp - 8).  We've just popped that value from
               the stack, which means we need to reset the CFA to
               the drap register.  This will remain until we restore
               the stack pointer.  */
            add_reg_note (insn, REG_CFA_DEF_CFA, reg);
            RTX_FRAME_RELATED_P (insn) = 1;

            /* This means that the DRAP register is valid for addressing.  */
            m->fs.drap_valid = true;
          }
        else
          ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
        rtx reg = gen_rtx_REG (V4SFmode, regno);
        rtx mem;

        mem = choose_baseaddr (cfa_offset);
        mem = gen_rtx_MEM (V4SFmode, mem);
        set_mem_align (mem, 128);
        emit_move_insn (reg, mem);

        ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

        cfa_offset -= 16;
      }
}
/* Emit vzeroupper if needed.  */

static void
ix86_maybe_emit_epilogue_vzeroupper (void)
{
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
}
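
/* The vzeroupper emitted above clears the upper 128 bits of all YMM
   registers on return, which avoids the costly AVX/SSE transition
   penalty in callers that continue with legacy SSE code; it is skipped
   when the caller itself expects a 256-bit AVX value.  */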
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
                    || (crtl->sp_is_unchanging
                        && !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
              || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
              || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
         the stack, determine the maximum runtime displacement that
         matches up with the aligned frame.  */
      if (stack_realign_drap)
        m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
                                  + UNITS_PER_WORD);
    }

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid, then
     use a move instruction to restore the register, since that is
     less work than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
           && cfun->machine->use_fast_prologue_epilogue
           && (frame.nregs > 1
               || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && !frame.nregs
           && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && TARGET_USE_LEAVE
           && cfun->machine->use_fast_prologue_epilogue
           && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;

  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
         the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
          && m->fs.sp_offset > 0x7fffffff
          && !(m->fs.fp_valid || m->fs.drap_valid)
          && (frame.nsseregs + frame.nregs) != 0)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.sse_reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
                                          style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
        ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx insn, sa = EH_RETURN_STACKADJ_RTX;

          /* Stack align doesn't work with eh_return.  */
          gcc_assert (!stack_realign_drap);
          /* Neither do regparm nested functions.  */
          gcc_assert (!ix86_static_chain_on_stack);

          if (frame_pointer_needed)
            {
              t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (VOIDmode, sa, t));

              t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
              insn = emit_move_insn (hard_frame_pointer_rtx, t);

              /* Note that we use SA as a temporary CFA, as the return
                 address is at the proper place relative to it.  We
                 pretend this happens at the FP restore insn because
                 prior to this insn the FP would be stored at the wrong
                 offset relative to SA, and after this insn we have no
                 other reasonable register to use for the CFA.  We don't
                 bother resetting the CFA to the SP for the duration of
                 the return insn.  */
              add_reg_note (insn, REG_CFA_DEF_CFA,
                            plus_constant (Pmode, sa, UNITS_PER_WORD));
              ix86_add_queued_cfa_restore_notes (insn);
              add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
              RTX_FRAME_RELATED_P (insn) = 1;

              m->fs.cfa_reg = sa;
              m->fs.cfa_offset = UNITS_PER_WORD;
              m->fs.fp_valid = false;

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style, false);
            }
          else
            {
              t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
              insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
              ix86_add_queued_cfa_restore_notes (insn);

              gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
              if (m->fs.cfa_offset != UNITS_PER_WORD)
                {
                  m->fs.cfa_offset = UNITS_PER_WORD;
                  add_reg_note (insn, REG_CFA_DEF_CFA,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               UNITS_PER_WORD));
                  RTX_FRAME_RELATED_P (insn) = 1;
                }
            }
          m->fs.sp_offset = UNITS_PER_WORD;
          m->fs.sp_valid = true;
        }
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
         if necessary, (2) a sequence of pops, and (3) a return or
         jump instruction.  Prevent insns from the function body from
         being scheduled into this sequence.  */
      if (TARGET_SEH)
        {
          /* Prevent a catch region from being adjacent to the standard
             epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda
             nor several other flags that would be interesting to test
             are set up yet.  */
          if (flag_non_call_exceptions)
            emit_insn (gen_nops (const1_rtx));
          else
            emit_insn (gen_blockage ());
        }

      /* First step is to deallocate the stack frame so that we can
         pop the registers.  Also do it on SEH target for very large
         frame as the emitted instructions aren't allowed by the ABI
         in epilogues.  */
      if (!m->fs.sp_valid
          || (TARGET_SEH
              && (m->fs.sp_offset - frame.reg_save_offset
                  >= SEH_MAX_FRAME_SIZE)))
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
                                     GEN_INT (m->fs.fp_offset
                                              - frame.reg_save_offset),
                                     style, false);
        }
      else if (m->fs.sp_offset != frame.reg_save_offset)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a stack pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
         pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
        ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
         able to grok it fast.  */
      else if (TARGET_USE_LEAVE
               || optimize_function_for_size_p (cfun)
               || !cfun->machine->use_fast_prologue_epilogue)
        ix86_emit_leave ();
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style, !using_drap);
          ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
        }
    }

  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
        param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
        param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
                        (VOIDmode, stack_pointer_rtx,
                         gen_rtx_PLUS (Pmode,
                                       crtl->drap_reg,
                                       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
        ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }

  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
                                 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  ix86_maybe_emit_epilogue_vzeroupper ();

  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
         address, do an explicit add, and jump indirectly to the caller.  */
      if (crtl->args.pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, CX_REG);
          rtx insn;

          /* There is no "pascal" calling convention in any 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
          add_reg_note (insn, REG_CFA_REGISTER,
                        gen_rtx_SET (VOIDmode, ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     popc, -1, true);
          emit_jump_insn (gen_simple_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
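
/* Rough shape of a simple 32-bit epilogue produced by the code above
   (illustrative only; either the pop sequence or a single "leave" is
   chosen depending on TARGET_USE_LEAVE and the frame state):

       addl    $24, %esp       # deallocate the local frame
       pop     %ebx            # restore saved registers
       pop     %ebp            # or "leave" to do both at once
       ret
*/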
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
           && NOTE_P (insn)
           && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
        /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
           notes only; instead set their CODE_LABEL_NUMBER to -1,
           otherwise there would be code generation differences
           between -g and -g0.  */
        if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          deleted_debug_label = insn;
        insn = PREV_INSN (insn);
      }
    if (insn
        && (LABEL_P (insn)
            || (NOTE_P (insn)
                && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
        if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
          CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
        {
          if (DECL_STATIC_CHAIN (cfun->decl))
            {
              sorry ("-fsplit-stack does not support fastcall with "
                     "nested function");
              return INVALID_REGNUM;
            }
          return AX_REG;
        }
      else if (regparm < 3)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return CX_REG;
          else
            {
              if (regparm >= 2)
                {
                  sorry ("-fsplit-stack does not support 2 register "
                         "parameters for a nested function");
                  return INVALID_REGNUM;
                }
              return DX_REG;
            }
        }
      else
        {
          /* FIXME: We could make this work by pushing a register
             around the addition and comparison.  */
          sorry ("-fsplit-stack does not support 3 register parameters");
          return INVALID_REGNUM;
        }
    }
}
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more-stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                          UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
         the required frame size.  Since this is the very start of the
         function, the scratch register can be any caller-saved
         register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
        return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
        {
          /* We don't use ix86_gen_add3 in this case because it will
             want to split to lea, but when not optimizing the insn
             will not be split after this point.  */
          emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                                  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                                offset)));
        }
      else
        {
          emit_move_insn (scratch_reg, offset);
          emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
                                    stack_pointer_rtx));
        }
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
                GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));

  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
         Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
        {
          rtx rax;

          rax = gen_rtx_REG (word_mode, AX_REG);
          emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
          use_reg (&call_fusage, rax);
        }

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
        {
          HOST_WIDE_INT argval;

          gcc_assert (Pmode == DImode);
          /* When using the large model we need to load the address
             into a register, and we've run out of registers.  So we
             switch to a different calling convention, and we call a
             different function: __morestack_large.  We pass the
             argument size in the upper 32 bits of r10 and pass the
             frame size in the lower 32 bits.  */
          gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
          gcc_assert ((args_size & 0xffffffff) == args_size);

          if (split_stack_fn_large == NULL_RTX)
            split_stack_fn_large =
              gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx label, x;

              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              emit_insn (gen_set_rip_rex64 (reg10, label));
              emit_insn (gen_set_got_offset_rex64 (reg11, label));
              emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
              x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
                                  UNSPEC_GOT);
              x = gen_rtx_CONST (Pmode, x);
              emit_move_insn (reg11, x);
              x = gen_rtx_PLUS (Pmode, reg10, reg11);
              x = gen_const_mem (Pmode, x);
              emit_move_insn (reg11, x);
            }
          else
            emit_move_insn (reg11, split_stack_fn_large);

          fn = reg11;

          argval = ((args_size << 16) << 16) + allocate;
          emit_move_insn (reg10, GEN_INT (argval));
        }
      else
        {
          emit_move_insn (reg10, allocate_rtx);
          emit_move_insn (reg11, GEN_INT (args_size));
          use_reg (&call_fusage, reg11);
        }

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
                                GEN_INT (UNITS_PER_WORD), constm1_rtx,
                                NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
         fp -> old fp value
               return address within this function
               return address of caller of this function
               stack arguments
         So we add three words to get to the stack arguments.

         32-bit:
         fp -> old fp value
               return address within this function
               first argument to __morestack
               second argument to __morestack
               return address of caller of this function
               stack arguments
         So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, frame_reg,
                                            GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
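
/* Worked example of the __morestack_large_model argument packing above
   (illustrative values): for args_size = 16 and allocate = 4096,
   argval = ((16 << 16) << 16) + 4096 = 0x0000001000001000, so r10
   carries the argument size in its upper 32 bits and the frame size in
   its lower 32 bits, exactly as the comment describes.  */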
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine if OP is a suitable SUBREG RTX for an address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of a lea
   instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
          && GET_MODE (XEXP (addr, 0)) == SImode)
        {
          addr = XEXP (addr, 0);
          if (CONST_INT_P (addr))
            return 0;
        }
      else if (GET_CODE (addr) == AND
               && const_32bit_mask (XEXP (addr, 1), DImode))
        {
          addr = XEXP (addr, 0);

          /* Adjust SUBREGs.  */
          if (GET_CODE (addr) == SUBREG
              && GET_MODE (SUBREG_REG (addr)) == SImode)
            {
              addr = SUBREG_REG (addr);
              if (CONST_INT_P (addr))
                return 0;
            }
          else if (GET_MODE (addr) == DImode)
            addr = gen_rtx_SUBREG (SImode, addr, 0);
          else if (GET_MODE (addr) != VOIDmode)
            return 0;
        }
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (GET_CODE (addr) == SUBREG
          && GET_MODE (SUBREG_REG (addr)) == DImode)
        {
          addr = SUBREG_REG (addr);
          if (CONST_INT_P (addr))
            return 0;
        }
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
        base = addr;
      else
        return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case ASHIFT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              tmp = XEXP (op, 1);
              if (!CONST_INT_P (tmp))
                return 0;
              scale = INTVAL (tmp);
              if ((unsigned HOST_WIDE_INT) scale > 3)
                return 0;
              scale = 1 << scale;
              break;

            case CONST:
              op = XEXP (op, 0);
              if (GET_CODE (op) != UNSPEC)
                return 0;
              /* FALLTHRU */

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == SEG_DEFAULT)
                seg = TARGET_64BIT ? SEG_FS : SEG_GS;
              else
                return 0;
              break;

            case SUBREG:
              if (!ix86_address_subreg_operand (SUBREG_REG (op)))
                return 0;
              /* FALLTHRU */

            case REG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else if (CONST_INT_P (addr))
    {
      if (!x86_64_immediate_operand (addr, VOIDmode))
        return 0;

      /* Constant addresses are sign extended to 64bit, we have to
         prevent addresses from 0x80000000 to 0xffffffff in x32 mode.  */
      if (TARGET_X32
          && val_signbit_known_set_p (SImode, INTVAL (addr)))
        return 0;

      disp = addr;
    }
  else
    disp = addr;                        /* displacement */
11844 else if (GET_CODE (index
) == SUBREG
11845 && ix86_address_subreg_operand (SUBREG_REG (index
)))
11851 /* Address override works only on the (%reg) part of %fs:(%reg). */
11852 if (seg
!= SEG_DEFAULT
11853 && ((base
&& GET_MODE (base
) != word_mode
)
11854 || (index
&& GET_MODE (index
) != word_mode
)))
11857 /* Extract the integral value of scale. */
11860 if (!CONST_INT_P (scale_rtx
))
11862 scale
= INTVAL (scale_rtx
);
11865 base_reg
= base
&& GET_CODE (base
) == SUBREG
? SUBREG_REG (base
) : base
;
11866 index_reg
= index
&& GET_CODE (index
) == SUBREG
? SUBREG_REG (index
) : index
;
11868 /* Avoid useless 0 displacement. */
11869 if (disp
== const0_rtx
&& (base
|| index
))
11872 /* Allow arg pointer and stack pointer as index if there is not scaling. */
11873 if (base_reg
&& index_reg
&& scale
== 1
11874 && (index_reg
== arg_pointer_rtx
11875 || index_reg
== frame_pointer_rtx
11876 || (REG_P (index_reg
) && REGNO (index_reg
) == STACK_POINTER_REGNUM
)))
11879 tmp
= base
, base
= index
, index
= tmp
;
11880 tmp
= base_reg
, base_reg
= index_reg
, index_reg
= tmp
;
11883 /* Special case: %ebp cannot be encoded as a base without a displacement.
11887 && (base_reg
== hard_frame_pointer_rtx
11888 || base_reg
== frame_pointer_rtx
11889 || base_reg
== arg_pointer_rtx
11890 || (REG_P (base_reg
)
11891 && (REGNO (base_reg
) == HARD_FRAME_POINTER_REGNUM
11892 || REGNO (base_reg
) == R13_REG
))))
11895 /* Special case: on K6, [%esi] makes the instruction vector decoded.
11896 Avoid this by transforming to [%esi+0].
11897 Reload calls address legitimization without cfun defined, so we need
11898 to test cfun for being non-NULL. */
11899 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
11900 && base_reg
&& !index_reg
&& !disp
11901 && REG_P (base_reg
) && REGNO (base_reg
) == SI_REG
)
11904 /* Special case: encode reg+reg instead of reg*2. */
11905 if (!base
&& index
&& scale
== 2)
11906 base
= index
, base_reg
= index_reg
, scale
= 1;
11908 /* Special case: scaling cannot be encoded without base or displacement. */
11909 if (!base
&& !disp
&& index
&& scale
!= 1)
11913 out
->index
= index
;
11915 out
->scale
= scale
;
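/* Worked example (illustrative, not part of the original sources): for the
   canonical RTL address

     (plus:SI (plus:SI (mult:SI (reg:SI %ebx) (const_int 4))
		       (reg:SI %esi))
	      (const_int 12))

   the code above fills OUT with base = %esi, index = %ebx, scale = 4,
   disp = 12 and seg = SEG_DEFAULT, i.e. the operand `12(%esi,%ebx,4)'.  */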
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     Following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero in
     memory address, but I don't have AMD-K6 machine handy to check this
     theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
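/* For example (illustrative): `(%eax)' decomposes to a single hard register
   and keeps the base cost of 1, while an address built from two pseudos,
   such as `sym(%r1,%r2)', is charged extra above, steering the optimizers
   toward simpler addresses.  */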
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1)
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) == CONST
	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_PCREL)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	    return false;
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
	      && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions; this limits the allowed
	 distance of GOT table references.  We should not need these
	 anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
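/* Illustrative examples (not exhaustive): in 32bit PIC code both
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) and
   (const (plus (unspec [(symbol_ref "x")] UNSPEC_GOTOFF) (const_int 4)))
   are accepted above, while a bare (symbol_ref "x") or a TLS symbol
   outside its matching UNSPEC wrapper is rejected.  */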
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.  */

bool
ix86_legitimize_reload_address (rtx x,
				enum machine_mode mode ATTRIBUTE_UNUSED,
				int opnum, int type,
				int ind_levels ATTRIBUTE_UNUSED)
{
  /* Reload can generate:

     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
		       (reg:DI 97))
	      (reg:DI 2 cx))

     This RTX is rejected from ix86_legitimate_address_p due to
     non-strictness of base register 97.  Following this rejection,
     reload pushes all three components into separate registers,
     creating invalid memory address RTX.

     Following code reloads only the invalid part of the
     memory address RTX.  */

  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 1))
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 1)))
    {
      rtx base, index;
      bool something_reloaded = false;

      base = XEXP (XEXP (x, 0), 1);
      if (!REG_OK_FOR_BASE_STRICT_P (base))
	{
	  push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
		       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      index = XEXP (x, 1);
      if (!REG_OK_FOR_INDEX_STRICT_P (index))
	{
	  push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
		       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	  something_reloaded = true;
	}

      gcc_assert (something_reloaded);
      return true;
    }

  return false;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			   rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Validate base register.  */
  if (base)
    {
      rtx reg;

      if (REG_P (base))
	reg = base;
      else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
	reg = SUBREG_REG (base);
      else
	/* Base is not a register.  */
	return false;

      if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg;

      if (REG_P (index))
	reg = index;
      else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
	reg = SUBREG_REG (index);
      else
	/* Index is not a register.  */
	return false;

      if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP relative instead.  */
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  case UNSPEC_STACK_CHECK:
	    gcc_assert (flag_split_stack);
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
		   || (TARGET_MACHO
#if TARGET_MACHO
		       && MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp)
#endif
	       )))
	{

	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* displacement must be referenced via non_lazy_pointer */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is good idea, unfortunately these constructs may
	     be created by "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
		 return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing
	     GOT table with pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by
	     "addsi3" pattern, that later gets split to lea in the
	     case output register differs from input.  While this
	     can be handled by separate addsi pattern for this case
	     that never results in lea, this seems to be easier and
	     correct fix for crash to disable this test.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
    }

  /* Everything looks valid.  */
  return true;
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}
/* Return a unique alias set for the GOT.  */

static alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if (TARGET_64BIT
	   && ix86_cmodel != CM_SMALL_PIC
	   && gotoff_operand (addr, Pmode))
    {
      rtx tmpreg;

      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      if (!reg)
	tmpreg = gen_reg_rtx (Pmode);
      else
	tmpreg = reg;
      emit_move_insn (tmpreg, new_rtx);

      if (reg != 0)
	{
	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
					 tmpreg, 1, OPTAB_DIRECT);
	  new_rtx = reg;
	}
      else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
    }
  else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement from the PIC
	 base address (@GOTOFF).  */

      if (reload_in_progress)
	df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);
      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

      if (reg != 0)
	{
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels on VxWorks;
	      see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
	{
	  if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	    return legitimize_dllimport_symbol (addr, true);
	  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	      && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	    {
	      rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
	      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	    }
	}

      /* For x64 PE-COFF there is no GOT table.  So we use address
	 directly.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  /* Use directly gen_movsi, otherwise the address is loaded
	     into register for CSE.  We don't want to CSE these addresses,
	     instead we CSE addresses from the GOT table, so skip this.  */
	  emit_insn (gen_movsi (reg, new_rtx));
	  new_rtx = reg;
	}
      else
	{
	  /* This symbol must be referenced via a load from the
	     Global Offset Table (@GOT).  */

	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());

	  if (reg == 0)
	    reg = gen_reg_rtx (Pmode);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	{
	  if (reg)
	    {
	      emit_move_insn (reg, addr);
	      new_rtx = reg;
	    }
	  else
	    new_rtx = force_reg (Pmode, addr);
	}
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}
      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant offset from a @GOTOFF
	     symbol reference.  */
	  if (gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  if (reload_in_progress)
		    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

		  if (reg != 0)
		    {
		      emit_move_insn (reg, new_rtx);
		      new_rtx = reg;
		    }
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);
		      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      rtx base = legitimize_pic_address (XEXP (addr, 0), reg);
	      new_rtx = legitimize_pic_address (XEXP (addr, 1),
						base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
	      else
		{
		  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
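/* Illustrative 32bit example of the two reference types handled above
   (exact output depends on flags):

     movl	foo@GOT(%ebx), %eax	# global data: load address from GOT
     leal	bar@GOTOFF(%ebx), %eax	# local data: offset from PIC base

   where %ebx holds pic_offset_table_rtx.  */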
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  return ix86_tls_symbol;
}

/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

static rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */

static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  enum machine_mode tp_mode = Pmode;
  int type;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      dest = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (Pmode, true);
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;

	      start_sequence ();
	      emit_call_insn (ix86_gen_tls_global_dynamic_64 (rax, x,
							      caddr));
	      insns = get_insns ();
	      end_sequence ();

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      base = gen_reg_rtx (Pmode);

      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (Pmode, true);
	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
			       gen_rtx_MINUS (Pmode, tmp, tp));
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;

	      start_sequence ();
	      emit_call_insn (ix86_gen_tls_local_dynamic_base_64 (rax,
								  caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  if (reload_in_progress)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  return gen_rtx_PLUS (tp_mode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (ix86_gen_sub3 (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
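/* Illustrative ia32 GNU TLS sequences corresponding to the models above
   (actual output depends on target flags and assembler support):

     global dynamic:	leal	x@tlsgd(,%ebx,1), %eax
			call	___tls_get_addr
     local dynamic:	leal	x@tlsldm(%ebx), %eax
			call	___tls_get_addr
			leal	x@dtpoff(%eax), %edx
     initial exec:	movl	%gs:0, %eax
			addl	x@gotntpoff(%ebx), %eax
     local exec:	movl	%gs:0, %eax
			leal	x@ntpoff(%eax), %eax  */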
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL.  */

static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
  htab_t dllimport_map;

static tree
get_dllimport_decl (tree decl)
{
  struct tree_map *h, in;
  void **loc;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
  h = (struct tree_map *) *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc_tree_map ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
    ? "*__imp_" : "*__imp__";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
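/* For example (illustrative): with DECL `foo' marked dllimport, a
   reference is rewritten to a load through the import-table slot
   `__imp__foo' (or `__imp_foo' on no-prefix/fastcall targets), roughly
   `movl __imp__foo, %eax' followed by uses of (%eax).  */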
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 enum machine_mode mode)
{
  int changed = 0;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
	return legitimize_dllimport_symbol (x, true);
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
	}
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = 1;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = 1;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  rtx tmp = XEXP (x, 0);
	  XEXP (x, 0) = XEXP (x, 1);
	  XEXP (x, 1) = tmp;
	  changed = 1;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = 1;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = 1;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = 1;
	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = 1;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      if (GET_MODE (val) != Pmode)
		val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      if (GET_MODE (val) != Pmode)
		val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
	{
	  /* We can use %d if the number is <32 bits and positive.  */
	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
	    fprintf (file, "0x%lx%08lx",
		     (unsigned long) CONST_DOUBLE_HIGH (x),
		     (unsigned long) CONST_DOUBLE_LOW (x));
	  else
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
	}
      else
	/* We can't handle floating point constants;
	   TARGET_PRINT_OPERAND must handle them.  */
	output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_STACK_CHECK)
	{
	  bool f = i386_asm_output_addr_const_extra (file, x);
	  gcc_assert (f);
	  break;
	}

      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}
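/* E.g. for SIZE == 4 this emits (illustrative):
     .long foo@dtpoff
   and for SIZE == 8 the upper half is zero-padded:
     .long foo@dtpoff, 0  */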
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else
    return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}
      if (GET_CODE (x) != CONST
	  || GET_CODE (XEXP (x, 0)) != UNSPEC
	  || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
	      && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
	  || (!MEM_P (orig_x) && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL))
	return ix86_delegitimize_tls_address (orig_x);
      x = XVECEXP (XEXP (x, 0), 0, 0);
      if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	{
	  x = simplify_gen_subreg (GET_MODE (orig_x), x,
				   GET_MODE (x), 0);
	  if (x == NULL_RTX)
	    return orig_x;
	}
      return x;
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
    result = XVECEXP (x, 0, 0);

  if (TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  if (const_addend)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo.  */
      if (pic_offset_table_rtx)
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && (CONST_INT_P (XEXP (term, 1))
	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address (x);
}
static void
put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode || mode == CCFPUmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      switch (mode)
	{
	case CCAmode:
	  suffix = "a";
	  break;

	case CCCmode:
	  suffix = "c";
	  break;

	case CCOmode:
	  suffix = "o";
	  break;

	case CCSmode:
	  suffix = "s";
	  break;

	default:
	  suffix = "e";
	}
      break;
    case NE:
      switch (mode)
	{
	case CCAmode:
	  suffix = "na";
	  break;

	case CCCmode:
	  suffix = "nc";
	  break;

	case CCOmode:
	  suffix = "no";
	  break;

	case CCSmode:
	  suffix = "ns";
	  break;

	default:
	  suffix = "ne";
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else if (mode == CCCmode)
	suffix = "b";
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "s";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = "b";
      break;
    case GE:
      switch (mode)
	{
	case CCNOmode:
	case CCGOCmode:
	  suffix = "ns";
	  break;

	case CCmode:
	case CCGCmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      /* ??? As above.  */
      gcc_assert (mode == CCmode || mode == CCCmode);
      suffix = fp ? "nb" : "ae";
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      /* ??? As above.  */
      if (mode == CCmode)
	suffix = "be";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "ae";
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  bool duplicated = code == 'd' && TARGET_AVX;

  gcc_assert (x == pc_rtx
	      || (REGNO (x) != ARG_POINTER_REGNUM
		  && REGNO (x) != FRAME_POINTER_REGNUM
		  && REGNO (x) != FLAGS_REG
		  && REGNO (x) != FPSR_REG
		  && REGNO (x) != FPCR_REG));

  if (ASSEMBLER_DIALECT == ASM_ATT)
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'w' || MMX_REG_P (x))
    code = 2;
  else if (code == 'b')
    code = 1;
  else if (code == 'k')
    code = 4;
  else if (code == 'q')
    code = 8;
  else if (code == 'y')
    code = 3;
  else if (code == 'h')
    code = 0;
  else if (code == 'x')
    code = 16;
  else if (code == 't')
    code = 32;
  else
    code = GET_MODE_SIZE (GET_MODE (x));

  /* Irritatingly, AMD extended registers use different naming convention
     from the normal registers: "r%d[bwd]"  */
  if (REX_INT_REG_P (x))
    {
      gcc_assert (TARGET_64BIT);
      putc ('r', file);
      fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
      switch (code)
	{
	  case 0:
	    error ("extended registers have no high halves");
	    break;
	  case 1:
	    putc ('b', file);
	    break;
	  case 2:
	    putc ('w', file);
	    break;
	  case 4:
	    putc ('d', file);
	    break;
	  case 8:
	    /* no suffix */
	    break;
	  default:
	    error ("unsupported operand size for extended register");
	    break;
	}
      return;
    }

  reg = NULL;
  switch (code)
    {
    case 3:
      if (STACK_TOP_P (x))
	{
	  reg = "st(0)";
	  break;
	}
      /* FALLTHRU */
    case 8:
    case 4:
    case 12:
      if (! ANY_FP_REG_P (x))
	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 16:
    case 2:
    normal:
      reg = hi_reg_name[REGNO (x)];
      break;
    case 1:
      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      reg = qi_reg_name[REGNO (x)];
      break;
    case 0:
      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[REGNO (x)];
      break;
    case 32:
      if (SSE_REG_P (x))
	{
	  gcc_assert (!duplicated);
	  putc ('y', file);
	  fputs (hi_reg_name[REGNO (x)] + 1, file);
	  return;
	}
      break;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
/* Locate some local-dynamic symbol still in use by this function
   so that we can print its name in some tls_local_dynamic_base
   pattern.  */

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn)
	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  return NULL;
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print the prefix for register names.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w --  likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   @ -- print a segment register of thread base pointer load
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
 */
14146 ix86_print_operand (FILE *file
, rtx x
, int code
)
14153 switch (ASSEMBLER_DIALECT
)
14160 /* Intel syntax. For absolute addresses, registers should not
14161 be surrounded by braces. */
14165 ix86_print_operand (file
, x
, 0);
14172 gcc_unreachable ();
14175 ix86_print_operand (file
, x
, 0);
14179 /* Wrap address in an UNSPEC to declare special handling. */
14181 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
14183 output_address (x
);
14187 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14192 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14197 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14202 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14207 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14212 if (ASSEMBLER_DIALECT
== ASM_ATT
)
14217 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14218 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
14221 switch (GET_MODE_SIZE (GET_MODE (x
)))
14236 output_operand_lossage
14237 ("invalid operand size for operand code 'O'");
14246 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14248 /* Opcodes don't get size suffixes if using Intel opcodes. */
14249 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14252 switch (GET_MODE_SIZE (GET_MODE (x
)))
14271 output_operand_lossage
14272 ("invalid operand size for operand code 'z'");
14277 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14279 (0, "non-integer operand used with operand code 'z'");
14283 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14284 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
14287 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
14289 switch (GET_MODE_SIZE (GET_MODE (x
)))
14292 #ifdef HAVE_AS_IX86_FILDS
14302 #ifdef HAVE_AS_IX86_FILDQ
14305 fputs ("ll", file
);
14313 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
14315 /* 387 opcodes don't get size suffixes
14316 if the operands are registers. */
14317 if (STACK_REG_P (x
))
14320 switch (GET_MODE_SIZE (GET_MODE (x
)))
14341 output_operand_lossage
14342 ("invalid operand type used with operand code 'Z'");
14346 output_operand_lossage
14347 ("invalid operand size for operand code 'Z'");
14365 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
14367 ix86_print_operand (file
, x
, 0);
14368 fputs (", ", file
);
14373 switch (GET_CODE (x
))
14376 fputs ("neq", file
);
14379 fputs ("eq", file
);
14383 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
14387 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
14391 fputs ("le", file
);
14395 fputs ("lt", file
);
14398 fputs ("unord", file
);
14401 fputs ("ord", file
);
14404 fputs ("ueq", file
);
14407 fputs ("nlt", file
);
14410 fputs ("nle", file
);
14413 fputs ("ule", file
);
14416 fputs ("ult", file
);
14419 fputs ("une", file
);
14422 output_operand_lossage ("operand is not a condition code, "
14423 "invalid operand code 'Y'");
14429 /* Little bit of braindamage here. The SSE compare instructions
14430 does use completely different names for the comparisons that the
14431 fp conditional moves. */
14432 switch (GET_CODE (x
))
14437 fputs ("eq_us", file
);
14441 fputs ("eq", file
);
14446 fputs ("nge", file
);
14450 fputs ("lt", file
);
14455 fputs ("ngt", file
);
14459 fputs ("le", file
);
14462 fputs ("unord", file
);
14467 fputs ("neq_oq", file
);
14471 fputs ("neq", file
);
14476 fputs ("ge", file
);
14480 fputs ("nlt", file
);
14485 fputs ("gt", file
);
14489 fputs ("nle", file
);
14492 fputs ("ord", file
);
14495 output_operand_lossage ("operand is not a condition code, "
14496 "invalid operand code 'D'");
#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
	  if (ASSEMBLER_DIALECT == ASM_ATT)

	  if (!COMPARISON_P (x))
	      output_operand_lossage ("operand is not a condition code, "
				      "invalid operand code '%c'", code);
	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
			      code == 'c' || code == 'f',
			      code == 'F' || code == 'f',

	  if (!offsettable_memref_p (x))
	      output_operand_lossage ("operand is not an offsettable memory "
				      "reference, invalid operand code 'H'");
	  /* It doesn't actually matter what mode we use here, as we're
	     only going to use this for printing.  */
	  x = adjust_address_nv (x, DImode, 8);

	  gcc_assert (CONST_INT_P (x));

	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xacquire ", file);
	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
	  else if (INTVAL (x) & IX86_HLE_RELEASE)
#ifdef HAVE_AS_IX86_HLE
	    fputs ("xrelease ", file);
	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);

	  /* We do not want to print the value of the operand.  */

	  if (ASSEMBLER_DIALECT == ASM_ATT)

	    const char *name = get_some_local_dynamic_name ();
	      output_operand_lossage ("'%%&' used without any "
				      "local dynamic TLS references");
	    assemble_name (file, name);
	      || optimize_function_for_size_p (cfun)
	      || !TARGET_BRANCH_PREDICTION_HINTS)

	  x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	      int pred_val = INTVAL (XEXP (x, 0));

	      if (pred_val < REG_BR_PROB_BASE * 45 / 100
		  || pred_val > REG_BR_PROB_BASE * 55 / 100)
		  bool taken = pred_val > REG_BR_PROB_BASE / 2;
		    = final_forward_branch_p (current_output_insn) == 0;

		  /* Emit hints only in the case default branch prediction
		     heuristics would fail.  */
		  if (taken != cputaken)
		      /* We use 3e (DS) prefix for taken branches and
			 2e (CS) prefix for not taken branches.  */
			fputs ("ds ; ", file);
			fputs ("cs ; ", file);

#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
	  if (ASSEMBLER_DIALECT == ASM_ATT)

	  /* The kernel uses a different segment register for performance
	     reasons; a system call would not have to trash the userspace
	     segment register, which would be expensive.  */
	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
	    fputs ("fs", file);
	    fputs ("gs", file);

	  putc (TARGET_AVX2 ? 'i' : 'f', file);

	  if (TARGET_64BIT && Pmode != word_mode)
	    fputs ("addr32 ", file);

	  output_operand_lossage ("invalid operand code '%c'", code);
    print_reg (x, code, file);

  else if (MEM_P (x))
      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
	  && GET_MODE (x) != BLKmode)
	  switch (GET_MODE_SIZE (GET_MODE (x)))
	    case 1: size = "BYTE"; break;
	    case 2: size = "WORD"; break;
	    case 4: size = "DWORD"; break;
	    case 8: size = "QWORD"; break;
	    case 12: size = "TBYTE"; break;
	      if (GET_MODE (x) == XFmode)
	    case 32: size = "YMMWORD"; break;
	      gcc_unreachable ();

	  /* Check for explicit size override (codes 'b', 'w', 'k',
	  else if (code == 'w')
	  else if (code == 'k')
	  else if (code == 'q')
	  else if (code == 'x')

	  fputs (size, file);
	  fputs (" PTR ", file);

      /* Avoid (%rip) for call operands.  */
      if (CONSTANT_ADDRESS_P (x) && code == 'P'
	  && !CONST_INT_P (x))
	output_addr_const (file, x);
      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
	output_operand_lossage ("invalid constraints for operand");
	output_address (x);
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_SINGLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
	fprintf (file, "0x%08llx", (unsigned long long) (int) l);
	fprintf (file, "0x%08x", (unsigned int) l);

  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      REAL_VALUE_TO_TARGET_DOUBLE (r, l);

      if (ASSEMBLER_DIALECT == ASM_ATT)
      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);

  /* These float cases don't actually occur as immediate operands.  */
  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
      fputs (dstr, file);
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));

      if (code != 'P' && code != 'p')
	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
	      if (ASSEMBLER_DIALECT == ASM_ATT)
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		fputs ("OFFSET FLAT:", file);

      if (CONST_INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
	output_addr_const (file, x);

ix86_print_operand_punct_valid_p (unsigned char code)
  return (code == '@' || code == '*' || code == '+' || code == '&'
	  || code == ';' || code == '~' || code == '^');
/* Print a memory operand whose address is ADDR.  */

ix86_print_operand_address (FILE *file, rtx addr)
  struct ix86_address parts;
  rtx base, index, disp;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
    ok = ix86_decompose_address (addr, &parts);
  if (parts.base && GET_CODE (parts.base) == SUBREG)
      rtx tmp = SUBREG_REG (parts.base);
      parts.base = simplify_subreg (GET_MODE (parts.base),
				    tmp, GET_MODE (tmp), 0);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
      rtx tmp = SUBREG_REG (parts.index);
      parts.index = simplify_subreg (GET_MODE (parts.index),
				     tmp, GET_MODE (tmp), 0);

  index = parts.index;
  scale = parts.scale;

      if (ASSEMBLER_DIALECT == ASM_ATT)
	fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
      gcc_unreachable ();

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index)
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))

  if (!base && !index)
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	output_pic_addr_const (file, disp, 0);
	output_addr_const (file, disp);
      /* Print SImode register names to force addr32 prefix.  */
      if (GET_CODE (addr) == SUBREG)
	  gcc_assert (TARGET_64BIT);
	  gcc_assert (GET_MODE (addr) == SImode);
	  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
	  gcc_assert (!code);
      else if (GET_CODE (addr) == ZERO_EXTEND
	       || GET_CODE (addr) == AND)
	  gcc_assert (TARGET_64BIT);
	  gcc_assert (GET_MODE (addr) == DImode);
	  gcc_assert (!code);

      if (ASSEMBLER_DIALECT == ASM_ATT)
	    output_pic_addr_const (file, disp, 0);
	  else if (GET_CODE (disp) == LABEL_REF)
	    output_asm_label (disp);
	    output_addr_const (file, disp);

	    print_reg (base, code, file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);

	  rtx offset = NULL_RTX;

	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));

		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		output_addr_const (file, disp);

	      print_reg (base, code, file);
	      if (INTVAL (offset) >= 0)
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

i386_asm_output_addr_const_extra (FILE *file, rtx x)
  if (GET_CODE (x) != UNSPEC)

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      output_addr_const (file, op);
      fputs ("@tpoff", file);
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
	fputs ("@tpoff", file);
	fputs ("@ntpoff", file);
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
	fputs ("@gotntpoff", file);
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      machopic_output_function_base_name (file);
    case UNSPEC_STACK_CHECK:
      gcc_assert (flag_split_stack);
#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
      offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
      gcc_unreachable ();
      fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
/* Split one or more double-mode RTL references into pairs of half-mode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */

split_double_mode (enum machine_mode mode, rtx operands[],
		   int num, rtx lo_half[], rtx hi_half[])
  enum machine_mode half_mode;

      half_mode = DImode;
      half_mode = SImode;
      gcc_unreachable ();

  byte = GET_MODE_SIZE (half_mode);

      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle it.  */
	  lo_half[num] = adjust_address (op, half_mode, 0);
	  hi_half[num] = adjust_address (op, half_mode, byte);
	  lo_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), 0);
	  hi_half[num] = simplify_gen_subreg (half_mode, op,
					      GET_MODE (op) == VOIDmode
					      ? mode : GET_MODE (op), byte);
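
/* Illustrative sketch (not part of GCC): the lo/hi split performed by
   split_double_mode, pictured on a plain 64-bit integer.  On
   little-endian x86 the low half lives at byte offset 0 and the high
   half at byte offset GET_MODE_SIZE (half_mode), which is exactly what
   the adjust_address offsets above encode.  The function below is a
   hypothetical stand-alone example, not a GCC API.  */

static void
example_split_di (unsigned long long value,
		  unsigned int *lo_half, unsigned int *hi_half)
{
  /* Low 32 bits, matching byte offset 0 in the MEM case.  */
  *lo_half = (unsigned int) (value & 0xffffffffu);
  /* High 32 bits, matching byte offset 4 (= GET_MODE_SIZE (SImode)).  */
  *hi_half = (unsigned int) (value >> 32);
}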
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1

output_387_binary_op (rtx insn, rtx *operands)
  static char buf[40];

  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1])
	       || SSE_REG_P (operands[2]);

#ifdef ENABLE_CHECKING
  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (STACK_REG_P (operands[0])
      && ((REG_P (operands[1])
	   && REGNO (operands[0]) == REGNO (operands[1])
	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	  || (REG_P (operands[2])
	      && REGNO (operands[0]) == REGNO (operands[2])
	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))

    gcc_assert (is_sse);

  switch (GET_CODE (operands[3]))
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
      gcc_unreachable ();
      strcpy (buf, ssep);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
	strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");

      strcpy (buf, ssep + 1);
      if (GET_MODE (operands[0]) == SFmode)
	strcat (buf, "ss\t{%2, %0|%0, %2}");
	strcat (buf, "sd\t{%2, %0|%0, %2}");

  switch (GET_CODE (operands[3]))
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	  rtx temp = operands[2];
	  operands[2] = operands[1];
	  operands[1] = temp;

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */

	  if (STACK_TOP_P (operands[0]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	    p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */

      if (MEM_P (operands[1]))

      if (MEM_P (operands[2]))

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	    p = "{rp\t%2, %0|p\t%0, %2}";

	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	    p = "{p\t%1, %0|rp\t%0, %1}";

	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */

      if (STACK_TOP_P (operands[0]))
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
      else if (STACK_TOP_P (operands[1]))
	  p = "{\t%1, %0|r\t%0, %1}";
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
	  p = "{r\t%2, %0|\t%0, %2}";
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */

      gcc_unreachable ();
/* Return the needed mode for entity in the optimize_mode_switching pass.  */

ix86_mode_needed (int entity, rtx insn)
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the function
     has no requirements on the control word and makes no changes in the
     bits we are interested in.  */
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

      if (mode == I387_CW_TRUNC)
      if (mode == I387_CW_FLOOR)
      if (mode == I387_CW_CEIL)
      if (mode == I387_CW_MASK_PM)
      gcc_unreachable ();

  return I387_CW_ANY;
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to the current control word,
   while NEW_MODE is set to the new control word.  */

emit_i387_cw_initialization (int mode)
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
      case I387_CW_TRUNC:
	/* round toward zero (truncate) */
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
	slot = SLOT_CW_TRUNC;

      case I387_CW_FLOOR:
	/* round down toward -oo */
	emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
	slot = SLOT_CW_FLOOR;

	/* round up toward +oo */
	emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
	slot = SLOT_CW_CEIL;

      case I387_CW_MASK_PM:
	/* mask precision exception for nearbyint() */
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	slot = SLOT_CW_MASK_PM;

	gcc_unreachable ();

      case I387_CW_TRUNC:
	/* round toward zero (truncate) */
	emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
	slot = SLOT_CW_TRUNC;

      case I387_CW_FLOOR:
	/* round down toward -oo */
	emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
	slot = SLOT_CW_FLOOR;

	/* round up toward +oo */
	emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
	slot = SLOT_CW_CEIL;

      case I387_CW_MASK_PM:
	/* mask precision exception for nearbyint() */
	emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
	slot = SLOT_CW_MASK_PM;

	gcc_unreachable ();

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
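
/* Illustrative sketch (not part of GCC): the constants above manipulate
   the x87 control word rounding-control field, bits 10 and 11.
   0x0c00 selects truncation, 0x0400 round toward -inf, 0x0800 round
   toward +inf, and clearing both bits selects round-to-nearest.  A
   hypothetical stand-alone helper, assuming the standard x87 layout:  */

static unsigned short
example_set_x87_rounding (unsigned short cw, int mode)
{
  cw &= ~0x0c00;	/* Clear RC field: round to nearest.  */
  if (mode == 1)
    cw |= 0x0400;	/* RC = 01: round down toward -oo.  */
  else if (mode == 2)
    cw |= 0x0800;	/* RC = 10: round up toward +oo.  */
  else if (mode == 3)
    cw |= 0x0c00;	/* RC = 11: truncate toward zero.  */
  return cw;
}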
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

    output_asm_insn ("fisttp%Z0\t%0", operands);

      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
	output_asm_insn ("fistp%Z0\t%0", operands);
	output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
	output_asm_insn ("fldcw\t%2", operands);
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";

      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);

  return opno ? "fstp\t%y1" : "fstp\t%y0";
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

      cmp_op0 = operands[0];
      cmp_op1 = operands[1];

      cmp_op0 = operands[1];
      cmp_op1 = operands[2];

      if (GET_MODE (operands[0]) == SFmode)
	  return "%vucomiss\t{%1, %0|%0, %1}";
	  return "%vcomiss\t{%1, %0|%0, %1}";
	  return "%vucomisd\t{%1, %0|%0, %1}";
	  return "%vcomisd\t{%1, %0|%0, %1}";

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
      if (stack_top_dies)
	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
	  return output_387_ffreep (operands, 1);
	return "ftst\n\tfnstsw\t%0";

  if (STACK_REG_P (cmp_op1)
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
      /* If both the top of the 387 stack dies, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */

	  /* There is no double popping fcomi variant.  Fortunately,
	     eflags is immune from the fstp's cc clobbering.  */
	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
	  return output_387_ffreep (operands, 0);
	    return "fucompp\n\tfnstsw\t%0";
	    return "fcompp\n\tfnstsw\t%0";

      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
	  "fcom%Z2\t%y2\n\tfnstsw\t%0",
	  "fcomp%Z2\t%y2\n\tfnstsw\t%0",
	  "fucom%Z2\t%y2\n\tfnstsw\t%0",
	  "fucomp%Z2\t%y2\n\tfnstsw\t%0",
	  "ficom%Z2\t%y2\n\tfnstsw\t%0",
	  "ficomp%Z2\t%y2\n\tfnstsw\t%0",
	  "fcomi\t{%y1, %0|%0, %y1}",
	  "fcomip\t{%y1, %0|%0, %y1}",
	  "fucomi\t{%y1, %0|%0, %y1}",
	  "fucomip\t{%y1, %0|%0, %y1}",

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
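
/* Illustrative sketch (not part of GCC): how the 4-bit index into the
   alt[] table above is assembled.  Each predicate contributes one bit,
   so the table must be laid out in exactly this order.  A hypothetical
   stand-alone restatement of the mask computation:  */

static int
example_fp_compare_index (int eflags_p, int intmode_p,
			  int unordered_p, int stack_top_dies)
{
  int mask = eflags_p << 3;	/* Bit 3: fcomi-style compare.  */
  mask |= intmode_p << 2;	/* Bit 2: integer memory operand.  */
  mask |= unordered_p << 1;	/* Bit 1: fucom instead of fcom.  */
  mask |= stack_top_dies;	/* Bit 0: popping variant.  */
  return mask;			/* Always in [0, 15].  */
}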
ix86_output_addr_vec_elt (FILE *file, int value)
  const char *directive = ASM_LONG;

    directive = ASM_QUAD;
  gcc_assert (!TARGET_64BIT);

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);

ix86_output_addr_diff_elt (FILE *file, int value, int rel)
  const char *directive = ASM_LONG;

  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
  gcc_assert (!TARGET_64BIT);

  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
  else if (TARGET_MACHO)
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate.  */

ix86_expand_clear (rtx dest)
  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
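
/* Illustrative sketch (not part of GCC): the trade-off behind
   ix86_expand_clear.  "xor reg, reg" is shorter than "mov $0, reg" but
   overwrites EFLAGS, which is why the xor form above is wrapped in a
   PARALLEL with a CLOBBER of FLAGS_REG.  A hypothetical stand-alone
   decision helper:  */

static const char *
example_clear_insn (int optimize_for_speed, int flags_live)
{
  /* xor clobbers the flags, so it may be used only when they are
     dead; otherwise fall back to the longer mov immediate.  */
  if (optimize_for_speed && !flags_live)
    return "xor\t%eax, %eax";	/* 2 bytes, breaks dependencies.  */
  return "mov\t$0, %eax";	/* 5 bytes, leaves EFLAGS intact.  */
}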
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

maybe_get_pool_constant (rtx x)
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);
ix86_expand_move (enum machine_mode mode, rtx operands[])
  enum tls_model model;

  if (GET_CODE (op1) == SYMBOL_REF)
      model = SYMBOL_REF_TLS_MODEL (op1);
	  op1 = legitimize_tls_address (op1, model, true);
	  op1 = force_operand (op1, op0);

	  if (GET_MODE (op1) != mode)
	    op1 = convert_to_mode (mode, op1, 1);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (op1))
	op1 = legitimize_dllimport_symbol (op1, false);
  else if (GET_CODE (op1) == CONST
	   && GET_CODE (XEXP (op1, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);

      model = SYMBOL_REF_TLS_MODEL (symbol);
	tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	       && SYMBOL_REF_DLLIMPORT_P (symbol))
	tmp = legitimize_dllimport_symbol (symbol, true);

	  tmp = force_operand (tmp, NULL);
	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
				     op0, 1, OPTAB_DIRECT);

	  if (GET_MODE (tmp) != mode)
	    op1 = convert_to_mode (mode, tmp, 1);

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
      if (TARGET_MACHO && !TARGET_64BIT)
	  /* dynamic-no-pic */
	  if (MACHOPIC_INDIRECT)
	      rtx temp = ((reload_in_progress
			   || ((op0 && REG_P (op0))
			  ? op0 : gen_reg_rtx (Pmode));
	      op1 = machopic_indirect_data_reference (op1, temp);
	      op1 = machopic_legitimize_pic_address (op1, mode,
						     temp == op1 ? 0 : temp);

	  if (op0 != op1 && GET_CODE (op0) != MEM)
	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);

	  if (GET_CODE (op0) == MEM)
	    op1 = force_reg (Pmode, op1);

	      if (GET_CODE (temp) != REG)
		temp = gen_reg_rtx (Pmode);
	      temp = legitimize_pic_address (op1, temp);
	  /* dynamic-no-pic */
	    op1 = force_reg (mode, op1);
	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
	      op1 = legitimize_pic_address (op1, reg);

	      if (GET_MODE (op1) != mode)
		op1 = convert_to_mode (mode, op1, 1);

	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
	      || !push_operand (op0, mode))
	op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
	  && ! general_no_elim_operand (op1, mode))
	op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into a register
	 to get them CSEed.  */
      if (can_create_pseudo_p ()
	  && (mode == DImode) && TARGET_64BIT
	  && immediate_operand (op1, mode)
	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
	  && !register_operand (op0, mode)
	op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
	  && FLOAT_MODE_P (mode)
	  && GET_CODE (op1) == CONST_DOUBLE)
	  /* If we are loading a floating point constant to a register,
	     force the value to memory now, since we'll get better code
	     out the back end.  */

	  op1 = validize_mem (force_const_mem (mode, op1));
	  if (!register_operand (op0, mode))
	      rtx temp = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
	      emit_move_insn (op0, temp);

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
	  || (GET_CODE (op1) == SUBREG
	      && CONSTANT_P (SUBREG_REG (op1))))
	op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
	  && !register_operand (op1, mode))
	op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Split 32-byte AVX unaligned load and store if needed.  */

ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
      gcc_unreachable ();
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;

      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;

      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    emit_insn (move_unaligned (op0, op1));
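
/* Illustrative sketch (not part of GCC): the split performed above,
   expressed on plain memory.  A 32-byte unaligned copy becomes two
   independent 16-byte halves at offsets 0 and 16, which is what the
   adjust_address/vextractf128 pairs implement.  Assumes <string.h>;
   a hypothetical stand-alone example, not a GCC API.  */

static void
example_split_32byte_copy (unsigned char *dst, const unsigned char *src)
{
  memcpy (dst, src, 16);		/* Low 128-bit half, offset 0.  */
  memcpy (dst + 16, src + 16, 16);	/* High 128-bit half, offset 16.  */
}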
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
     if (x86_sse_partial_reg_dependency == true)

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
     if (x86_sse_partial_reg_dependency == true)

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
     if (x86_sse_split_regs == true)
*/
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
      && GET_MODE_SIZE (mode) == 32)
      switch (GET_MODE_CLASS (mode))
	case MODE_VECTOR_INT:
	  op0 = gen_lowpart (V32QImode, op0);
	  op1 = gen_lowpart (V32QImode, op1);
	case MODE_VECTOR_FLOAT:
	  ix86_avx256_split_vector_move_misalign (op0, op1);
	  gcc_unreachable ();

      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_movdqu (op0, op1));
      else if (TARGET_SSE2 && mode == V2DFmode)
	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	      /* We will eventually emit movups based on insn attributes.  */
	      emit_insn (gen_sse2_movupd (op0, op1));

	      /* When SSE registers are split into halves, we can avoid
		 writing to the top half twice.  */
	      if (TARGET_SSE_SPLIT_REGS)
		  emit_clobber (op0);
		  /* ??? Not sure about the best option for the Intel chips.
		     The following would seem to satisfy; the register is
		     entirely cleared, breaking the dependency chain.  We
		     then store to the upper half, with a dependency depth
		     of one.  A rumor has it that Intel recommends two movsd
		     followed by an unpacklpd, but this is unconfirmed.  And
		     given that the dependency depth of the unpacklpd would
		     still be one, I'm not sure why this would be better.  */
		  zero = CONST0_RTX (V2DFmode);

	      m = adjust_address (op1, DFmode, 0);
	      emit_insn (gen_sse2_loadlpd (op0, zero, m));
	      m = adjust_address (op1, DFmode, 8);
	      emit_insn (gen_sse2_loadhpd (op0, op0, m));

	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	      op0 = gen_lowpart (V4SFmode, op0);
	      op1 = gen_lowpart (V4SFmode, op1);
	      emit_insn (gen_sse_movups (op0, op1));

	      if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
		emit_move_insn (op0, CONST0_RTX (mode));
		emit_clobber (op0);

	      if (mode != V4SFmode)
		op0 = gen_lowpart (V4SFmode, op0);

	      m = adjust_address (op1, V2SFmode, 0);
	      emit_insn (gen_sse_loadlps (op0, op0, m));
	      m = adjust_address (op1, V2SFmode, 8);
	      emit_insn (gen_sse_loadhps (op0, op0, m));
  else if (MEM_P (op0))
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	  op0 = gen_lowpart (V16QImode, op0);
	  op1 = gen_lowpart (V16QImode, op1);
	  /* We will eventually emit movups based on insn attributes.  */
	  emit_insn (gen_sse2_movdqu (op0, op1));
      else if (TARGET_SSE2 && mode == V2DFmode)
	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	    /* We will eventually emit movups based on insn attributes.  */
	    emit_insn (gen_sse2_movupd (op0, op1));

	      m = adjust_address (op0, DFmode, 0);
	      emit_insn (gen_sse2_storelpd (m, op1));
	      m = adjust_address (op0, DFmode, 8);
	      emit_insn (gen_sse2_storehpd (m, op1));

	  if (mode != V4SFmode)
	    op1 = gen_lowpart (V4SFmode, op1);

	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	      || optimize_function_for_size_p (cfun))
	      op0 = gen_lowpart (V4SFmode, op0);
	      emit_insn (gen_sse_movups (op0, op1));

	      m = adjust_address (op0, V2SFmode, 0);
	      emit_insn (gen_sse_storelps (m, op1));
	      m = adjust_address (op0, V2SFmode, 8);
	      emit_insn (gen_sse_storehps (m, op1));
    gcc_unreachable ();
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

ix86_expand_push (enum machine_mode mode, rtx x)
  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
			     GEN_INT (-GET_MODE_SIZE (mode)),
			     stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto the stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
  if (rtx_equal_p (dst, src2))

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
  if (immediate_operand (src1, mode))

  /* Lowest priority is that memory references should come second.  */
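
/* Illustrative sketch (not part of GCC): the priority order implemented
   above for a commutative operation dst = src1 OP src2.  A hypothetical
   stand-alone restatement of the swap decision:  */

static int
example_should_swap (int src1_matches_dst, int src2_matches_dst,
		     int src1_is_imm, int src2_is_imm)
{
  if (src1_matches_dst)
    return 0;		/* Highest priority: keep src1 == dst.  */
  if (src2_matches_dst)
    return 1;
  if (src2_is_imm)
    return 0;		/* Immediates should end up as src2.  */
  if (src1_is_imm)
    return 1;
  return 0;		/* Memory references handled likewise.  */
}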
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
	  src2 = force_reg (mode, src2);
	src2 = force_reg (mode, src2);

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
      && GET_MODE_CLASS (mode) == MODE_INT
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
/* Similarly, but assume that the destination has already been
   set up properly.  */

ix86_fixup_binary_operands_no_copy (enum rtx_code code,
				    enum machine_mode mode, rtx operands[])
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
  else if (reload_completed
	   && !rtx_equal_p (dst, src1))
      /* This is going to be an LEA; avoid splitting it later.  */

      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))

  /* Support "andhi/andsi/anddi" as a zero-extending move.  */
  return (code == AND
	  || (TARGET_64BIT && mode == DImode))
	  && satisfies_constraint_L (src2));
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
  int matching_memory;
  rtx src, dst, op, clob;

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
      if (rtx_equal_p (dst, src))
	matching_memory = 1;
	dst = gen_reg_rtx (mode);

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
      /* Reload doesn't know about the flags register, and doesn't know that
	 it doesn't want to clobber it.  */
      gcc_assert (code == NOT);

      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

ix86_split_idivmod (enum machine_mode mode, rtx operands[],
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;

      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;

      gcc_unreachable ();

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
				 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
		       operands[2], operands[3]);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);

      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
      /* Need a new scratch register since the old one has the result
	 of the 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
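
/* Illustrative sketch (not part of GCC): the range test emitted above.
   OR-ing dividend and divisor and testing the result against -0x100
   (i.e. ~0xff) checks in a single step that both values fit in
   [0, 255], in which case the cheap 8-bit divide can be used.  A
   hypothetical stand-alone restatement:  */

static int
example_fits_8bit_udiv (unsigned int dividend, unsigned int divisor)
{
  /* (dividend | divisor) & ~0xff is zero iff both are <= 255.  */
  return ((dividend | divisor) & ~0xffU) == 0;
}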
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to the next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
	  && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
	return distance + (distance & 1) + 2;

  return distance + 1;
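
/* Illustrative sketch (not part of GCC): distances are kept in
   half-cycles.  An independent instruction advances the count by one
   half-cycle; a dependent instruction first rounds up to a full cycle
   and then adds a whole cycle, which is what
   "distance + (distance & 1) + 2" computes above.  A hypothetical
   stand-alone restatement:  */

static unsigned int
example_increase_distance (unsigned int distance, int dependent_p)
{
  if (dependent_p)
    return distance + (distance & 1) + 2;  /* Round up, then +1 cycle.  */
  return distance + 1;			   /* +1 half-cycle.  */
}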
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

insn_defines_reg (unsigned int regno1, unsigned int regno2,
  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
	&& !DF_REF_IS_ARTIFICIAL (*def_rec)
	&& (regno1 == DF_REF_REGNO (*def_rec)
	    || regno2 == DF_REF_REGNO (*def_rec)))

/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

insn_uses_reg_mem (unsigned int regno, rtx insn)
  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx insn, int distance,
			       rtx start, bool *found)
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;

	 && distance < LEA_SEARCH_THRESHOLD)
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
      if (prev == BB_HEAD (bb))
      prev = PREV_INSN (prev);

/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

distance_non_agu_define (unsigned int regno1, unsigned int regno2,
  basic_block bb = BLOCK_FOR_INSN (insn);
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),

  if (!found && distance < LEA_SEARCH_THRESHOLD)
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	  simple_loop = true;

	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  BB_END (bb), &found);
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	      = distance_non_agu_define_in_bb (regno1, regno2,
	      if (shortest_dist < 0)
		shortest_dist = bb_dist;
	      else if (bb_dist > 0)
		shortest_dist = MIN (bb_dist, shortest_dist);

	  distance = shortest_dist;

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  return distance >> 1;
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

distance_agu_use_in_bb (unsigned int regno,
			rtx insn, int distance, rtx start,
			bool *found, bool *redefined)
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;

  *redefined = false;

	 && distance < LEA_SEARCH_THRESHOLD)
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	  distance = increase_distance (prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	      /* Return -1 if OP0 is set in NEXT.  */
      if (next == BB_END (bb))
      next = NEXT_INSN (next);

/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

distance_agu_use (unsigned int regno0, rtx insn)
  basic_block bb = BLOCK_FOR_INSN (insn);
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	  simple_loop = true;

	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	      = distance_agu_use_in_bb (regno0, insn,
					distance, BB_HEAD (e->dest),
					&found_in_bb, &redefined_in_bb);
	      if (shortest_dist < 0)
		shortest_dist = bb_dist;
	      else if (bb_dist > 0)
		shortest_dist = MIN (bb_dist, shortest_dist);

	  distance = shortest_dist;

  if (!found || redefined)

  return distance >> 1;
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA.
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  The instruction sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost)
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
      /* If there is no non-AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non-lea variants have the same priority.  Currently
	 we prefer lea for 64 bit code and non-lea on 32 bit.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;

  /* With a longer definition distance lea is more preferable.
     Here we change it to take into account splitting cost and  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in a memory address then we just check
     that split cost does not exceed AGU stall.  */
    return dist_define >= LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
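
/* Illustrative sketch (not part of GCC): the decision rule above.
   DIST_DEFINE is the distance back to a non-AGU definition of an lea
   input; DIST_USE is the distance forward to an AGU use of the result.
   The lea wins when its inputs are ready early enough relative to the
   expected stall.  A hypothetical simplified restatement that ignores
   the early-return cases:  */

static int
example_lea_outperforms (int dist_define, int dist_use,
			 int split_cost, int lea_max_stall)
{
  dist_define += split_cost;	/* Account for the split sequence.  */
  if (dist_use < 0)		/* No AGU use: only the stall matters.  */
    return dist_define >= lea_max_stall;
  /* Backward and forward dependencies: the shorter one governs.  */
  return dist_define >= dist_use;
}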
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

ix86_ok_to_clobber_flags (rtx insn)
  basic_block bb = BLOCK_FOR_INSN (insn);

      if (NONDEBUG_INSN_P (insn))
	  for (use = DF_INSN_USES (insn); *use; use++)
	    if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))

      if (insn == BB_END (bb))

      insn = NEXT_INSN (insn);

  live = df_get_live_out (bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

ix86_avoid_lea_for_add (rtx insn, rtx operands[])
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with a non-destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

ix86_use_lea_for_mov (rtx insn, rtx operands[])
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* FIXME: Handle zero-extended addresses.  */
  if (GET_CODE (operands[1]) == ZERO_EXTEND
      || GET_CODE (operands[1]) == AND)
    return false;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return true;

  regno0 = true_regnum (operands[0]);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if we split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 2;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;
    }

  /* Subtract the price of lea.  */
  split_cost -= 1;

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
		 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
  unsigned int regno0, regno1, regno2;
  struct ix86_address parts;
  rtx target, tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  target = operands[0];

  regno0 = true_regnum (target);
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    {
      if (GET_MODE (parts.base) != mode)
	parts.base = gen_lowpart (mode, parts.base);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      if (GET_MODE (parts.index) != mode)
	parts.index = gen_lowpart (mode, parts.index);
      regno2 = true_regnum (parts.index);
    }

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
	{
	  /* If we have a case r1 = r1 + C * r1 then we
	     should use multiplication which is very
	     expensive.  Assume cost model is wrong if we
	     have such case here.  */
	  gcc_assert (regno2 != regno0);

	  for (adds = parts.scale; adds > 0; adds--)
	    ix86_emit_binop (PLUS, mode, target, parts.index);
	}
      else
	{
	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));

	  /* Use shift for scaling.  */
	  ix86_emit_binop (ASHIFT, mode, target,
			   GEN_INT (exact_log2 (parts.scale)));

	  if (parts.base)
	    ix86_emit_binop (PLUS, mode, target, parts.base);

	  if (parts.disp && parts.disp != const0_rtx)
	    ix86_emit_binop (PLUS, mode, target, parts.disp);
	}
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert (parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
    }
  else
    {
      if (!parts.base)
	{
	  if (regno0 != regno2)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
	}
      else if (!parts.index)
	{
	  if (regno0 != regno1)
	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
	}
      else
	{
	  if (regno0 == regno1)
	    tmp = parts.index;
	  else if (regno0 == regno2)
	    tmp = parts.base;
	  else
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
	      tmp = parts.index;
	    }

	  ix86_emit_binop (PLUS, mode, target, tmp);
	}

      if (parts.disp && parts.disp != const0_rtx)
	ix86_emit_binop (PLUS, mode, target, parts.disp);
    }
}
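
/* Illustrative sketch (not part of the original source): for a lea such as
   "lea 0x8(%ebx,%ecx,4), %eax" the splitter above emits roughly

	movl  %ecx, %eax	; copy index (non-destructive form)
	shll  $2, %eax		; scale by 4 via shift
	addl  %ebx, %eax	; add base
	addl  $8, %eax		; add displacement

   all of which execute on the ALU rather than the AGU.  */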
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;
  else
    return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
	  && true_regnum (set_dest) == true_regnum (shift_count))
	return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
	emit_insn (gen_sse_movss (value, value, input));
      else
	emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
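
/* Worked example (illustrative, not from the original source): converting
   the DFmode value 3e9 to unsigned SImode with the sequence above:
   large = (2^31 <= 3e9) ? all-ones : 0, so zero_or_two31 = 2^31;
   value = 3e9 - 2^31 = 852516352.0 is then in signed range for the
   truncating conversion, and the final xor with (large << 31) = 0x80000000
   restores 852516352 ^ 0x80000000 == 3000000000.  */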
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
			    gen_rtvec (4, GEN_INT (0x43300000UL),
				       GEN_INT (0x45300000UL),
				       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}
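
/* Worked example (illustrative, not from the original source): for the
   DImode input 0x100000003 (lo = 3, hi = 1), the interleave forms the bit
   patterns of 0x1.0p52 + 3 and 0x1.0p84 + 1*2^32; subtracting the biases
   leaves the two lanes holding 3.0 and 4294967296.0, and the final add
   produces 4294967299.0, the exact value of the 64-bit input.  */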
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
				  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
			   NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
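
/* Illustrative sketch (not from the original source) of the same trick in
   scalar terms: for u = 3000000000u, u + (-2147483647 - 1) wraps to the
   signed value 852516352, the signed SImode-to-DFmode conversion is then
   exact, and adding 2^31 = 2147483648.0 back yields 3000000000.0.  */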
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
			   0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}
/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */
void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
				NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
				NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
			       0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
			       0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}
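
/* Worked example (illustrative, not from the original source): for
   u = 0xdeadbeef, int_lo = 0xbeef and int_hi = 0xdead are both exactly
   representable in SFmode, so the result is computed as
   0xdead * 2^16 + 0xbeef with a single rounding in the final add.  */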
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
				NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
				OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
				OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}
/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
				0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
				 gen_lowpart (intmode, tmp[0]),
				 GEN_INT (31), NULL_RTX, 0,
				 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
				   gen_lowpart (intmode, tmp[0]),
				   two31, NULL_RTX, 0,
				   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
			      0, OPTAB_DIRECT);
}
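
/* Worked example (illustrative, not from the original source): for a lane
   holding 3e9, the mask compare sets tmp[0] for that lane, so 2^31 is
   subtracted before the signed truncation (giving 852516352) and *XORP
   supplies 0x80000000 for the lane; 852516352 ^ 0x80000000 == 3000000000.
   Lanes below 2^31 are left unchanged and xored with 0.  */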
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V8SImode:
    case V4SImode:
    case V8SFmode:
    case V4SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V4DImode:
    case V2DImode:
    case V4DFmode:
    case V2DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
	{
	  imode = TImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
	}
      else
	{
	  rtvec vec;

	  imode = DImode;
	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

	  if (invert)
	    {
	      lo = ~lo, hi = ~hi;
	      v = constm1_rtx;
	    }
	  else
	    v = const0_rtx;

	  mask = immed_double_const (lo, hi, imode);

	  vec = gen_rtvec (2, v, mask);
	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

	  return v;
	}
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
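
/* Illustrative sketch (not from the original source): for DFmode the mask
   built here is 0x8000000000000000 (or its complement when INVERT), so NEG
   can be emitted as an xorpd with the mask, ABS as an andpd with the
   inverted mask, and copysign as an and/or combination of both.  */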
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
				rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
	vmode = V4SFmode;
      else if (mode == DFmode)
	vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
	par = gen_rtvec (2, set, use);
      else
	{
	  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
	  par = gen_rtvec (3, set, use, clob);
	}
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}
/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
	op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
	{
	  if (op0 == CONST0_RTX (mode))
	    op0 = CONST0_RTX (vmode);
	  else
	    {
	      rtx v = ix86_build_const_vector (vmode, false, op0);

	      op0 = force_reg (vmode, v);
	    }
	}
      else if (op0 != CONST0_RTX (mode))
	op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_const;
      else
	copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn) (rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
	copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
	copysign_insn = gen_copysigndf3_var;
      else
	copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}
/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
	 we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
	{
	  x = gen_rtx_AND (vmode, scratch, mask);
	}
      else						/* alternative 2,4 */
	{
	  gcc_assert (REGNO (mask) == REGNO (scratch));
	  op1 = simplify_gen_subreg (vmode, op1, mode, 0);
	  x = gen_rtx_AND (vmode, scratch, op1);
	}
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
	{
	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, nmask);
	}
      else						/* alternative 3,4 */
	{
	  gcc_assert (REGNO (nmask) == REGNO (dest));
	  dest = nmask;
	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
	  x = gen_rtx_AND (vmode, dest, op0);
	}
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}
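
/* Illustrative sketch (not from the original source): in bit terms both
   splitters compute

       dest = (op0 & ~signmask) | (op1 & signmask);

   i.e. the magnitude bits of op0 merged with the sign bit of op1, which
   is exactly C99 copysign (op0, op1).  */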
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}
/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}
/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}
enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
	  && rtx_equal_p (op1, XEXP (op0, 0)))
	return CCCmode;
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* The strcmp pattern does (use flags) and combine may ask us for
	 a proper mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
    return m2;
  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
	{
	default:
	  return VOIDmode;

	case CCmode:
	case CCGCmode:
	case CCGOCmode:
	case CCNOmode:
	case CCAmode:
	case CCCmode:
	case CCOmode:
	case CCSmode:
	case CCZmode:
	  return CCmode;
	}

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}
/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}
/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
	  || (op_mode == XFmode
	      && ! (standard_80387_constant_p (op0) == 1
		    || standard_80387_constant_p (op1) == 1)
	      && GET_CODE (op1) != FLOAT)
	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
	 things around if they appear profitable, otherwise force op0
	 into a register.  */

      if (standard_80387_constant_p (op0) == 0
	  || (MEM_P (op0)
	      && ! (standard_80387_constant_p (op1) == 0
		    || MEM_P (op1))))
	{
	  enum rtx_code new_code = ix86_fp_swap_condition (code);
	  if (new_code != UNKNOWN)
	    {
	      rtx tmp;
	      tmp = op0, op0 = op1, op1 = tmp;
	      code = new_code;
	    }
	}

      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
	{
	  int tmp = standard_80387_constant_p (op1);
	  if (tmp == 0)
	    op1 = validize_mem (force_const_mem (op_mode, op1));
	  else if (tmp == 1)
	    {
	      if (TARGET_CMOVE)
		op1 = force_reg (op_mode, op1);
	    }
	  else
	    op1 = force_reg (op_mode, op1);
	}
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
	op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}
/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
			 tmp);

      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
	scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
	{
	case GT:
	case UNGT:
	  if (code == GT || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
	      intcmp_mode = CCmode;
	      code = GEU;
	    }
	  break;
	case LT:
	case UNLT:
	  if (code == LT && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case GE:
	case UNGE:
	  if (code == GE || !TARGET_IEEE_FP)
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
	      code = NE;
	    }
	  break;
	case LE:
	case UNLE:
	  if (code == LE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = LTU;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
	      code = NE;
	    }
	  break;
	case EQ:
	case UNEQ:
	  if (code == EQ && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
	      intcmp_mode = CCmode;
	      code = EQ;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = NE;
	    }
	  break;
	case NE:
	case LTGT:
	  if (code == NE && TARGET_IEEE_FP)
	    {
	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
					     GEN_INT (0x40)));
	      code = NE;
	    }
	  else
	    {
	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
	      code = EQ;
	    }
	  break;

	case UNORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = NE;
	  break;
	case ORDERED:
	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
	  code = EQ;
	  break;

	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
			 const0_rtx);
}
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}
void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, label),
				  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
	goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
	rtx lo[2], hi[2], label2;
	enum rtx_code code1, code2, code3;
	enum machine_mode submode;

	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
	  {
	    tmp = op0, op0 = op1, op1 = tmp;
	    code = swap_condition (code);
	  }

	split_double_mode (mode, &op0, 1, lo+0, hi+0);
	split_double_mode (mode, &op1, 1, lo+1, hi+1);

	submode = mode == DImode ? SImode : DImode;

	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
	   avoid two branches.  This costs one extra insn, so disable when
	   optimizing for size.  */

	if ((code == EQ || code == NE)
	    && (!optimize_insn_for_size_p ()
		|| hi[1] == const0_rtx || lo[1] == const0_rtx))
	  {
	    rtx xor0, xor1;

	    xor1 = hi[0];
	    if (hi[1] != const0_rtx)
	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    xor0 = lo[0];
	    if (lo[1] != const0_rtx)
	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
				   NULL_RTX, 0, OPTAB_WIDEN);

	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
				NULL_RTX, 0, OPTAB_WIDEN);

	    ix86_expand_branch (code, tmp, const0_rtx, label);
	    return;
	  }

	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  Similarly for low word -1 and
	   less-or-equal-than or greater-than.  */

	if (CONST_INT_P (hi[1]))
	  switch (code)
	    {
	    case LT: case LTU: case GE: case GEU:
	      if (lo[1] == const0_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    case LE: case LEU: case GT: case GTU:
	      if (lo[1] == constm1_rtx)
		{
		  ix86_expand_branch (code, hi[0], hi[1], label);
		  return;
		}
	      break;
	    default:
	      break;
	    }

	/* Otherwise, we need two or three jumps.  */

	label2 = gen_label_rtx ();

	code1 = code;
	code2 = swap_condition (code);
	code3 = unsigned_condition (code);

	switch (code)
	  {
	  case LT: case GT: case LTU: case GTU:
	    break;

	  case LE:   code1 = LT;  code2 = GT;  break;
	  case GE:   code1 = GT;  code2 = LT;  break;
	  case LEU:  code1 = LTU; code2 = GTU; break;
	  case GEU:  code1 = GTU; code2 = LTU; break;

	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
	  case NE:   code2 = UNKNOWN; break;

	  default:
	    gcc_unreachable ();
	  }

	/*
	 * a < b =>
	 *    if (hi(a) < hi(b)) goto true;
	 *    if (hi(a) > hi(b)) goto false;
	 *    if (lo(a) < lo(b)) goto true;
	 *  false:
	 */

	if (code1 != UNKNOWN)
	  ix86_expand_branch (code1, hi[0], hi[1], label);
	if (code2 != UNKNOWN)
	  ix86_expand_branch (code2, hi[0], hi[1], label2);

	ix86_expand_branch (code3, lo[0], lo[1], label);

	if (code2 != UNKNOWN)
	  emit_label (label2);
	return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}
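
/* Illustrative sketch (not from the original source): on a 32-bit target a
   DImode "if (a == b) goto l;" is lowered via the xor trick above to
   roughly

	xorl  hi(b), hi(a)
	xorl  lo(b), lo(a)
	orl   hi(a), lo(a)
	je    l

   one conditional branch instead of the two or three needed by the
   general hi/lo compare sequence.  */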
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
		      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
				      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
		      (VOIDmode, pc_rtx,
		       gen_rtx_IF_THEN_ELSE (VOIDmode,
					     condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}
/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
	 into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
	  || code == ORDERED || code == UNORDERED)
	return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
	  && !TARGET_IEEE_FP)
	{
	  rtx tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	  code = swap_condition (code);
	}

      /* Try to expand the comparison and verify that we end up with
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand comparison using arithmetic that is not
	 too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
	code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
	code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
	return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
	return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
	{
	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We still can swap operands but that
	     would force loading of the constant into register.  */
	  if (op1 == const0_rtx
	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
	    return false;
	  code = (code == GTU ? GEU : LTU);
	}
      else
	{
	  rtx tmp = op1;
	  op1 = op0;
	  op0 = tmp;
	  code = (code == GTU ? LTU : GEU);
	}
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
	return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
	return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}
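
/* Illustrative sketch (not from the original source): the rewrites above
   turn flag-unfriendly codes into carry-flag tests; e.g. "a == 0" becomes
   "(unsigned) a < 1", so a single "cmpl $1, a" leaves the answer in CF
   where sbb can consume it without a branch.  */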
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (GET_MODE (op0) == TImode
      || (GET_MODE (op0) == DImode
	  && !TARGET_64BIT))
    return false;

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /* Sign bit compares are better done using shifts than we do by using
	 sbb.  */
      if (sign_bit_compare_p
	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
	{
	  /* Detect overlap between destination and compare sources.  */
	  rtx tmp = out;

	  if (!sign_bit_compare_p)
	    {
	      rtx flags;
	      bool fpcmp = false;

	      compare_code = GET_CODE (compare_op);

	      flags = XEXP (compare_op, 0);

	      if (GET_MODE (flags) == CCFPmode
		  || GET_MODE (flags) == CCFPUmode)
		{
		  fpcmp = true;
		  compare_code
		    = ix86_fp_compare_code_to_integer (compare_code);
		}

	      /* To simplify rest of code, restrict to the GEU case.  */
	      if (compare_code == LTU)
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  compare_code = reverse_condition (compare_code);
		  code = reverse_condition (code);
		}
	      else
		{
		  if (fpcmp)
		    PUT_CODE (compare_op,
			      reverse_condition_maybe_unordered
			      (GET_CODE (compare_op)));
		  else
		    PUT_CODE (compare_op,
			      reverse_condition (GET_CODE (compare_op)));
		}
	      diff = ct - cf;

	      if (reg_overlap_mentioned_p (out, op0)
		  || reg_overlap_mentioned_p (out, op1))
		tmp = gen_reg_rtx (mode);

	      if (mode == DImode)
		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
	      else
		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
						 flags, compare_op));
	    }
	  else
	    {
	      if (code == GT || code == GE)
		code = reverse_condition (code);
	      else
		{
		  HOST_WIDE_INT tmp = ct;
		  ct = cf;
		  cf = tmp;
		  diff = ct - cf;
		}
	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
	    }

	  if (diff == 1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [addl dest, ct]
	       *
	       * Size 5 - 8.
	       */
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   tmp, GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (cf == -1)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * orl $ct, dest
	       *
	       * Size 8.
	       */
	      tmp = expand_simple_binop (mode, IOR,
					 tmp, GEN_INT (ct),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else if (diff == -1 && ct)
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * notl dest
	       * [addl dest, cf]
	       *
	       * Size 8 - 11.
	       */
	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
	      if (cf)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (cf),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }
	  else
	    {
	      /*
	       * cmpl op0,op1
	       * sbbl dest,dest
	       * [notl dest]
	       * andl cf - ct, dest
	       * [addl dest, ct]
	       *
	       * Size 8 - 11.
	       */

	      if (cf == 0)
		{
		  cf = ct;
		  ct = 0;
		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
		}

	      tmp = expand_simple_binop (mode, AND,
					 copy_rtx (tmp),
					 gen_int_mode (cf - ct, mode),
					 copy_rtx (tmp), 1, OPTAB_DIRECT);
	      if (ct)
		tmp = expand_simple_binop (mode, PLUS,
					   copy_rtx (tmp), GEN_INT (ct),
					   copy_rtx (tmp), 1, OPTAB_DIRECT);
	    }

	  if (!rtx_equal_p (tmp, out))
	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));

	  return true;
	}

      if (diff < 0)
	{
	  enum machine_mode cmp_mode = GET_MODE (op0);

	  HOST_WIDE_INT tmp;
	  tmp = ct, ct = cf, cf = tmp;
	  diff = -diff;

	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
	    {
	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

	      /* We may be reversing unordered compare to normal compare, that
		 is not valid in general (we may convert non-trapping condition
		 to trapping one), however on i386 we currently emit all
		 comparisons unordered.  */
	      compare_code = reverse_condition_maybe_unordered (compare_code);
	      code = reverse_condition_maybe_unordered (code);
	    }
	  else
	    {
	      compare_code = reverse_condition (compare_code);
	      code = reverse_condition (code);
	    }
	}

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
	  && CONST_INT_P (op1))
	{
	  if (op1 == const0_rtx
	      && (code == LT || code == GE))
	    compare_code = code;
	  else if (op1 == constm1_rtx)
	    {
	      if (code == LE)
		compare_code = LT;
	      else if (code == GT)
		compare_code = GE;
	    }
	}

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
	  && GET_MODE (op0) == GET_MODE (out)
	  && (cf == -1 || ct == -1))
	{
	  /* If lea code below could be used, only optimize
	     if it results in a 2 insn sequence.  */

	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
		 || diff == 3 || diff == 5 || diff == 9)
	      || (compare_code == LT && ct == -1)
	      || (compare_code == GE && cf == -1))
	    {
	      /*
	       * notl op1	(if necessary)
	       * sarl $31, op1
	       * orl cf, op1
	       */
	      if (ct != -1)
		{
		  cf = ct;
		  ct = -1;
		  code = reverse_condition (code);
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

	      out = expand_simple_binop (mode, IOR,
					 out, GEN_INT (cf),
					 out, 1, OPTAB_DIRECT);
	      if (out != operands[0])
		emit_move_insn (operands[0], out);

	      return true;
	    }
	}

      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
	   || diff == 3 || diff == 5 || diff == 9)
	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
	  && (mode != DImode
	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
	{
	  /*
	   * xorl dest,dest
	   * cmpl op1,op2
	   * setcc dest
	   * lea cf(dest*(ct-cf)),dest
	   *
	   * Size 14.
	   *
	   * This also catches the degenerate setcc-only case.
	   */

	  rtx tmp;
	  int nops;

	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get arithmetics done in proper mode to match.  */
	  if (diff == 1)
	    tmp = copy_rtx (out);
	  else
	    {
	      rtx out1;
	      out1 = copy_rtx (out);
	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
	      nops++;
	      if (diff & 1)
		{
		  tmp = gen_rtx_PLUS (mode, tmp, out1);
		  nops++;
		}
	    }
	  if (cf != 0)
	    {
	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
	      nops++;
	    }
	  if (!rtx_equal_p (tmp, out))
	    {
	      if (nops == 1)
		out = force_operand (tmp, copy_rtx (out));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
	    }
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
	  && BRANCH_COST (optimize_insn_for_speed_p (),
			  false) >= 2)
	{
	  if (cf == 0)
	    {
	      enum machine_mode cmp_mode = GET_MODE (op0);

	      cf = ct;
	      ct = 0;

	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
		{
		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

		  /* We may be reversing unordered compare to normal compare,
		     that is not valid in general (we may convert non-trapping
		     condition to trapping one), however on i386 we currently
		     emit all comparisons unordered.  */
		  code = reverse_condition_maybe_unordered (code);
		}
	      else
		{
		  code = reverse_condition (code);
		  if (compare_code != UNKNOWN)
		    compare_code = reverse_condition (compare_code);
		}
	    }

	  if (compare_code != UNKNOWN)
	    {
	      /* notl op1	(if needed)
		 sarl $31, op1
		 andl (cf-ct), op1
		 addl ct, op1

		 For x < 0 (resp. x <= -1) there will be no notl,
		 so if possible swap the constants to get rid of the
		 complement.
		 True/false will be -1/0 while code below (store flag
		 followed by decrement) is 0/-1, so the constants need
		 to be exchanged once more.  */

	      if (compare_code == GE || !cf)
		{
		  code = reverse_condition (code);
		  compare_code = LT;
		}
	      else
		{
		  HOST_WIDE_INT tmp = cf;
		  cf = ct;
		  ct = tmp;
		}

	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
	    }
	  else
	    {
	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
					 constm1_rtx,
					 copy_rtx (out), 1, OPTAB_DIRECT);
	    }

	  out = expand_simple_binop (mode, AND, copy_rtx (out),
				     gen_int_mode (cf - ct, mode),
				     copy_rtx (out), 1, OPTAB_DIRECT);
	  if (ct)
	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
				       copy_rtx (out), 1, OPTAB_DIRECT);
	  if (!rtx_equal_p (out, operands[0]))
	    emit_move_insn (operands[0], copy_rtx (out));

	  return true;
	}
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
	return false;

      /* If one of the two operands is an interesting constant, load a
	 constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
	{
	  var = operands[3];
	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
	    operands[3] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
	    operands[3] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else if (CONST_INT_P (operands[3]))
	{
	  var = operands[2];
	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
	    operands[2] = const0_rtx, op = ior_optab;
	  else
	    return false;
	}
      else
	return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
	return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
			  OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
	  || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode,
						compare_op, operands[2],
						operands[3])));
  return true;
}
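
/* Illustrative sketch (not from the original source): for
   x = (a < b) ? 7 : 12 with unsigned operands, the sbb path above emits
   roughly

	cmpl  b, a	; CF = (a < b)
	sbbl  x, x	; x = (a < b) ? -1 : 0
	andl  $-5, x	; x = (a < b) ? -5 : 0
	addl  $12, x	; x = (a < b) ? 7 : 12

   trading a conditional branch for three dependent ALU operations.  */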
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
				  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
	break;
      /* We have no LTGT as an operator.  We could implement it with
	 NE & ORDERED, but this requires an extra temporary.  It's
	 not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
	break;
      /* For commutative operators, try to canonicalize the destination
	 operand to be first in the comparison - this helps reload to
	 avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
	break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
	 comparison operands to transform into something that is
	 supported.  */
      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
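/* Illustration of the swap above: pre-AVX SSE only provides the
   "less-than" family of mask compares (cmpltps, cmpleps, ...), so a
   request like GT (a, b) is rewritten as LT (b, a) -- the operands are
   exchanged and swap_condition turns GT into LT, which the hardware
   can then implement directly.  */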
/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
			   rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}
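/* Illustration: the canonical match for the function above is
   "dest = (a < b) ? a : b", which maps directly onto minss/minps (and
   the UNGE form, after swapping the arms, onto the same pattern).
   Keeping the operand order is what makes this IEEE safe: minss
   returns its second source operand when either input is a NaN.  */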
/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
		     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}
/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
	op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
			      gen_rtx_IF_THEN_ELSE (mode, cmp,
						    op_true,
						    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
	op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
	{
	case V4SFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvps;
	  break;
	case V2DFmode:
	  if (TARGET_SSE4_1)
	    gen = gen_sse4_1_blendvpd;
	  break;
	case V16QImode:
	case V8HImode:
	case V4SImode:
	case V2DImode:
	  if (TARGET_SSE4_1)
	    {
	      gen = gen_sse4_1_pblendvb;
	      dest = gen_lowpart (V16QImode, dest);
	      op_false = gen_lowpart (V16QImode, op_false);
	      op_true = gen_lowpart (V16QImode, op_true);
	      cmp = gen_lowpart (V16QImode, cmp);
	    }
	  break;
	case V8SFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvps256;
	  break;
	case V4DFmode:
	  if (TARGET_AVX)
	    gen = gen_avx_blendvpd256;
	  break;
	case V32QImode:
	case V16HImode:
	case V8SImode:
	case V4DImode:
	  if (TARGET_AVX2)
	    {
	      gen = gen_avx2_pblendvb;
	      dest = gen_lowpart (V32QImode, dest);
	      op_false = gen_lowpart (V32QImode, op_false);
	      op_true = gen_lowpart (V32QImode, op_true);
	      cmp = gen_lowpart (V32QImode, cmp);
	    }
	  break;
	default:
	  break;
	}

      if (gen != NULL)
	emit_insn (gen (dest, op_false, op_true, cmp));
      else
	{
	  op_true = force_reg (mode, op_true);

	  t2 = gen_reg_rtx (mode);
	  if (optimize)
	    t3 = gen_reg_rtx (mode);
	  else
	    t3 = dest;

	  x = gen_rtx_AND (mode, op_true, cmp);
	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));

	  x = gen_rtx_NOT (mode, cmp);
	  x = gen_rtx_AND (mode, x, op_false);
	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));

	  x = gen_rtx_IOR (mode, t3, t2);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
	}
    }
}
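/* Illustration: the fallback at the end of the function above is the
   classic three-instruction blend.  Given a mask CMP whose elements are
   all-ones or all-zeros, the emitted RTL computes

	t2   = op_true  &  cmp
	t3   = op_false & ~cmp
	dest = t2 | t3

   which pand/pandn/por (or andps/andnps/orps) implement without a
   branch; the SSE4.1 blendv and XOP vpcmov paths above replace the
   whole sequence with a single instruction.  */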
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
	 allocation just to gain access to it.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
	cmode = GET_MODE (op1);
      if (cmode != mode)
	return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
	return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
				     operands[2], operands[3]))
	return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
				 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
						operands[2], operands[3])));

  return true;
}
/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
					   &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
	{
	case LTGT:
	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = AND;
	  break;
	case UNEQ:
	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
				      operands[5], operands[0], operands[0]);
	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
				     operands[5], operands[1], operands[2]);
	  code = IOR;
	  break;
	default:
	  gcc_unreachable ();
	}
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
				 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
				 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
			     operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}
/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
  if ((code == LT || code == GE)
      && data_mode == mode
      && cop1 == CONST0_RTX (mode)
      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
      && (GET_MODE_SIZE (data_mode) == 16
	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
    {
      rtx negop = operands[2 - (code == LT)];
      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
      if (negop == CONST1_RTX (data_mode))
	{
	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 1, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
      else if (GET_MODE_INNER (data_mode) != DImode
	       && vector_all_ones_operand (negop, data_mode))
	{
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
					 operands[0], 0, OPTAB_DIRECT);
	  if (res != operands[0])
	    emit_move_insn (operands[0], res);
	  return true;
	}
    }

  if (!nonimmediate_operand (cop1, mode))
    cop1 = force_reg (mode, cop1);
  if (!general_operand (operands[1], data_mode))
    operands[1] = force_reg (data_mode, operands[1]);
  if (!general_operand (operands[2], data_mode))
    operands[2] = force_reg (data_mode, operands[2]);

  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
  if (TARGET_XOP
      && (mode == V16QImode || mode == V8HImode
	  || mode == V4SImode || mode == V2DImode))
    ;
  else
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
	{
	case EQ:
	case GT:
	case GTU:
	  break;

	case NE:
	case LE:
	case LEU:
	  code = reverse_condition (code);
	  negate = true;
	  break;

	case GE:
	case GEU:
	  code = reverse_condition (code);
	  negate = true;
	  /* FALLTHRU */

	case LT:
	case LTU:
	  code = swap_condition (code);
	  x = cop0, cop0 = cop1, cop1 = x;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
	{
	  switch (code)
	    {
	    case EQ:
	      /* SSE4.1 supports EQ.  */
	      if (!TARGET_SSE4_1)
		return false;
	      break;

	    case GT:
	    case GTU:
	      /* SSE4.2 supports GT/GTU.  */
	      if (!TARGET_SSE4_2)
		return false;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}

      /* Unsigned parallel compare is not supported by the hardware.
	 Play some tricks to turn this into a signed comparison
	 against 0.  */
      if (code == GTU)
	{
	  cop0 = force_reg (mode, cop0);

	  switch (mode)
	    {
	    case V8SImode:
	    case V4DImode:
	    case V4SImode:
	    case V2DImode:
	      {
		rtx t1, t2, mask;
		rtx (*gen_sub3) (rtx, rtx, rtx);

		switch (mode)
		  {
		  case V8SImode: gen_sub3 = gen_subv8si3; break;
		  case V4DImode: gen_sub3 = gen_subv4di3; break;
		  case V4SImode: gen_sub3 = gen_subv4si3; break;
		  case V2DImode: gen_sub3 = gen_subv2di3; break;
		  default:
		    gcc_unreachable ();
		  }
		/* Subtract (-(INT MAX) - 1) from both operands to make
		   them signed.  */
		mask = ix86_build_signbit_mask (mode, true, false);
		t1 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t1, cop0, mask));

		t2 = gen_reg_rtx (mode);
		emit_insn (gen_sub3 (t2, cop1, mask));

		cop0 = t1;
		cop1 = t2;
		code = GT;
	      }
	      break;

	    case V32QImode:
	    case V16HImode:
	    case V16QImode:
	    case V8HImode:
	      /* Perform a parallel unsigned saturating subtraction.  */
	      x = gen_reg_rtx (mode);
	      emit_insn (gen_rtx_SET (VOIDmode, x,
				      gen_rtx_US_MINUS (mode, cop0, cop1)));

	      cop0 = x;
	      cop1 = CONST0_RTX (mode);
	      code = EQ;
	      negate = !negate;
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
			       code, cop0, cop1,
			       operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
			 operands[2-negate]);
  return true;
}
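/* Illustration of the GTU trick above in concrete terms.  There is no
   unsigned pcmpgt, but subtracting the sign-bit mask 0x80000000 from
   each V4SImode element turns an unsigned ordering into the matching
   signed one:

	x >u y   <==>   (x - 0x80000000) >s (y - 0x80000000)

   e.g. for x = 0xFFFFFFFF, y = 1 the biased values are 0x7FFFFFFF and
   0x80000001 (= -2147483647), and the signed compare now agrees with
   the unsigned one.  The QI/HI path instead uses a saturating subtract,
   reducing GTU to an (inverted) equality test against zero.  */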
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
	{
	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
	     a constant shuffle operand.  With a tiny bit of effort we can
	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
	     unfortunate but there's no avoiding it.
	     Similarly for V16HImode we don't have instructions for variable
	     shuffling, while for V32QImode we can, after preparing suitable
	     masks, use vpshufb; vpshufb; vpermq; vpor.  */

	  if (mode == V16HImode)
	    {
	      maskmode = mode = V32QImode;
	      w = 32;
	      e = 1;
	    }
	  else
	    {
	      maskmode = mode = V8SImode;
	      w = 8;
	      e = 4;
	    }
	  t1 = gen_reg_rtx (maskmode);

	  /* Replicate the low bits of the V4DImode mask into V8SImode:
	     mask = { A B C D }
	     t1 = { A A B B C C D D }.  */
	  for (i = 0; i < w / 2; ++i)
	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_reg (maskmode, vt);
	  mask = gen_lowpart (maskmode, mask);
	  if (maskmode == V8SImode)
	    emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
	  else
	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

	  /* Multiply the shuffle indices by two.  */
	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
				    OPTAB_DIRECT);

	  /* Add one to the odd shuffle indices:
	     t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
	  for (i = 0; i < w / 2; ++i)
	    {
	      vec[i * 2] = const0_rtx;
	      vec[i * 2 + 1] = const1_rtx;
	    }
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_const_mem (maskmode, vt);
	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
				    OPTAB_DIRECT);

	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
	  operands[3] = mask = t1;
	  target = gen_lowpart (mode, target);
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);
	}

      switch (mode)
	{
	case V8SImode:
	  /* The VPERMD and VPERMPS instructions already properly ignore
	     the high bits of the shuffle elements.  No need for us to
	     perform an AND ourselves.  */
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8si (target, op0, mask));
	  else
	    {
	      t1 = gen_reg_rtx (V8SImode);
	      t2 = gen_reg_rtx (V8SImode);
	      emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
	      emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
	      goto merge_two;
	    }
	  return;

	case V8SFmode:
	  mask = gen_lowpart (V8SFmode, mask);
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
	  else
	    {
	      t1 = gen_reg_rtx (V8SFmode);
	      t2 = gen_reg_rtx (V8SFmode);
	      emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
	      emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
	      goto merge_two;
	    }
	  return;

	case V4SImode:
	  /* By combining the two 128-bit input vectors into one 256-bit
	     input vector, we can use VPERMD and VPERMPS for the full
	     two-operand shuffle.  */
	  t1 = gen_reg_rtx (V8SImode);
	  t2 = gen_reg_rtx (V8SImode);
	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
	  return;

	case V4SFmode:
	  t1 = gen_reg_rtx (V8SFmode);
	  t2 = gen_reg_rtx (V8SImode);
	  mask = gen_lowpart (V4SImode, mask);
	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
	  return;

	case V32QImode:
	  t1 = gen_reg_rtx (V32QImode);
	  t2 = gen_reg_rtx (V32QImode);
	  t3 = gen_reg_rtx (V32QImode);
	  vt2 = GEN_INT (128);
	  for (i = 0; i < 32; i++)
	    vec[i] = vt2;
	  vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt = force_reg (V32QImode, vt);
	  for (i = 0; i < 32; i++)
	    vec[i] = i < 16 ? vt2 : const0_rtx;
	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt2 = force_reg (V32QImode, vt2);
	  /* From mask create two adjusted masks, which contain the same
	     bits as mask in the low 7 bits of each vector element.
	     The first mask will have the most significant bit clear
	     if it requests element from the same 128-bit lane
	     and MSB set if it requests element from the other 128-bit lane.
	     The second mask will have the opposite values of the MSB,
	     and additionally will have its 128-bit lanes swapped.
	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
	     stands for other 12 bytes.  */
	  /* The bit whether element is from the same lane or the other
	     lane is bit 4, so shift it up by 3 to the MSB position.  */
	  emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, mask),
				    GEN_INT (3)));
	  /* Clear MSB bits from the mask just in case it had them set.  */
	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
	  /* After this t1 will have MSB set for elements from other lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt2));
	  /* Clear bits other than MSB.  */
	  emit_insn (gen_andv32qi3 (t1, t1, vt));
	  /* Or in the lower bits from mask into t3.  */
	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
	  /* And invert MSB bits in t1, so MSB is set for elements from the same
	     lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
	  /* Swap 128-bit lanes in t3.  */
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  /* And or in the lower bits from mask into t1.  */
	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
	  if (one_operand_shuffle)
	    {
	      /* Each of these shuffles will put 0s in places where
		 element from the other 128-bit lane is needed, otherwise
		 will shuffle in the requested value.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
	      /* For t3 the 128-bit lanes are swapped again.  */
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					      gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      /* And oring both together leads to the result.  */
	      emit_insn (gen_iorv32qi3 (target, t1, t3));
	      return;
	    }

	  t4 = gen_reg_rtx (V32QImode);
	  /* Similarly to the above one_operand_shuffle code,
	     just repeated twice for each operand; the merge_two:
	     code will merge the two results together.  */
	  emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
	  emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
	  emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
					  gen_lowpart (V4DImode, t4),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  emit_insn (gen_iorv32qi3 (t4, t2, t4));
	  emit_insn (gen_iorv32qi3 (t3, t1, t3));
	  t1 = t4;
	  t2 = t3;
	  goto merge_two;

	default:
	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
	  break;
	}
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
	 one_operand_shuffle special case, we avoid creating another
	 set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
				  GEN_INT (exact_log2 (e)),
				  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      if (TARGET_XOP)
	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
	 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    {
      emit_insn (gen_xop_pperm (target, op0, op1, mask));
    }
  else if (one_operand_shuffle)
    {
      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
    }
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
	 element contained a bit set that indicates the second word.  */
      mask = operands[3];
      if (maskmode == V2DImode && !TARGET_SSE4_1)
	{
	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At which point the masking done by
	     expand_int_vcond will work as desired.  */
	  rtx t3 = gen_reg_rtx (V4SImode);
	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
					const0_rtx, const0_rtx,
					const2_rtx, const2_rtx));
	  mask = t3;
	  maskmode = V4SImode;
	  e = w = 4;
	}

      vt = GEN_INT (w);
      for (i = 0; i < w; i++)
	vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
				  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
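/* Illustration of the control-vector arithmetic used above.  For a
   V4SImode shuffle (e == 4), a word index k must become the four byte
   indices {4k, 4k+1, 4k+2, 4k+3} before pshufb can consume it.  The
   code gets there by shifting each index left by log2(e) (k -> 4k),
   replicating it across the element's byte positions, and adding the
   constant {0,1,2,3, 0,1,2,3, ...}; a mask selecting word 2 thus
   expands to the byte indices {8,9,10,11}.  */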
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (src);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case V32QImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv16qiv16hi2;
	  else
	    unpack = gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case V16HImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv8hiv8si2;
	  else
	    unpack = gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case V8SImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv4siv4di2;
	  else
	    unpack = gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case V16QImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
	  else
	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case V8HImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
	  else
	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case V4SImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv2siv2di2;
	  else
	    unpack = gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) == 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, src));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (imode);
	  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
					 gen_lowpart (V1TImode, src),
					 GEN_INT (64)));
	}
      else
	tmp = src;

      emit_insn (unpack (dest, tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);

      switch (imode)
	{
	case V16QImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv16qi;
	  else
	    unpack = gen_vec_interleave_lowv16qi;
	  break;
	case V8HImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv8hi;
	  else
	    unpack = gen_vec_interleave_lowv8hi;
	  break;
	case V4SImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv4si;
	  else
	    unpack = gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (unsigned_p)
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
				   src, pc_rtx, pc_rtx);

      emit_insn (unpack (gen_lowpart (imode, dest), src, tmp));
    }
}
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_subqi3_carry;
	  break;
	case HImode:
	  insn = gen_subhi3_carry;
	  break;
	case SImode:
	  insn = gen_subsi3_carry;
	  break;
	case DImode:
	  insn = gen_subdi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_addqi3_carry;
	  break;
	case HImode:
	  insn = gen_addhi3_carry;
	  break;
	case SImode:
	  insn = gen_addsi3_carry;
	  break;
	case DImode:
	  insn = gen_adddi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
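/* Illustration of the code the function above emits: for
   "dest = a + (b < c)" with an unsigned comparison it produces roughly

	cmpl	%ecx, %ebx	; sets CF iff b < c
	adcl	$0, %eax	; dest += carry

   and the decrement flavour uses sbb the same way, which is why only
   the constants +1 and -1 are accepted.  (Register choice here is
   purely illustrative.)  */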
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and nonoffsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally four parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsetable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, word_mode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case TFmode:
		  real_to_target (l, &r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
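/* Illustration of the CONST_DOUBLE splitting above: on a 32-bit target
   the DFmode constant 1.0 (bit pattern 0x3FF0000000000000) splits into
   parts[0] = 0x00000000 and parts[1] = 0x3FF00000, exactly the two
   SImode words a pair of immediate moves can store.  */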
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Operands 2-4 contain the input values in the correct order; operands
   5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], word_mode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (Pmode, src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      rtx tmp;

      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts[1] || collisionparts[2]))
	{
	  if (collisionparts[1])
	    {
	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	    }
	  else
	    {
	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (ix86_gen_add3 (stack_pointer_rtx,
					  stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these come from attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
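/* Illustration of why the reversed copy order above matters: splitting a
   DImode move whose destination parts are {eax, edx} and source parts
   are {ebx, eax}, the destination low half and the source high half
   share %eax; copying low-to-high would clobber the source half before
   it is read, so the splitter emits the high move first.  (Registers
   chosen only for the example.)  */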
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > half_width)
	    ix86_expand_ashl_const (high[0], count - half_width, mode);
	}
      else
	{
	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode-capable register, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  enum machine_mode half_mode;
	  rtx (*gen_lshr3)(rtx, rtx, rtx);
	  rtx (*gen_and3)(rtx, rtx, rtx);
	  rtx (*gen_xor3)(rtx, rtx, rtx);
	  HOST_WIDE_INT bits;
	  rtx x;

	  if (mode == DImode)
	    {
	      half_mode = SImode;
	      gen_lshr3 = gen_lshrsi3;
	      gen_and3 = gen_andsi3;
	      gen_xor3 = gen_xorsi3;
	      bits = 5;
	    }
	  else
	    {
	      half_mode = DImode;
	      gen_lshr3 = gen_lshrdi3;
	      gen_and3 = gen_anddi3;
	      gen_xor3 = gen_xordi3;
	      bits = 6;
	    }

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
	  else
	    x = gen_lowpart (half_mode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
	}

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
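/* Illustration of the instruction shape produced above for a variable
   64-bit left shift on a 32-bit target, roughly

	shld	%cl, %eax, %edx	; high = high:low << (cl & 31)
	sall	%cl, %eax	; low <<= (cl & 31)

   followed by one of the x86_shift*_adj patterns, which move low into
   high and clear low when bit 5 of the count is set (shifts of
   32..63).  */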
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));

	  if (count > half_width)
	    emit_insn (gen_ashr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashr3 (scratch, scratch,
				GEN_INT (half_width - 1)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
	}
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > half_width)
	    emit_insn (gen_lshr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
	}
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
/* Helper function for the string operations below.  Test VARIABLE whether
   it is aligned to VALUE bytes.  If true, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  if (GET_MODE (exp) != Pmode)
    exp = convert_to_mode (Pmode, exp, 1);
  return force_reg (Pmode, exp);
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */

static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop that moves memory from
   pointer SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be in
   MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem =
		  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem =
		  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2)
			/ expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
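/* Illustration: the loop emitted above has, schematically, the shape

	size = count & -(piece * unroll);
	iter = 0;
   top:
	<UNROLL copies/stores of one MODE-sized piece at dest/src + iter>
	iter += piece * unroll;
	if (iter < size) goto top;
	dest += iter;  src += iter;

   which is why the size is rounded down to a whole number of chunks and
   the callers handle the remaining tail bytes separately.  */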
/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
			   rtx destptr, rtx srcptr,
			   rtx count,
			   enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, rounded_count);
      set_mem_size (srcmem, rounded_count);
    }
  else
    {
      if (MEM_SIZE_KNOWN_P (destmem))
	clear_mem_size (destmem);
      if (MEM_SIZE_KNOWN_P (srcmem))
	clear_mem_size (srcmem);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			  destexp, srcexp));
}
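/* Illustration: copying a known 32-byte block with SImode chunks scales
   the count to 32/4 = 8 and emits "rep movsd" with the count register
   set to 8; DESTEXP/SRCEXP describe the final pointer values
   (ptr + (count << 2)) so the rep_mov pattern can expose them to the
   RTL dataflow.  */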
/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
			    rtx count, enum machine_mode mode,
			    rtx orig_value)
{
  rtx destexp;
  rtx countreg;
  HOST_WIDE_INT rounded_count;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      rounded_count = (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, rounded_count);
    }
  else if (MEM_SIZE_KNOWN_P (destmem))
    clear_mem_size (destmem);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
          else
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
          offset += 1;
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }
  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.  */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
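
/* A minimal sketch, in plain C, of the constant-count tail dispatch in
   expand_movmem_epilogue above: test bits of the residual count,
   biggest chunk first, so at most one move per power of two is needed.
   The helper below is hypothetical and kept under #if 0.  */
#if 0
static void
sketch_copy_tail (unsigned char *dst, const unsigned char *src,
                  unsigned long count)
{
  unsigned long offset = 0;
  if (count & 8) { __builtin_memcpy (dst + offset, src + offset, 8); offset += 8; }
  if (count & 4) { __builtin_memcpy (dst + offset, src + offset, 4); offset += 4; }
  if (count & 2) { __builtin_memcpy (dst + offset, src + offset, 2); offset += 2; }
  if (count & 1)
    dst[offset] = src[offset];
}
#endif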
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 4);
}
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            {
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
          offset += 1;
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough from SRC to DEST to align DEST, known to be aligned to ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
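
/* A minimal sketch, in plain C, of the alignment peeling above: copy
   1, 2 and 4 byte pieces until DST reaches the desired alignment.  It
   assumes COUNT is at least DESIRED_ALIGN, which the prologue guard
   ensures in the real expander.  Hypothetical helper, under #if 0.  */
#if 0
static void
sketch_align_dst (unsigned char **dstp, const unsigned char **srcp,
                  unsigned long *countp, int desired_align)
{
  if (desired_align > 1 && ((unsigned long) *dstp & 1))
    { *(*dstp)++ = *(*srcp)++; (*countp)--; }
  if (desired_align > 2 && ((unsigned long) *dstp & 2))
    { __builtin_memcpy (*dstp, *srcp, 2); *dstp += 2; *srcp += 2; *countp -= 2; }
  if (desired_align > 4 && ((unsigned long) *dstp & 4))
    { __builtin_memcpy (*dstp, *srcp, 4); *dstp += 4; *srcp += 4; *countp -= 4; }
}
#endif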
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        {
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
            src_align = 4;
          else if ((src_align_bytes & 1) == (align_bytes & 1))
            src_align = 2;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
        src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
        src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
        src_align = 2;
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Set enough of DEST to align DEST, known to be aligned to ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Set enough of DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             || (memset
                                 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable                    \
                           || (alg != rep_prefix_1_byte         \
                               && alg != rep_prefix_4_byte      \
                               && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop, REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    return alg;
                  break;
                }
              else if (ALG_USABLE_P (candidate))
                return candidate;
            }
        }
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
        }
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
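
/* A minimal sketch, in plain C, of the size-bucket walk above: entries
   are ordered by MAX, and the first bucket whose MAX covers the
   expected size (or is -1, meaning unbounded) supplies the algorithm.
   The types and names below are hypothetical stand-ins, under #if 0.  */
#if 0
struct sketch_bucket { long max; int alg; };

static int
sketch_pick_alg (const struct sketch_bucket *buckets, int n, long expected)
{
  int i;
  for (i = 0; i < n; i++)
    if (buckets[i].max == -1 || buckets[i].max >= expected)
      return buckets[i].alg;
  return -1;    /* nothing matched; fall back to a library call */
}
#endif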
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
         copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
         copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */

static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  srcreg = copy_addr_to_reg (XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
        {
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
            {
              emit_block_move_via_libcall (dst, src, count_exp, false);
              count_exp = const0_rtx;
              goto epilogue;
            }
        }
      else
        {
          rtx hot_label = gen_label_rtx ();
          jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, GET_MODE (count_exp), 1, hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          emit_block_move_via_libcall (dst, src, count_exp, false);
          emit_jump (jump_around_label);
          emit_label (hot_label);
        }
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          src = change_address (src, BLKmode, srcreg);
          dst = change_address (dst, BLKmode, destreg);
          expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
         registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, word_mode, TARGET_64BIT ? 4 : 2,
                                     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                          (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                            epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
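
/* A minimal sketch, in plain C, of the four-step shape that
   ix86_expand_movmem generates, assuming an 8-byte main loop and
   8-byte desired alignment.  The helper below is hypothetical and is
   kept under #if 0 so it is never compiled.  */
#if 0
static void
sketch_memcpy_shape (unsigned char *dst, const unsigned char *src,
                     unsigned long n)
{
  if (n < 8)                               /* 1) prologue guard */
    goto epilogue;
  while (((unsigned long) dst & 7) && n)   /* 2) alignment prologue */
    { *dst++ = *src++; n--; }
  for (; n >= 8; n -= 8)                   /* 3) main body */
    { __builtin_memcpy (dst, src, 8); dst += 8; src += 8; }
 epilogue:                                 /* 4) epilogue for the tail */
  while (n)
    { *dst++ = *src++; n--; }
}
#endif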
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of wide specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
          + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        {
          if (mode == SImode)
            emit_insn (gen_movsi_insv_1 (reg, reg));
          else
            emit_insn (gen_movdi_insv_1 (reg, reg));
        }
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg =
            expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access on reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_addr_to_reg (XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (word_mode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        {
          enum machine_mode mode = SImode;
          if (TARGET_64BIT && (count & ~0xffffffff))
            mode = DImode;
          count_exp = force_reg (mode, count_exp);
        }
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (ie one load of the big constant in the
     front of all code.  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
         promoting mode.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use byte
         loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size <= epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                               LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
          dst = change_address (dst, BLKmode, destreg);
          expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (counter_mode (count_exp),
                                     count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, word_mode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, word_mode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  DImode, val_exp);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  SImode, val_exp);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  QImode, val_exp);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                        (count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                                epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check if is aligned to 4 - byte.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It gives only huge programs, but does not help to
     speed up.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */

  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
                         gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                              gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                    reg,
                                                    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
                              gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
                              gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                    reg2,
                                                    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
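
/* A minimal sketch, in plain C, of the zero-byte test generated above:
   (w - 0x01010101) & ~w & 0x80808080 is nonzero iff some byte of W is
   zero, which is what lets the loop scan four bytes per branch.
   Hypothetical helper, kept under #if 0 so it is never compiled.  */
#if 0
static int
sketch_has_zero_byte (unsigned int w)
{
  return ((w - 0x01010101U) & ~w & 0x80808080U) != 0;
}
#endif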
/* Expand strlen.  */

bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction is done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
         often used and I use one fewer register for the lifetime of
         output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
         the string, like memchr(), so compute the length by subtracting
         the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
                                                 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For given symbol (function) construct code to compute address of its PLT
   entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp, unspec;

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);
  gcc_assert (Pmode == DImode);

  tmp = gen_reg_rtx (Pmode);
  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2,
                  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
        XMM6_REG, XMM7_REG, XMM8_REG,
        XMM9_REG, XMM10_REG, XMM11_REG,
        XMM12_REG, XMM13_REG, XMM14_REG,
        XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
        fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
          && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
          && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
        use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
           : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != word_mode)
        fnaddr = convert_to_mode (word_mode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
    }

  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
                                       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
        vec[vec_len++]
          = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
                             ? TImode : DImode,
                             gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
                                          ? TImode : DImode,
                                          clobbered_registers[i]));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
        {
          if (cfun->machine->callee_return_avx256_p)
            avx256 = callee_return_pass_avx256;
          else
            avx256 = callee_pass_avx256;
        }
      else if (cfun->machine->callee_return_avx256_p)
        avx256 = callee_return_avx256;
      else
        avx256 = call_no_avx256;

      if (reload_completed)
        emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
        vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
                                         gen_rtvec (1, GEN_INT (avx256)),
                                         UNSPEC_CALL_NEEDS_VZEROUPPER);
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
        xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
         to include REX.W.  */
      else if (TARGET_SEH)
        xasm = "rex.W jmp %A0";
      else
        xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
        {
          /* If we get to another real insn, we don't need the nop.  */
          if (INSN_P (i))
            break;

          /* If we get to the epilogue note, prevent a catch region from
             being adjacent to the standard epilogue sequence.  If non-
             call-exceptions, we'll have done this during epilogue emission.  */
          if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
              && !flag_non_call_exceptions
              && !can_throw_internal (insn))
            {
              seh_nop_p = true;
              break;
            }
        }

      /* If we didn't find a real insn following the call, prevent the
         unwinder from looking into the next function.  */
      if (i == NULL)
        seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */

static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  */

int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add length of addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
	 || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (REG_P (addr)
	  && (addr == arg_pointer_rtx
	      || addr == frame_pointer_rtx
	      || REGNO (addr) == SP_REG
	      || REGNO (addr) == BP_REG
	      || REGNO (addr) == R12_REG
	      || REGNO (addr) == R13_REG))
	len += 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) != LABEL_REF
	      && (GET_CODE (symbol) != SYMBOL_REF
		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
	      && (GET_CODE (symbol) != UNSPEC
		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
		      && XINT (symbol, 1) != UNSPEC_PCREL
		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
	    len += 1;
	}
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len += 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && REG_P (base)
	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len += 1;
    }

  return len;
}
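/* Worked example, not from the original source: byte counts this function
   yields for some common 32-bit addresses (the modrm and opcode bytes are
   excluded, per the comment above):

     (%eax)         -> 0   plain register indirect
     (%esp)         -> 1   esp as base forces a SIB byte
     8(%ebp)        -> 1   ebp as base forces at least a disp8
     4096(%eax)     -> 4   displacement does not fit in disp8, so disp32
     (%eax,%ebx,4)  -> 1   an index always needs the SIB byte  */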
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */

int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded as 32bit
	     sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
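/* Hedged example, not part of GCC: the shortform path above is why
   "add $100, %eax" can use the imm8 form (83 /0 ib) while
   "add $1000, %eax" needs the full imm32 form (81 /0 id); only values
   that land in [-128, 127] after truncation to the operand mode get the
   1-byte encoding.  A minimal stand-alone model of that test:  */
#if 0
static int
example_imm_len (HOST_WIDE_INT ival)
{
  return IN_RANGE (ival, -128, 127) ? 1 : 4;
}
#endif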
/* Compute default value for "length_address" attribute.  */

int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
	{
	  if (GET_CODE (addr) == ZERO_EXTEND)
	    addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == SUBREG)
	    addr = SUBREG_REG (addr);
	}

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	constrain_operands_cached (reload_completed);
	if (which_alternative != -1)
	  {
	    const char *constraints = recog_data.constraints[i];
	    int alt = which_alternative;

	    while (*constraints == '=' || *constraints == '+')
	      constraints++;
	    while (alt-- > 0)
	      while (*constraints++ != ',')
		;
	    /* Skip ignored operands.  */
	    if (*constraints == 'X')
	      continue;
	  }
	return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      {
	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (MEM_P (recog_data.operand[i])
	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;
      }

  return 2 + 1;
}
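/* Hedged sketch, not part of GCC, mirroring the decision above: the
   2-byte VEX prefix (c5 xx) is only usable for opcodes in the 0f map
   with none of REX.W/X/B needed; everything else falls back to the
   3-byte form (c4 xx xx).  Both results include the opcode byte.  */
#if 0
static int
example_vex_len (bool in_0f_map, bool needs_w, bool needs_x_or_b)
{
  if (!in_0f_map || needs_w || needs_x_or_b)
    return 3 + 1;	/* 3-byte VEX prefix + opcode.  */
  return 2 + 1;		/* 2-byte VEX prefix + opcode.  */
}
#endif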
/* Return the maximum number of instructions a cpu can issue.  */

static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
    case PROCESSOR_BTVER2:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */

static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;

  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	rtx addr = XEXP (recog_data.operand[i], 0);
	return modified_in_p (addr, set_insn) != 0;
      }

  return false;
}
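/* Example, not from the original source: the classic address-generation
   interlock the predicate above detects.  The first insn writes %eax and
   the second uses %eax inside its memory address, so on Pentium an extra
   cycle is charged between them:

     addl $4, %eax
     movl (%eax), %edx  */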
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);

	  if (GET_CODE (addr) == PARALLEL)
	    addr = XVECEXP (addr, 0, 0);

	  gcc_assert (GET_CODE (addr) == SET);

	  addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;

    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
    case PROCESSOR_BTVER2:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost = 3;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */

static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
	 as many instructions can be executed on a cycle, i.e.,
	 issue_rate.  I wonder why tuning for many CPUs does not do this.  */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
/* Try to reorder ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) an IMUL instruction is on the top of the list;
   (2) there exists exactly one producer of an independent IMUL
       instruction in the ready list;
   in which case the found producer is moved to the top of the ready list.
   Returns issue rate.  */

static int
ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
		    int clock_var ATTRIBUTE_UNUSED)
{
  static int issue_rate = -1;
  int n_ready = *pn_ready;
  rtx insn, insn1, insn2;
  int i;
  sd_iterator_def sd_it;
  dep_t dep;
  int index = -1;

  /* Set up issue rate.  */
  issue_rate = ix86_issue_rate ();

  /* Do reordering for Atom only.  */
  if (ix86_tune != PROCESSOR_ATOM)
    return issue_rate;
  /* Nothing to do if ready list contains only 1 instruction.  */
  if (n_ready <= 1)
    return issue_rate;

  /* Check that IMUL instruction is on the top of ready list.  */
  insn = ready[n_ready - 1];
  if (!NONDEBUG_INSN_P (insn))
    return issue_rate;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return issue_rate;
  if (!(GET_CODE (SET_SRC (insn)) == MULT
	&& GET_MODE (SET_SRC (insn)) == SImode))
    return issue_rate;

  /* Search for producer of independent IMUL instruction.  */
  for (i = n_ready - 2; i >= 0; i--)
    {
      insn = ready[i];
      if (!NONDEBUG_INSN_P (insn))
	continue;
      /* Skip IMUL instruction.  */
      insn2 = PATTERN (insn);
      if (GET_CODE (insn2) == PARALLEL)
	insn2 = XVECEXP (insn2, 0, 0);
      if (GET_CODE (insn2) == SET
	  && GET_CODE (SET_SRC (insn2)) == MULT
	  && GET_MODE (SET_SRC (insn2)) == SImode)
	continue;

      FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
	{
	  rtx con;

	  con = DEP_CON (dep);
	  if (!NONDEBUG_INSN_P (con))
	    continue;
	  insn1 = PATTERN (con);
	  if (GET_CODE (insn1) == PARALLEL)
	    insn1 = XVECEXP (insn1, 0, 0);

	  if (GET_CODE (insn1) == SET
	      && GET_CODE (SET_SRC (insn1)) == MULT
	      && GET_MODE (SET_SRC (insn1)) == SImode)
	    {
	      sd_iterator_def sd_it1;
	      dep_t dep1;

	      /* Check if there is no other dependee for IMUL.  */
	      index = i;
	      FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
		{
		  rtx pro;

		  pro = DEP_PRO (dep1);
		  if (!NONDEBUG_INSN_P (pro))
		    continue;
		  if (pro != insn)
		    index = -1;
		}
	      if (index >= 0)
		break;
	    }
	}
      if (index >= 0)
	break;
    }
  if (index < 0)
    return issue_rate; /* Didn't find IMUL producer.  */

  if (sched_verbose > 1)
    fprintf (dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
	     INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));

  /* Put IMUL producer (ready[index]) at the top of ready list.  */
  insn1 = ready[index];
  for (i = index; i < n_ready - 1; i++)
    ready[i] = ready[i + 1];
  ready[n_ready - 1] = insn1;

  return issue_rate;
}
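/* Illustration, not from the original source: given a ready list whose
   top holds an SImode multiply and whose sole producer of a second,
   independent multiply sits lower in the list,

     ready: [ ..., producer, ..., imull ]   (top at the right)

   the rotation above yields

     ready: [ ..., ..., imull, producer ]

   so the producer issues next and the two multiplies can overlap in
   Atom's pipelined IMUL unit.  */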
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}
/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}
static int min_insn_size (rtx);

/* Filter out insns from ready_try that the core will not be able to issue
   on current cycle due to decoder.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
(const_ix86_first_cycle_multipass_data_t data,
 char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
	continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this insn is too long for a secondary decoder ...  */
	  (!first_cycle_insn_p
	   && insn_size > core2i7_secondary_decoder_max_insn_size)
	  /* ... or it would not fit into the ifetch block ...  */
	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
	  /* ... or the decoder is full already ...  */
	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
	/* ... mask the insn out.  */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    SET_BIT (data->ready_try_change, n_ready);
	}
    }
}
/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}
/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      targetm.sched.dfa_post_advance_cycle
	= core2i7_dfa_post_advance_cycle;
      targetm.sched.first_cycle_multipass_init
	= core2i7_first_cycle_multipass_init;
      targetm.sched.first_cycle_multipass_begin
	= core2i7_first_cycle_multipass_begin;
      targetm.sched.first_cycle_multipass_issue
	= core2i7_first_cycle_multipass_issue;
      targetm.sched.first_cycle_multipass_backtrack
	= core2i7_first_cycle_multipass_backtrack;
      targetm.sched.first_cycle_multipass_end
	= core2i7_first_cycle_multipass_end;
      targetm.sched.first_cycle_multipass_fini
	= core2i7_first_cycle_multipass_fini;

      /* Set decoder parameters.  */
      core2i7_secondary_decoder_max_insn_size = 8;
      core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
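/* Hedged sketch, not part of GCC: the decoder constraint enforced by the
   hooks above in miniature, using the parameters just set (16-byte ifetch
   block, at most 6 insns per cycle, 8-byte limit on insns handled by the
   secondary decoders).  This mirrors the masking test in
   core2i7_first_cycle_multipass_filter_ready_try.  */
#if 0
static bool
example_decoder_accepts (int block_len, int block_n_insns,
			 int insn_size, bool first_cycle_insn_p)
{
  if (!first_cycle_insn_p && insn_size > 8)
    return false;	/* Too long for a secondary decoder.  */
  if (block_len + insn_size > 16)
    return false;	/* Would overflow the ifetch block.  */
  if (block_n_insns + 1 > 6)
    return false;	/* Decoder already full this cycle.  */
  return true;
}
#endif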
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align
    = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
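/* Worked example, not from the original source: on x86-64, a static
   "char buf[100]" has TYPE_SIZE 800 bits, which is >= 128, so the ABI
   clause above raises its alignment to 128 bits (16 bytes), making
   aligned SSE accesses possible; a "char buf[8]" keeps its ordinary
   alignment.  */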
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  Exact wording is:

       An array uses the same alignment as its elements, except that a local
       or global array variable of length at least 16 bytes or a C99
       variable-length array variable always has alignment of at least
       16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function compiled, and
     functions from other units cannot rely on the alignment.

     Exclude va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
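/* Summary example, not from the original source, of the 32-bit register
   choice made above:

     default (cdecl, regparm <= 2)  -> static chain in %ecx
     fastcall / thiscall            -> %ecx carries arguments, so %eax
     regparm(3)                     -> no free call-clobbered register;
                                       the trampoline pushes the chain and
                                       an alternate entry point saves %esi  */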
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
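/* Illustration, not from the original source: byte image of the 64-bit
   trampoline built above when both values need the full movabs form:

     49 bb <8-byte fnaddr>    movabs $fnaddr, %r11
     49 ba <8-byte chain>     movabs $chain,  %r10
     49 ff e3                 jmp    *%r11
     90                       nop  (pads the final 32-bit store)

   The constants 0xbb49, 0xba49 and 0x90e3ff49 written above are exactly
   these opcode bytes stored in little-endian order.  */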
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}

/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  /* SSE */
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTI64,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3 */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3 */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1 */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,

  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_FLOORPS_SFIX,
  IX86_BUILTIN_CEILPS_SFIX,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,

  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,

  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_FLOORPS_SFIX256,
  IX86_BUILTIN_CEILPS_SFIX256,
  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
,
25841 IX86_BUILTIN_VINSERT128I256
,
25842 IX86_BUILTIN_MASKLOADD
,
25843 IX86_BUILTIN_MASKLOADQ
,
25844 IX86_BUILTIN_MASKLOADD256
,
25845 IX86_BUILTIN_MASKLOADQ256
,
25846 IX86_BUILTIN_MASKSTORED
,
25847 IX86_BUILTIN_MASKSTOREQ
,
25848 IX86_BUILTIN_MASKSTORED256
,
25849 IX86_BUILTIN_MASKSTOREQ256
,
25850 IX86_BUILTIN_PSLLVV4DI
,
25851 IX86_BUILTIN_PSLLVV2DI
,
25852 IX86_BUILTIN_PSLLVV8SI
,
25853 IX86_BUILTIN_PSLLVV4SI
,
25854 IX86_BUILTIN_PSRAVV8SI
,
25855 IX86_BUILTIN_PSRAVV4SI
,
25856 IX86_BUILTIN_PSRLVV4DI
,
25857 IX86_BUILTIN_PSRLVV2DI
,
25858 IX86_BUILTIN_PSRLVV8SI
,
25859 IX86_BUILTIN_PSRLVV4SI
,
25861 IX86_BUILTIN_GATHERSIV2DF
,
25862 IX86_BUILTIN_GATHERSIV4DF
,
25863 IX86_BUILTIN_GATHERDIV2DF
,
25864 IX86_BUILTIN_GATHERDIV4DF
,
25865 IX86_BUILTIN_GATHERSIV4SF
,
25866 IX86_BUILTIN_GATHERSIV8SF
,
25867 IX86_BUILTIN_GATHERDIV4SF
,
25868 IX86_BUILTIN_GATHERDIV8SF
,
25869 IX86_BUILTIN_GATHERSIV2DI
,
25870 IX86_BUILTIN_GATHERSIV4DI
,
25871 IX86_BUILTIN_GATHERDIV2DI
,
25872 IX86_BUILTIN_GATHERDIV4DI
,
25873 IX86_BUILTIN_GATHERSIV4SI
,
25874 IX86_BUILTIN_GATHERSIV8SI
,
25875 IX86_BUILTIN_GATHERDIV4SI
,
25876 IX86_BUILTIN_GATHERDIV8SI
,
25878 /* Alternate 4 element gather for the vectorizer where
25879 all operands are 32-byte wide. */
25880 IX86_BUILTIN_GATHERALTSIV4DF
,
25881 IX86_BUILTIN_GATHERALTDIV8SF
,
25882 IX86_BUILTIN_GATHERALTSIV4DI
,
25883 IX86_BUILTIN_GATHERALTDIV8SI
,
25885 /* TFmode support builtins. */
25887 IX86_BUILTIN_HUGE_VALQ
,
25888 IX86_BUILTIN_FABSQ
,
25889 IX86_BUILTIN_COPYSIGNQ
,
25891 /* Vectorizer support builtins. */
25892 IX86_BUILTIN_CPYSGNPS
,
25893 IX86_BUILTIN_CPYSGNPD
,
25894 IX86_BUILTIN_CPYSGNPS256
,
25895 IX86_BUILTIN_CPYSGNPD256
,
25897 /* FMA4 instructions. */
25898 IX86_BUILTIN_VFMADDSS
,
25899 IX86_BUILTIN_VFMADDSD
,
25900 IX86_BUILTIN_VFMADDPS
,
25901 IX86_BUILTIN_VFMADDPD
,
25902 IX86_BUILTIN_VFMADDPS256
,
25903 IX86_BUILTIN_VFMADDPD256
,
25904 IX86_BUILTIN_VFMADDSUBPS
,
25905 IX86_BUILTIN_VFMADDSUBPD
,
25906 IX86_BUILTIN_VFMADDSUBPS256
,
25907 IX86_BUILTIN_VFMADDSUBPD256
,
25909 /* FMA3 instructions. */
25910 IX86_BUILTIN_VFMADDSS3
,
25911 IX86_BUILTIN_VFMADDSD3
,
25913 /* XOP instructions. */
25914 IX86_BUILTIN_VPCMOV
,
25915 IX86_BUILTIN_VPCMOV_V2DI
,
25916 IX86_BUILTIN_VPCMOV_V4SI
,
25917 IX86_BUILTIN_VPCMOV_V8HI
,
25918 IX86_BUILTIN_VPCMOV_V16QI
,
25919 IX86_BUILTIN_VPCMOV_V4SF
,
25920 IX86_BUILTIN_VPCMOV_V2DF
,
25921 IX86_BUILTIN_VPCMOV256
,
25922 IX86_BUILTIN_VPCMOV_V4DI256
,
25923 IX86_BUILTIN_VPCMOV_V8SI256
,
25924 IX86_BUILTIN_VPCMOV_V16HI256
,
25925 IX86_BUILTIN_VPCMOV_V32QI256
,
25926 IX86_BUILTIN_VPCMOV_V8SF256
,
25927 IX86_BUILTIN_VPCMOV_V4DF256
,
25929 IX86_BUILTIN_VPPERM
,
25931 IX86_BUILTIN_VPMACSSWW
,
25932 IX86_BUILTIN_VPMACSWW
,
25933 IX86_BUILTIN_VPMACSSWD
,
25934 IX86_BUILTIN_VPMACSWD
,
25935 IX86_BUILTIN_VPMACSSDD
,
25936 IX86_BUILTIN_VPMACSDD
,
25937 IX86_BUILTIN_VPMACSSDQL
,
25938 IX86_BUILTIN_VPMACSSDQH
,
25939 IX86_BUILTIN_VPMACSDQL
,
25940 IX86_BUILTIN_VPMACSDQH
,
25941 IX86_BUILTIN_VPMADCSSWD
,
25942 IX86_BUILTIN_VPMADCSWD
,
25944 IX86_BUILTIN_VPHADDBW
,
25945 IX86_BUILTIN_VPHADDBD
,
25946 IX86_BUILTIN_VPHADDBQ
,
25947 IX86_BUILTIN_VPHADDWD
,
25948 IX86_BUILTIN_VPHADDWQ
,
25949 IX86_BUILTIN_VPHADDDQ
,
25950 IX86_BUILTIN_VPHADDUBW
,
25951 IX86_BUILTIN_VPHADDUBD
,
25952 IX86_BUILTIN_VPHADDUBQ
,
25953 IX86_BUILTIN_VPHADDUWD
,
25954 IX86_BUILTIN_VPHADDUWQ
,
25955 IX86_BUILTIN_VPHADDUDQ
,
25956 IX86_BUILTIN_VPHSUBBW
,
25957 IX86_BUILTIN_VPHSUBWD
,
25958 IX86_BUILTIN_VPHSUBDQ
,
25960 IX86_BUILTIN_VPROTB
,
25961 IX86_BUILTIN_VPROTW
,
25962 IX86_BUILTIN_VPROTD
,
25963 IX86_BUILTIN_VPROTQ
,
25964 IX86_BUILTIN_VPROTB_IMM
,
25965 IX86_BUILTIN_VPROTW_IMM
,
25966 IX86_BUILTIN_VPROTD_IMM
,
25967 IX86_BUILTIN_VPROTQ_IMM
,
25969 IX86_BUILTIN_VPSHLB
,
25970 IX86_BUILTIN_VPSHLW
,
25971 IX86_BUILTIN_VPSHLD
,
25972 IX86_BUILTIN_VPSHLQ
,
25973 IX86_BUILTIN_VPSHAB
,
25974 IX86_BUILTIN_VPSHAW
,
25975 IX86_BUILTIN_VPSHAD
,
25976 IX86_BUILTIN_VPSHAQ
,
25978 IX86_BUILTIN_VFRCZSS
,
25979 IX86_BUILTIN_VFRCZSD
,
25980 IX86_BUILTIN_VFRCZPS
,
25981 IX86_BUILTIN_VFRCZPD
,
25982 IX86_BUILTIN_VFRCZPS256
,
25983 IX86_BUILTIN_VFRCZPD256
,
25985 IX86_BUILTIN_VPCOMEQUB
,
25986 IX86_BUILTIN_VPCOMNEUB
,
25987 IX86_BUILTIN_VPCOMLTUB
,
25988 IX86_BUILTIN_VPCOMLEUB
,
25989 IX86_BUILTIN_VPCOMGTUB
,
25990 IX86_BUILTIN_VPCOMGEUB
,
25991 IX86_BUILTIN_VPCOMFALSEUB
,
25992 IX86_BUILTIN_VPCOMTRUEUB
,
25994 IX86_BUILTIN_VPCOMEQUW
,
25995 IX86_BUILTIN_VPCOMNEUW
,
25996 IX86_BUILTIN_VPCOMLTUW
,
25997 IX86_BUILTIN_VPCOMLEUW
,
25998 IX86_BUILTIN_VPCOMGTUW
,
25999 IX86_BUILTIN_VPCOMGEUW
,
26000 IX86_BUILTIN_VPCOMFALSEUW
,
26001 IX86_BUILTIN_VPCOMTRUEUW
,
26003 IX86_BUILTIN_VPCOMEQUD
,
26004 IX86_BUILTIN_VPCOMNEUD
,
26005 IX86_BUILTIN_VPCOMLTUD
,
26006 IX86_BUILTIN_VPCOMLEUD
,
26007 IX86_BUILTIN_VPCOMGTUD
,
26008 IX86_BUILTIN_VPCOMGEUD
,
26009 IX86_BUILTIN_VPCOMFALSEUD
,
26010 IX86_BUILTIN_VPCOMTRUEUD
,
26012 IX86_BUILTIN_VPCOMEQUQ
,
26013 IX86_BUILTIN_VPCOMNEUQ
,
26014 IX86_BUILTIN_VPCOMLTUQ
,
26015 IX86_BUILTIN_VPCOMLEUQ
,
26016 IX86_BUILTIN_VPCOMGTUQ
,
26017 IX86_BUILTIN_VPCOMGEUQ
,
26018 IX86_BUILTIN_VPCOMFALSEUQ
,
26019 IX86_BUILTIN_VPCOMTRUEUQ
,
26021 IX86_BUILTIN_VPCOMEQB
,
26022 IX86_BUILTIN_VPCOMNEB
,
26023 IX86_BUILTIN_VPCOMLTB
,
26024 IX86_BUILTIN_VPCOMLEB
,
26025 IX86_BUILTIN_VPCOMGTB
,
26026 IX86_BUILTIN_VPCOMGEB
,
26027 IX86_BUILTIN_VPCOMFALSEB
,
26028 IX86_BUILTIN_VPCOMTRUEB
,
26030 IX86_BUILTIN_VPCOMEQW
,
26031 IX86_BUILTIN_VPCOMNEW
,
26032 IX86_BUILTIN_VPCOMLTW
,
26033 IX86_BUILTIN_VPCOMLEW
,
26034 IX86_BUILTIN_VPCOMGTW
,
26035 IX86_BUILTIN_VPCOMGEW
,
26036 IX86_BUILTIN_VPCOMFALSEW
,
26037 IX86_BUILTIN_VPCOMTRUEW
,
26039 IX86_BUILTIN_VPCOMEQD
,
26040 IX86_BUILTIN_VPCOMNED
,
26041 IX86_BUILTIN_VPCOMLTD
,
26042 IX86_BUILTIN_VPCOMLED
,
26043 IX86_BUILTIN_VPCOMGTD
,
26044 IX86_BUILTIN_VPCOMGED
,
26045 IX86_BUILTIN_VPCOMFALSED
,
26046 IX86_BUILTIN_VPCOMTRUED
,
26048 IX86_BUILTIN_VPCOMEQQ
,
26049 IX86_BUILTIN_VPCOMNEQ
,
26050 IX86_BUILTIN_VPCOMLTQ
,
26051 IX86_BUILTIN_VPCOMLEQ
,
26052 IX86_BUILTIN_VPCOMGTQ
,
26053 IX86_BUILTIN_VPCOMGEQ
,
26054 IX86_BUILTIN_VPCOMFALSEQ
,
26055 IX86_BUILTIN_VPCOMTRUEQ
,
26057 /* LWP instructions. */
26058 IX86_BUILTIN_LLWPCB
,
26059 IX86_BUILTIN_SLWPCB
,
26060 IX86_BUILTIN_LWPVAL32
,
26061 IX86_BUILTIN_LWPVAL64
,
26062 IX86_BUILTIN_LWPINS32
,
26063 IX86_BUILTIN_LWPINS64
,
26068 IX86_BUILTIN_XBEGIN
,
26070 IX86_BUILTIN_XABORT
,
26071 IX86_BUILTIN_XTEST
,
26073 /* BMI instructions. */
26074 IX86_BUILTIN_BEXTR32
,
26075 IX86_BUILTIN_BEXTR64
,
26078 /* TBM instructions. */
26079 IX86_BUILTIN_BEXTRI32
,
26080 IX86_BUILTIN_BEXTRI64
,
26082 /* BMI2 instructions. */
26083 IX86_BUILTIN_BZHI32
,
26084 IX86_BUILTIN_BZHI64
,
26085 IX86_BUILTIN_PDEP32
,
26086 IX86_BUILTIN_PDEP64
,
26087 IX86_BUILTIN_PEXT32
,
26088 IX86_BUILTIN_PEXT64
,
26090 /* FSGSBASE instructions. */
26091 IX86_BUILTIN_RDFSBASE32
,
26092 IX86_BUILTIN_RDFSBASE64
,
26093 IX86_BUILTIN_RDGSBASE32
,
26094 IX86_BUILTIN_RDGSBASE64
,
26095 IX86_BUILTIN_WRFSBASE32
,
26096 IX86_BUILTIN_WRFSBASE64
,
26097 IX86_BUILTIN_WRGSBASE32
,
26098 IX86_BUILTIN_WRGSBASE64
,
26100 /* RDRND instructions. */
26101 IX86_BUILTIN_RDRAND16_STEP
,
26102 IX86_BUILTIN_RDRAND32_STEP
,
26103 IX86_BUILTIN_RDRAND64_STEP
,
26105 /* F16C instructions. */
26106 IX86_BUILTIN_CVTPH2PS
,
26107 IX86_BUILTIN_CVTPH2PS256
,
26108 IX86_BUILTIN_CVTPS2PH
,
26109 IX86_BUILTIN_CVTPS2PH256
,
26111 /* CFString built-in for darwin */
26112 IX86_BUILTIN_CFSTRING
,
26114 /* Builtins to get CPU type and supported features. */
26115 IX86_BUILTIN_CPU_INIT
,
26116 IX86_BUILTIN_CPU_IS
,
26117 IX86_BUILTIN_CPU_SUPPORTS
,
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa
{
  const char *name;			/* function name */
  enum ix86_builtin_func_type tcode;	/* type to use in the declaration */
  HOST_WIDE_INT isa;			/* isa_flags this builtin is defined for */
  bool const_p;				/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
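
/* Annotation (added here, not in the original source): a sketch of the
   deferred-builtin life cycle these fields implement.  def_builtin, below,
   parks a builtin it cannot build yet:

     ix86_builtins[code] = NULL_TREE;
     ix86_builtins_isa[code].set_and_not_built_p = true;    (parked)

   and ix86_add_new_builtins, further below, un-parks it once the needed
   ISA becomes available:

     ix86_builtins_isa[code].set_and_not_built_p = false;   (built)
     ix86_builtins[code] = decl;  */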
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */
static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
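
/* Illustrative call shape (annotation, not original code).  The builtin
   initialization routines invoke def_builtin once per builtin; the exact
   mask/name/type/code combination shown here is hypothetical:

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  INT_FTYPE_V2DF, IX86_BUILTIN_CVTSD2SI);

   If OPTION_MASK_ISA_SSE2 is not yet in ix86_isa_flags and the front end
   has no extended-scope hook, no decl is built; the request is recorded
   in ix86_builtins_isa until ix86_add_new_builtins sees that ISA.  */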
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
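
/* Usage sketch (annotation, not original code).  A caller that has just
   enabled additional ISA flags, e.g. while processing a function-specific
   target option, would hand the updated flags to this function so any
   parked builtins for those ISAs get their decls:

     ix86_add_new_builtins (ix86_isa_flags);

   Entries whose isa mask is still unsatisfied remain parked.  */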
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
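
/* Annotated reading of one table entry (annotation added here; the entry
   itself is the first element of bdesc_comi below):

     { OPTION_MASK_ISA_SSE,        mask: ISA flags the builtin requires
       CODE_FOR_sse_comi,          icode: insn pattern used at expansion
       "__builtin_ia32_comieq",    name: user-visible builtin name (0 if
                                   the builtin is registered elsewhere)
       IX86_BUILTIN_COMIEQSS,      code: enum ix86_builtins value
       UNEQ,                       comparison: rtx comparison code
       0 },                        flag: table-dependent extra data;
                                   BUILTIN_DESC_* bits, a CC mode, or a
                                   function-type code  */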
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
};
26423 /* Builtins with variable number of arguments. */
26424 static const struct builtin_description bdesc_args
[] =
26426 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr
, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI
, UNKNOWN
, (int) INT_FTYPE_INT
},
26427 { OPTION_MASK_ISA_64BIT
, CODE_FOR_bsr_rex64
, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI
, UNKNOWN
, (int) INT64_FTYPE_INT64
},
26428 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rdpmc
, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC
, UNKNOWN
, (int) UINT64_FTYPE_INT
},
26429 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlqi3
, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26430 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotlhi3
, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26431 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrqi3
, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI
, UNKNOWN
, (int) UINT8_FTYPE_UINT8_INT
},
26432 { ~OPTION_MASK_ISA_64BIT
, CODE_FOR_rotrhi3
, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI
, UNKNOWN
, (int) UINT16_FTYPE_UINT16_INT
},
26435 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv8qi3
, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26436 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv4hi3
, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26437 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_addv2si3
, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26438 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv8qi3
, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26439 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv4hi3
, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26440 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_subv2si3
, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26442 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv8qi3
, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26443 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ssaddv4hi3
, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26444 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv8qi3
, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26445 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_sssubv4hi3
, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26446 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv8qi3
, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26447 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_usaddv4hi3
, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26448 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv8qi3
, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26449 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ussubv4hi3
, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26451 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_mulv4hi3
, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26452 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_smulv4hi3_highpart
, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26454 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andv2si3
, "__builtin_ia32_pand", IX86_BUILTIN_PAND
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26455 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_andnotv2si3
, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26456 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_iorv2si3
, "__builtin_ia32_por", IX86_BUILTIN_POR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26457 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_xorv2si3
, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26459 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv8qi3
, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26460 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv4hi3
, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26461 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_eqv2si3
, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26462 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv8qi3
, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26463 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv4hi3
, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26464 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_gtv2si3
, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26466 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhbw
, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26467 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhwd
, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26468 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckhdq
, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26469 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklbw
, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26470 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpcklwd
, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26471 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_punpckldq
, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI
},
26473 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packsswb
, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26474 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packssdw
, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW
, UNKNOWN
, (int) V4HI_FTYPE_V2SI_V2SI
},
26475 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_packuswb
, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB
, UNKNOWN
, (int) V8QI_FTYPE_V4HI_V4HI
},
26477 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_pmaddwd
, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD
, UNKNOWN
, (int) V2SI_FTYPE_V4HI_V4HI
},
26479 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26480 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26481 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26482 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv4hi3
, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26483 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv2si3
, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26484 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashlv1di3
, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26486 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26487 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26488 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_SI_COUNT
},
26489 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv4hi3
, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26490 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv2si3
, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26491 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_lshrv1di3
, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ
, UNKNOWN
, (int) V1DI_FTYPE_V1DI_V1DI_COUNT
},
26493 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_SI_COUNT
},
26494 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_SI_COUNT
},
26495 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv4hi3
, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI_COUNT
},
26496 { OPTION_MASK_ISA_MMX
, CODE_FOR_mmx_ashrv2si3
, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD
, UNKNOWN
, (int) V2SI_FTYPE_V2SI_V2SI_COUNT
},
26499 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pf2id
, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26500 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_floatv2si2
, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26501 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpv2sf2
, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26502 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqrtv2sf2
, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26504 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26505 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_haddv2sf3
, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26506 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_addv2sf3
, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26507 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_eqv2sf3
, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26508 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gev2sf3
, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26509 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_gtv2sf3
, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT
, UNKNOWN
, (int) V2SI_FTYPE_V2SF_V2SF
},
26510 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_smaxv2sf3
, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26511 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_sminv2sf3
, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26512 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_mulv2sf3
, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26513 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit1v2sf3
, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26514 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rcpit2v2sf3
, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26515 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_rsqit1v2sf3
, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26516 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subv2sf3
, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26517 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_subrv2sf3
, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26518 { OPTION_MASK_ISA_3DNOW
, CODE_FOR_mmx_pmulhrwv4hi3
, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26521 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pf2iw
, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW
, UNKNOWN
, (int) V2SI_FTYPE_V2SF
},
26522 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pi2fw
, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW
, UNKNOWN
, (int) V2SF_FTYPE_V2SI
},
26523 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2si2
, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI
, UNKNOWN
, (int) V2SI_FTYPE_V2SI
},
26524 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pswapdv2sf2
, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF
, UNKNOWN
, (int) V2SF_FTYPE_V2SF
},
26525 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_hsubv2sf3
, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26526 { OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_addsubv2sf3
, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC
, UNKNOWN
, (int) V2SF_FTYPE_V2SF_V2SF
},
26529 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movmskps
, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26530 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_sqrtv4sf2
, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26531 { OPTION_MASK_ISA_SSE
, CODE_FOR_sqrtv4sf2
, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26532 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rsqrtv4sf2
, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26533 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtv4sf2
, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26534 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_rcpv4sf2
, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF
},
26535 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtps2pi
, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26536 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtss2si
, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26537 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtss2siq
, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26538 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttps2pi
, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI
, UNKNOWN
, (int) V2SI_FTYPE_V4SF
},
26539 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvttss2si
, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI
, UNKNOWN
, (int) INT_FTYPE_V4SF
},
26540 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvttss2siq
, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64
, UNKNOWN
, (int) INT64_FTYPE_V4SF
},
26542 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_shufps
, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF_INT
},
26544 { OPTION_MASK_ISA_SSE
, CODE_FOR_addv4sf3
, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26545 { OPTION_MASK_ISA_SSE
, CODE_FOR_subv4sf3
, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26546 { OPTION_MASK_ISA_SSE
, CODE_FOR_mulv4sf3
, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26547 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_divv4sf3
, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26548 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmaddv4sf3
, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26549 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsubv4sf3
, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26550 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmulv4sf3
, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26551 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmdivv4sf3
, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26553 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26554 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26555 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26556 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26557 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26558 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26559 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26560 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26561 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26562 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26563 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26564 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_maskcmpv4sf3
, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26565 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS
, EQ
, (int) V4SF_FTYPE_V4SF_V4SF
},
26566 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS
, LT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26567 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS
, LE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26568 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS
, UNORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26569 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS
, NE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26570 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF
},
26571 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF
},
26572 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS
, UNGE
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26573 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS
, UNGT
, (int) V4SF_FTYPE_V4SF_V4SF_SWAP
},
26574 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmmaskcmpv4sf3
, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS
, ORDERED
, (int) V4SF_FTYPE_V4SF_V4SF
},
26576 { OPTION_MASK_ISA_SSE
, CODE_FOR_sminv4sf3
, "__builtin_ia32_minps", IX86_BUILTIN_MINPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26577 { OPTION_MASK_ISA_SSE
, CODE_FOR_smaxv4sf3
, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26578 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsminv4sf3
, "__builtin_ia32_minss", IX86_BUILTIN_MINSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26579 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsmaxv4sf3
, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26581 { OPTION_MASK_ISA_SSE
, CODE_FOR_andv4sf3
, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26582 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_andnotv4sf3
, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26583 { OPTION_MASK_ISA_SSE
, CODE_FOR_iorv4sf3
, "__builtin_ia32_orps", IX86_BUILTIN_ORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26584 { OPTION_MASK_ISA_SSE
, CODE_FOR_xorv4sf3
, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26586 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysignv4sf3
, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26588 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movss
, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26589 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movhlps_exp
, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26590 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_movlhps_exp
, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26591 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_highv4sf
, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26592 { OPTION_MASK_ISA_SSE
, CODE_FOR_vec_interleave_lowv4sf
, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V4SF
},
26594 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtpi2ps
, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_V2SI
},
26595 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_cvtsi2ss
, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_SI
},
26596 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse_cvtsi2ssq
, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS
, UNKNOWN
, V4SF_FTYPE_V4SF_DI
},
26598 { OPTION_MASK_ISA_SSE
, CODE_FOR_rsqrtsf2
, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF
, UNKNOWN
, (int) FLOAT_FTYPE_FLOAT
},
26600 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmsqrtv4sf2
, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26601 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrsqrtv4sf2
, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26602 { OPTION_MASK_ISA_SSE
, CODE_FOR_sse_vmrcpv4sf2
, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS
, UNKNOWN
, (int) V4SF_FTYPE_V4SF_VEC_MERGE
},
26604 { OPTION_MASK_ISA_SSE
, CODE_FOR_abstf2
, 0, IX86_BUILTIN_FABSQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128
},
26605 { OPTION_MASK_ISA_SSE
, CODE_FOR_copysigntf3
, 0, IX86_BUILTIN_COPYSIGNQ
, UNKNOWN
, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128
},
26607 /* SSE MMX or 3Dnow!A */
26608 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv8qi3
, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26609 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uavgv4hi3
, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26610 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umulv4hi3_highpart
, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26612 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_umaxv8qi3
, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26613 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_smaxv4hi3
, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26614 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_uminv8qi3
, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB
, UNKNOWN
, (int) V8QI_FTYPE_V8QI_V8QI
},
26615 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_sminv4hi3
, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_V4HI
},
26617 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_psadbw
, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW
, UNKNOWN
, (int) V1DI_FTYPE_V8QI_V8QI
},
26618 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pmovmskb
, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB
, UNKNOWN
, (int) INT_FTYPE_V8QI
},
26620 { OPTION_MASK_ISA_SSE
| OPTION_MASK_ISA_3DNOW_A
, CODE_FOR_mmx_pshufw
, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW
, UNKNOWN
, (int) V4HI_FTYPE_V4HI_INT
},
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
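  /* The conversion descriptors above are what the <emmintrin.h>
     wrappers bottom out in.  An illustrative wrapper, mirroring the
     usual header pattern rather than quoting the header:

       extern __inline int
       __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
       _mm_cvtsd_si32 (__m128d __A)
       {
         return __builtin_ia32_cvtsd2si ((__v2df) __A);
       }  */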
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
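  /* For the cmp* descriptors the rtx code field selects the
     comparison and the _SWAP prototypes ask the expander to exchange
     the operands first: cmppd/cmpsd only encode eq/lt/le/unord and
     their negations, so cmpgtpd is LT on swapped operands and
     cmpgepd is LE on swapped operands, while the "not" forms map to
     the unordered codes (nlt -> UNGE, nle -> UNGT).  The swap itself
     is nothing more than (illustrative; names are not the real ones):

       if (swap_operands)
         {
           tmp = op0;
           op0 = op1;
           op1 = tmp;
         }  */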
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
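  /* The andn forms keep the hardware operand order: the first source
     is the complemented one, so andnpd computes ~op0 & op1 bitwise,
     not op0 & ~op1.  That convention is also what lets copysignpd be
     open-coded from and/andn/or against a sign-bit mask.  */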
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
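  /* The last few descriptors are the widening forms: pmaddwd128
     multiplies 16-bit lanes pairwise and adds adjacent products into
     32-bit lanes (V4SI from two V8HI), psadbw128 folds absolute byte
     differences into one small sum per 64-bit half (V2DI from two
     V16QI), and pmuludq/pmuludq128 multiply only the even 32-bit
     lanes to full 64-bit products -- hence the narrow-to-wide
     prototypes instead of the usual lane-for-lane ones.  */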
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
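  /* Three shift flavors appear above.  The _SI_COUNT prototypes take
     the count as a scalar, the _V*_COUNT ones take it in the low
     quadword of a vector register (the two hardware forms of
     psllw/pslld/psllq and friends), and the whole-register byte
     shifts pslldqi128/psrldqi128 use _INT_CONVERT: the operation is
     performed in V1TImode and the count operand is expressed in
     bits, which is why the <emmintrin.h> wrappers scale the
     intrinsic's byte count by 8 before calling them (a description
     of the header convention, not of code in this file).  */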
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
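  /* Horizontal-add semantics, for reference: haddps produces
     { a0+a1, a2+a3, b0+b1, b2+b3 }, i.e. each output lane is the sum
     of one adjacent pair from a single input rather than the usual
     lane-for-lane combination of both inputs.  */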
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
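  /* Like the SSE2 byte shifts, the palignr descriptors use the
     _INT_CONVERT prototypes: the concatenate-and-shift is performed
     in TImode (DImode for the MMX form) and the count operand is in
     bits, so the <tmmintrin.h> wrappers multiply the intrinsic's
     byte offset by 8 (again describing the header convention).  */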
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
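  /* The pmovsx/pmovzx descriptors take a full 128-bit source even
     though only its low lanes are consumed: pmovsxbw128, for
     instance, sign-extends the low 8 bytes of a V16QI into a V8HI
     and ignores the high half.  That is why each prototype pairs a
     wide input type with a narrower logical element count instead of
     introducing half-width vector types.  */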
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
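  /* Two field tricks above deserve a note.  The rounding descriptors
     smuggle their immediate (ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC,
     ROUND_MXCSR) through the rtx comparison slot -- those values are
     not rtx codes, hence the explicit (enum rtx_code) casts.  And
     the three ptest descriptors share one pattern, differing only in
     which flag the comparison code selects: EQ tests ZF (ptestz),
     LTU tests CF (ptestc), and GTU tests that both are clear
     (ptestnzc).  */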
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
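  /* The crc32 builtins accumulate with the CRC-32C (Castagnoli)
     polynomial, not the zlib/IEEE one.  A typical byte-wise
     accumulation, sketched by hand rather than taken from this file:

       unsigned int c = 0xffffffff;
       while (len--)
         c = __builtin_ia32_crc32qi (c, *p++);  */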
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
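  /* The AES and PCLMUL entries carry a null name: the generic table
     walk skips nameless entries, and the user-visible builtins are
     registered by hand elsewhere, presumably so they can be gated on
     -maes/-mpclmul while the mask here only records the SSE2
     baseline they sit on.  */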
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
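  /* Most of the 256-bit operations above are genuinely 256 bits
     wide, but the horizontal forms work within 128-bit lanes:
     haddpd256 yields { a0+a1, b0+b1, a2+a3, b2+b3 }, behaving like
     two independent 128-bit haddpd operations, one per lane.  */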
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
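
  /* A note on the type suffixes used above: the _COUNT-suffixed signatures
     belong to the shift builtins whose last operand is a shift count (an
     immediate or an XMM-resident count) rather than a full-width vector,
     and _CONVERT appears to mark builtins, such as the V2TI shift patterns
     avx2_ashlv2ti3/avx2_lshrv2ti3 above, whose operands are reinterpreted
     in a mode other than the one the builtin advertises.  */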
  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
};
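
/* The argument descriptors above (bdesc_args) are consumed by
   ix86_init_mmx_sse_builtins further below; in outline, each named entry
   is registered via

     ftype = (enum ix86_builtin_func_type) d->flag;
     def_builtin_const (d->mask, d->name, ftype, d->code);

   so d->mask gates every builtin on the target's ISA flags.  */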
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
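
/* As an example of the encoding used above: MULTI_ARG_3_SF is
   V4SF_FTYPE_V4SF_V4SF_V4SF, i.e. a three-operand single-float builtin,
   and the "2"-suffixed variants (e.g. MULTI_ARG_3_SF2, which is
   V8SF_FTYPE_V8SF_V8SF_V8SF) are the 256-bit forms of the same shapes.  */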
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
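
/* Several of the XOP comparison builtins above are deliberate aliases;
   for instance "__builtin_ia32_vpcomneb" and "__builtin_ia32_vpcomneqb"
   both map to IX86_BUILTIN_VPCOMNEB with comparison code NE.  */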
/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
        }
    }
  return NULL_TREE;
}
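
/* For example, a 16-byte vector type (TYPE_SIZE of 128 bits) resolves to
   the decl of BUILT_IN_TM_LOAD_M128, whose target-specific implementation
   is registered from bdesc_tm above.  */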
/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
        {
        case 64:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
        case 128:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
        case 256:
          return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
        }
    }
  return NULL_TREE;
}
/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* If there are no builtins defined, we must be compiling in a
     language without trans-mem support.  */
  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type, attrs, attrs_type;
          enum built_in_function code = (enum built_in_function) d->code;

          ftype = (enum ix86_builtin_func_type) d->flag;
          type = ix86_get_builtin_func_type (ftype);

          if (BUILTIN_TM_LOAD_P (code))
            {
              attrs = attrs_load;
              attrs_type = attrs_type_load;
            }
          else if (BUILTIN_TM_STORE_P (code))
            {
              attrs = attrs_store;
              attrs_type = attrs_type_store;
            }
          else
            {
              attrs = attrs_log;
              attrs_type = attrs_type_log;
            }

          decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
                                       /* The builtin without the prefix for
                                          calling it directly.  */
                                       d->name + strlen ("__builtin_"),
                                       attrs);

          /* add_builtin_function () will set the DECL_ATTRIBUTES, now
             set the TYPE_ATTRIBUTES.  */
          decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

          set_builtin_decl (code, decl, false);
        }
    }
}
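
/* Note the second name passed to add_builtin_function above drops the
   "__builtin_" prefix, so e.g. the bdesc_tm entry "__builtin__ITM_RM128"
   is also made directly callable as "_ITM_RM128".  */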
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
27754 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_monitor",
27755 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MONITOR
);
27756 def_builtin (OPTION_MASK_ISA_SSE3
, "__builtin_ia32_mwait",
27757 VOID_FTYPE_UNSIGNED_UNSIGNED
, IX86_BUILTIN_MWAIT
);
27760 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenc128",
27761 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENC128
);
27762 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesenclast128",
27763 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESENCLAST128
);
27764 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdec128",
27765 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDEC128
);
27766 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesdeclast128",
27767 V2DI_FTYPE_V2DI_V2DI
, IX86_BUILTIN_AESDECLAST128
);
27768 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aesimc128",
27769 V2DI_FTYPE_V2DI
, IX86_BUILTIN_AESIMC128
);
27770 def_builtin_const (OPTION_MASK_ISA_AES
, "__builtin_ia32_aeskeygenassist128",
27771 V2DI_FTYPE_V2DI_INT
, IX86_BUILTIN_AESKEYGENASSIST128
);
27774 def_builtin_const (OPTION_MASK_ISA_PCLMUL
, "__builtin_ia32_pclmulqdq128",
27775 V2DI_FTYPE_V2DI_V2DI_INT
, IX86_BUILTIN_PCLMULQDQ128
);
27778 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand16_step",
27779 INT_FTYPE_PUSHORT
, IX86_BUILTIN_RDRAND16_STEP
);
27780 def_builtin (OPTION_MASK_ISA_RDRND
, "__builtin_ia32_rdrand32_step",
27781 INT_FTYPE_PUNSIGNED
, IX86_BUILTIN_RDRAND32_STEP
);
27782 def_builtin (OPTION_MASK_ISA_RDRND
| OPTION_MASK_ISA_64BIT
,
27783 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG
,
27784 IX86_BUILTIN_RDRAND64_STEP
);
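
  /* Illustrative usage (editor's sketch, not part of GCC): the rdrand
     step builtins return nonzero on success and store the random value
     through the pointer, so callers typically retry on failure.  This
     matches how the _rdrand32_step wrapper in immintrin.h uses the
     builtin:

	 unsigned int
	 get_random_u32 (void)
	 {
	   unsigned int v;
	   while (!__builtin_ia32_rdrand32_step (&v))
	     ;
	   return v;
	 }

     compiled with -mrdrnd.  */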
  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);
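
  /* Illustrative usage (editor's sketch, not part of GCC): user code
     normally reaches these gather builtins through the AVX2 intrinsics
     in avx2intrin.h.  For example, _mm256_i32gather_pd gathers four
     doubles using 32-bit indices and a byte scale:

	 #include <immintrin.h>

	 __m256d
	 gather4 (const double *base, __m128i idx)
	 {
	   return _mm256_i32gather_pd (base, idx, 8);
	 }

     compiled with -mavx2; the scale must be a literal 1, 2, 4 or 8.  */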
  /* RTM.  */
  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
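
  /* Illustrative usage (editor's sketch, not part of GCC): the vec_ext
     builtins are what the x86 intrinsic headers expand to.  For
     example, _mm_cvtss_f32 in xmmintrin.h is essentially

	 static __inline float
	 my_cvtss_f32 (__m128 a)
	 {
	   return __builtin_ia32_vec_ext_v4sf ((__v4sf) a, 0);
	 }

     and _mm_extract_epi16 uses __builtin_ia32_vec_ext_v8hi the same
     way.  */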
  /* Add FMA4 multi-arg argument instructions */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* This builds the processor_model struct type defined in
   libgcc/config/i386/cpuinfo.c  */

static tree
build_processor_model_struct (void)
{
  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
			      "__cpu_features"};
  tree field = NULL_TREE, field_chain = NULL_TREE;
  int i;
  tree type = make_node (RECORD_TYPE);

  /* The first 3 fields are unsigned int.  */
  for (i = 0; i < 3; ++i)
    {
      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
			  get_identifier (field_name[i]), unsigned_type_node);
      if (field_chain != NULL_TREE)
	DECL_CHAIN (field) = field_chain;
      field_chain = field;
    }

  /* The last field is an array of unsigned integers of size one.  */
  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
		      get_identifier (field_name[3]),
		      build_array_type (unsigned_type_node,
					build_index_type (size_one_node)));
  if (field_chain != NULL_TREE)
    DECL_CHAIN (field) = field_chain;
  field_chain = field;

  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);

  return type;
}
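
/* For reference (editor's note), the record built above mirrors this
   definition from libgcc/config/i386/cpuinfo.c:

       struct __processor_model
       {
	 unsigned int __cpu_vendor;
	 unsigned int __cpu_type;
	 unsigned int __cpu_subtype;
	 unsigned int __cpu_features[1];
       };

   The libgcc constructor fills in a single global instance named
   __cpu_model, which fold_builtin_cpu below reads.  */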
/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */

static tree
make_var_decl (tree type, const char *name)
{
  tree new_decl;

  new_decl = build_decl (UNKNOWN_LOCATION,
			 VAR_DECL,
			 get_identifier(name),
			 type);

  DECL_EXTERNAL (new_decl) = 1;
  TREE_STATIC (new_decl) = 1;
  TREE_PUBLIC (new_decl) = 1;
  DECL_INITIAL (new_decl) = 0;
  DECL_ARTIFICIAL (new_decl) = 0;
  DECL_PRESERVE_P (new_decl) = 1;

  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
  assemble_variable (new_decl, 0, 0, 0);

  return new_decl;
}
/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
   into an integer defined in libgcc/config/i386/cpuinfo.c */

static tree
fold_builtin_cpu (tree fndecl, tree *args)
{
  unsigned int i;
  enum ix86_builtins fn_code = (enum ix86_builtins)
				DECL_FUNCTION_CODE (fndecl);
  tree param_string_cst = NULL;

  /* This is the order of bit-fields in __processor_features in cpuinfo.c */
  enum processor_features
  {
    F_CMOV = 0,
    F_MMX,
    F_POPCNT,
    F_SSE,
    F_SSE2,
    F_SSE3,
    F_SSSE3,
    F_SSE4_1,
    F_SSE4_2,
    F_AVX,
    F_AVX2,
    F_MAX
  };

  /* These are the values for vendor types and cpu types and subtypes
     in cpuinfo.c.  Cpu types and subtypes should be subtracted by
     the corresponding start value.  */
  enum processor_model
  {
    M_INTEL = 1,
    M_AMD,
    M_CPU_TYPE_START,
    M_INTEL_ATOM,
    M_INTEL_CORE2,
    M_INTEL_COREI7,
    M_AMDFAM10H,
    M_AMDFAM15H,
    M_CPU_SUBTYPE_START,
    M_INTEL_COREI7_NEHALEM,
    M_INTEL_COREI7_WESTMERE,
    M_INTEL_COREI7_SANDYBRIDGE,
    M_AMDFAM10H_BARCELONA,
    M_AMDFAM10H_SHANGHAI,
    M_AMDFAM10H_ISTANBUL,
    M_AMDFAM15H_BDVER1,
    M_AMDFAM15H_BDVER2
  };

  static struct _arch_names_table
    {
      const char *const name;
      const enum processor_model model;
    }
  const arch_names_table[] =
    {
      {"amd", M_AMD},
      {"intel", M_INTEL},
      {"atom", M_INTEL_ATOM},
      {"core2", M_INTEL_CORE2},
      {"corei7", M_INTEL_COREI7},
      {"nehalem", M_INTEL_COREI7_NEHALEM},
      {"westmere", M_INTEL_COREI7_WESTMERE},
      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
      {"amdfam10h", M_AMDFAM10H},
      {"barcelona", M_AMDFAM10H_BARCELONA},
      {"shanghai", M_AMDFAM10H_SHANGHAI},
      {"istanbul", M_AMDFAM10H_ISTANBUL},
      {"amdfam15h", M_AMDFAM15H},
      {"bdver1", M_AMDFAM15H_BDVER1},
      {"bdver2", M_AMDFAM15H_BDVER2},
    };

  static struct _isa_names_table
    {
      const char *const name;
      const enum processor_features feature;
    }
  const isa_names_table[] =
    {
      {"cmov",   F_CMOV},
      {"mmx",    F_MMX},
      {"popcnt", F_POPCNT},
      {"sse",    F_SSE},
      {"sse2",   F_SSE2},
      {"sse3",   F_SSE3},
      {"ssse3",  F_SSSE3},
      {"sse4.1", F_SSE4_1},
      {"sse4.2", F_SSE4_2},
      {"avx",    F_AVX},
      {"avx2",   F_AVX2}
    };

  static tree __processor_model_type = NULL_TREE;
  static tree __cpu_model_var = NULL_TREE;

  if (__processor_model_type == NULL_TREE)
    __processor_model_type = build_processor_model_struct ();

  if (__cpu_model_var == NULL_TREE)
    __cpu_model_var = make_var_decl (__processor_model_type,
				     "__cpu_model");

  gcc_assert ((args != NULL) && (*args != NULL));

  param_string_cst = *args;
  while (param_string_cst
	 && TREE_CODE (param_string_cst) != STRING_CST)
    {
      /* *args must be an expr that can contain other EXPRs leading to a
	 STRING_CST.  */
      if (!EXPR_P (param_string_cst))
	{
	  error ("Parameter to builtin must be a string constant or literal");
	  return integer_zero_node;
	}
      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
    }

  gcc_assert (param_string_cst);

  if (fn_code == IX86_BUILTIN_CPU_IS)
    {
      tree ref;
      tree field;
      unsigned int field_val = 0;
      unsigned int NUM_ARCH_NAMES
	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);

      for (i = 0; i < NUM_ARCH_NAMES; i++)
	if (strcmp (arch_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ARCH_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      field_val = arch_names_table[i].model;

      /* CPU types are stored in the next field.  */
      if (field_val > M_CPU_TYPE_START
	  && field_val < M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN (field);
	  field_val -= M_CPU_TYPE_START;
	}

      /* CPU subtypes are stored in the next field.  */
      if (field_val > M_CPU_SUBTYPE_START)
	{
	  field = DECL_CHAIN (DECL_CHAIN (field));
	  field_val -= M_CPU_SUBTYPE_START;
	}

      /* Get the appropriate field in __cpu_model.  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Check the value.  */
      return build2 (EQ_EXPR, unsigned_type_node, ref,
		     build_int_cstu (unsigned_type_node, field_val));
    }
  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
    {
      tree ref;
      tree array_elt;
      tree field;
      unsigned int field_val = 0;
      unsigned int NUM_ISA_NAMES
	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);

      for (i = 0; i < NUM_ISA_NAMES; i++)
	if (strcmp (isa_names_table[i].name,
		    TREE_STRING_POINTER (param_string_cst)) == 0)
	  break;

      if (i == NUM_ISA_NAMES)
	{
	  error ("Parameter to builtin not valid: %s",
		 TREE_STRING_POINTER (param_string_cst));
	  return integer_zero_node;
	}

      field = TYPE_FIELDS (__processor_model_type);
      /* Get the last field, which is __cpu_features.  */
      while (DECL_CHAIN (field))
	field = DECL_CHAIN (field);

      /* Get the appropriate field: __cpu_model.__cpu_features  */
      ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
		    field, NULL_TREE);

      /* Access the 0th element of __cpu_features array.  */
      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
			  integer_zero_node, NULL_TREE, NULL_TREE);

      field_val = (1 << isa_names_table[i].feature);
      /* Return __cpu_model.__cpu_features[0] & field_val  */
      return build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
		     build_int_cstu (unsigned_type_node, field_val));
    }
  gcc_unreachable ();
}
static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code = (enum ix86_builtins)
				   DECL_FUNCTION_CODE (fndecl);
      if (fn_code == IX86_BUILTIN_CPU_IS
	  || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
	{
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
/* Make builtins to detect cpu type and features supported.  NAME is
   the builtin name, CODE is the builtin code, and FTYPE is the function
   type of the builtin.  */

static void
make_cpu_type_builtin (const char* name, int code,
		       enum ix86_builtin_func_type ftype, bool is_const)
{
  tree decl;
  tree type;

  type = ix86_get_builtin_func_type (ftype);
  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
			       NULL, NULL_TREE);
  gcc_assert (decl != NULL_TREE);
  ix86_builtins[(int) code] = decl;
  TREE_READONLY (decl) = is_const;
}
/* Make builtins to get CPU type and features supported.  The created
   builtins are:

   __builtin_cpu_init (), to detect cpu type and features,
   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>.  */

static void
ix86_init_platform_type_builtins (void)
{
  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
			 INT_FTYPE_VOID, false);
  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
			 INT_FTYPE_PCCHAR, true);
  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
			 INT_FTYPE_PCCHAR, true);
}
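
/* Illustrative usage (editor's sketch, not part of GCC): source code
   can use these builtins for runtime dispatch, e.g.

       int
       have_fast_path (void)
       {
	 __builtin_cpu_init ();
	 return __builtin_cpu_is ("corei7")
		&& __builtin_cpu_supports ("avx");
       }

   Both predicates fold to reads of __cpu_model fields via
   fold_builtin_cpu above, so no call is emitted.  */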
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
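
/* Illustrative usage (editor's sketch, not part of GCC): on x86-64 an
   ms_abi varargs function reaches the ms variants registered above:

       int __attribute__ ((ms_abi))
       sum_ints (int n, ...)
       {
	 __builtin_ms_va_list ap;
	 int i, s = 0;

	 __builtin_ms_va_start (ap, n);
	 for (i = 0; i < n; i++)
	   s += __builtin_va_arg (ap, int);
	 __builtin_ms_va_end (ap);
	 return s;
       }
*/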
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* Builtins to get CPU type and features.  */
  ix86_init_platform_type_builtins ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();
  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}
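
/* Illustrative usage (editor's sketch, not part of GCC): the TFmode
   builtins registered above back simple __float128 operations, e.g.

       __float128
       magnitude (__float128 x)
       {
	 return __builtin_fabsq (x);
       }

       __float128
       infinity (void)
       {
	 return __builtin_infq ();
       }

   Without SSE, __builtin_fabsq expands to a call to __fabstf2 in
   libgcc, as noted above.  */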
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		      goto non_constant;
		    }
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	non_constant:
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op,
			     args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
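
/* Illustrative usage (editor's sketch, not part of GCC): the comi
   builtins are reached through thin wrappers in xmmintrin.h, e.g.

       #include <xmmintrin.h>

       int
       scalar_less (__m128 a, __m128 b)
       {
	 return _mm_comilt_ss (a, b);
       }

   which becomes a comiss followed by a setcc reading the flags, the
   same STRICT_LOW_PART-on-QImode-subreg sequence emitted above.  */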
/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
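
/* Illustrative usage (editor's sketch, not part of GCC): the ROUND
   descriptors carry the rounding-mode immediate in d->comparison, so
   a vectorized floor() becomes SSE4.1 roundps with ROUND_FLOOR.  The
   equivalent intrinsic form is

       #include <smmintrin.h>

       __m128
       floor4 (__m128 x)
       {
	 return _mm_floor_ps (x);
       }

   compiled with -msse4.1.  */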
static rtx
ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
				     tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  op0 = safe_vector_operand (op0, mode0);
  op1 = safe_vector_operand (op1, mode1);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
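
/* Illustrative usage (editor's sketch, not part of GCC): the ptest
   builtins are reached through the SSE4.1 test intrinsics, e.g.

       #include <smmintrin.h>

       int
       is_all_zero (__m128i v)
       {
	 return _mm_testz_si128 (v, v);
       }

   which emits ptest plus a sete on the Z flag, the same
   flags-to-QImode pattern constructed above.  */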
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
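
/* Illustrative usage (editor's sketch, not part of GCC): the pcmpistri
   form is reached through the SSE4.2 string intrinsics, e.g. finding
   the first byte at which two 16-byte blocks differ:

       #include <nmmintrin.h>

       int
       first_diff (__m128i a, __m128i b)
       {
	 return _mm_cmpistri (a, b,
			      _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH
			      | _SIDD_NEGATIVE_POLARITY);
       }

   The index builtins use the first (tmode0) result register, the mask
   builtins the second, and the flag-reading variants the EQ-on-flags
   sequence above.  */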
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
    case V4SI_FTYPE_V4SF_ROUND:
    case V8SI_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case V4SI_FTYPE_V2DF_V2DF_ROUND:
    case V8SI_FTYPE_V4DF_V4DF_ROUND:
      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2UDI_FTYPE_V4USI_V4USI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
    case V4UDI_FTYPE_V8USI_V8USI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V4DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V2DImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_avx2_inserti128:
	      case CODE_FOR_avx2_extracti128:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:

	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:

	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
	      case CODE_FOR_sse4_1_roundps_sfix:
	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
	      case CODE_FOR_avx_roundps_sfix256:

	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
29614 /* Subroutine of ix86_expand_builtin to take care of special insns
29615 with variable number of operands. */
29618 ix86_expand_special_args_builtin (const struct builtin_description
*d
,
29619 tree exp
, rtx target
)
29623 unsigned int i
, nargs
, arg_adjust
, memory
;
29627 enum machine_mode mode
;
29629 enum insn_code icode
= d
->icode
;
29630 bool last_arg_constant
= false;
29631 const struct insn_data_d
*insn_p
= &insn_data
[icode
];
29632 enum machine_mode tmode
= insn_p
->operand
[0].mode
;
29633 enum { load
, store
} klass
;
29635 switch ((enum ix86_builtin_func_type
) d
->flag
)
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
	target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case INT_FTYPE_VOID:
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PLONGLONG_LONGLONG:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }
  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  if (GET_MODE (op) != Pmode)
	    op = convert_to_mode (Pmode, op, 1);
	  target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || !register_operand (target, tmode)
	  || GET_MODE (target) != tmode)
	target = gen_reg_rtx (tmode);
    }
  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      if (GET_MODE (op) != Pmode)
		op = convert_to_mode (Pmode, op, 1);
	      op = gen_rtx_MEM (mode, force_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }
  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
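
/* For example, a selector of 4 for a V4SF vector (valid subparts
   0..3) trips the error above; the caller then quietly operates on
   element 0 instead of crashing.  */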
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
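
/* A typical route into this function (illustrative; the intrinsic
   definition lives in mmintrin.h, not here) is
     __m64 v = _mm_set_pi16 (3, 2, 1, 0);
   which expands through __builtin_ia32_vec_init_v4hi and thus
   IX86_BUILTIN_VEC_INIT_V4HI.  */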
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
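
/* E.g. _mm_insert_epi16 (v, x, 2) arrives here as
   __builtin_ia32_vec_set_v8hi (v, x, 2) (an illustrative call, not
   from this file): the source vector is copied into a fresh
   register, element 2 is overwritten, and the copy is returned.  */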
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  /* For CPU builtins that can be folded, fold first and expand the fold.  */
  switch (fcode)
    {
    case IX86_BUILTIN_CPU_INIT:
      {
	/* Make it call __cpu_indicator_init in libgcc.  */
	tree call_expr, fndecl, type;
	type = build_function_type_list (integer_type_node, NULL_TREE);
	fndecl = build_fn_decl ("__cpu_indicator_init", type);
	call_expr = build_call_expr (fndecl, 0);
	return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
      }
    case IX86_BUILTIN_CPU_IS:
    case IX86_BUILTIN_CPU_SUPPORTS:
      {
	tree arg0 = CALL_EXPR_ARG (exp, 0);
	tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
	gcc_assert (fold_expr != NULL_TREE);
	return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
      }
    }
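
  /* So a guard such as
       if (__builtin_cpu_is ("intel")) ...
     (a hypothetical use, not from this file) never reaches the
     expanders below; it folds into a read of the __cpu_model data
     that __cpu_indicator_init fills in.  */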
  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }
  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (GET_MODE (op0) != Pmode)
	op0 = convert_to_mode (Pmode, op0, 1);
      op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);
    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}

      emit_insn (gen_sse2_clflush (op0));
      return 0;
    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);
    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }
    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;
    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
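
      /* The immediate packs both controls: bits [7:0] give the start
	 bit and bits [15:8] the field length, so an argument of
	 0x0804 (a made-up value, purely for illustration) extracts
	 8 bits starting at bit 4.  */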
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
	target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
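
      /* RDRAND clears the destination register when no random number
	 is available, so the conditional move above yields 1 on
	 success (carry set) and the hardware-provided 0 on failure,
	 matching the *_step intrinsic contract.  */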
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;

    gather_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
	  || GET_MODE (target) != insn_data[icode].operand[0].mode)
	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
	subtarget = target;
      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
	  || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
	{
	  rtx half = gen_reg_rtx (V4SImode);
	  if (!nonimmediate_operand (op2, V8SImode))
	    op2 = copy_to_mode_reg (V8SImode, op2);
	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
	  op2 = half;
	}
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
	       || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
	{
	  rtx (*gen) (rtx, rtx);
	  rtx half = gen_reg_rtx (mode0);
	  if (mode0 == V4SFmode)
	    gen = gen_vec_extract_lo_v8sf;
	  else
	    gen = gen_vec_extract_lo_v8si;
	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
	  if (!nonimmediate_operand (op3, GET_MODE (op3)))
	    op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	  emit_insn (gen (half, op3));
	  op3 = half;
	}
      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      if (GET_MODE (op1) != Pmode)
	op1 = convert_to_mode (Pmode, op1, 1);
      op1 = force_reg (Pmode, op1);

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
	op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
	op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
	{
	  error ("last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}
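
      /* Operand 5 becomes the scaled-index factor of the memory
	 address, so e.g. _mm_i32gather_pd (base, idx, 8) (an
	 illustrative intrinsic call, not from this file) loads each
	 element from base + idx[i] * 8.  */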
      /* Optimize.  If mask is known to have all high bits set,
	 replace op0 with pc_rtx to signal that the instruction
	 overwrites the whole destination and doesn't use its
	 previous contents.  */
      if (optimize)
	{
	  if (TREE_CODE (arg3) == VECTOR_CST)
	    {
	      unsigned int negative = 0;
	      for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
		{
		  tree cst = VECTOR_CST_ELT (arg3, i);
		  if (TREE_CODE (cst) == INTEGER_CST
		      && tree_int_cst_sign_bit (cst))
		    negative++;
		  else if (TREE_CODE (cst) == REAL_CST
			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
		    negative++;
		}
	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
		op0 = pc_rtx;
	    }
	  else if (TREE_CODE (arg3) == SSA_NAME)
	    {
	      /* Recognize also when mask is like:
		 __v2df src = _mm_setzero_pd ();
		 __v2df mask = _mm_cmpeq_pd (src, src);
		 or
		 __v8sf src = _mm256_setzero_ps ();
		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
		 as that is a cheaper way to load all ones into
		 a register than having to load a constant from
		 memory.  */
	      gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
	      if (is_gimple_call (def_stmt))
		{
		  tree fndecl = gimple_call_fndecl (def_stmt);
		  if (fndecl
		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
		      {
		      case IX86_BUILTIN_CMPPD:
		      case IX86_BUILTIN_CMPPS:
		      case IX86_BUILTIN_CMPPD256:
		      case IX86_BUILTIN_CMPPS256:
			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
			  break;
			/* FALLTHRU */
		      case IX86_BUILTIN_CMPEQPD:
		      case IX86_BUILTIN_CMPEQPS:
			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
			    && initializer_zerop (gimple_call_arg (def_stmt,
								   1)))
			  op0 = pc_rtx;
			break;
		      default:
			break;
		      }
		}
	    }
	}
      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
	  || fcode == IX86_BUILTIN_GATHERDIV8SI)
	{
	  enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
				    ? V4SFmode : V4SImode;
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (tmode);
	  if (tmode == V4SFmode)
	    emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
	  else
	    emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
	}
      else
	target = subtarget;

      return target;
    case IX86_BUILTIN_XABORT:
      icode = CODE_FOR_xabort;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;
      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	{
	  error ("the xabort's argument must be an 8-bit immediate");
	  return const0_rtx;
	}
      emit_insn (gen_xabort (op0));
      return 0;

    default:
      break;
    }
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE)
	    /* Emit a normal call if SSE isn't available.  */
	    return expand_call (exp, target, ignore);
	default:
	  return ix86_expand_args_builtin (d, exp, target);
	}

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
					    (enum ix86_builtin_func_type)
					    d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD256];
	}
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
	}
      break;
    case BUILT_IN_IFLOOR:
    case BUILT_IN_LFLOOR:
    case BUILT_IN_LLFLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IFLOORF:
    case BUILT_IN_LFLOORF:
    case BUILT_IN_LLFLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
	}
      break;
    case BUILT_IN_ICEIL:
    case BUILT_IN_LCEIL:
    case BUILT_IN_LLCEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_ICEILF:
    case BUILT_IN_LCEILF:
    case BUILT_IN_LLCEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
	}
      break;
    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
	}
      break;
    case BUILT_IN_IROUND:
    case BUILT_IN_LROUND:
    case BUILT_IN_LLROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IROUNDF:
    case BUILT_IN_LROUNDF:
    case BUILT_IN_LLROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
	}
      break;
    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
	}
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
	}
      break;
    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD256];
	}
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS256];
	}
      break;
    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD256];
	}
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS256];
	}
      break;
    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
	}
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
	}
      break;
    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_RINTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPD256];
	}
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_RINTPS256];
	}
      break;
    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
	}
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
	}
      break;
    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
	}
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS];
	  if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
	}
      break;

    default:
      break;
    }
  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;
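
  /* E.g. "__builtin_sinf" yields "vmlsSin4" and "__builtin_sin"
     yields "vmldSin2"; only log is irregular (vmlsLn4/vmldLn2).  */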
  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
			       const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
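
/* Under -ffast-math (which implies the math flags tested above), a
   float sqrtf can thus be expanded via the reciprocal-square-root
   approximation plus a Newton-Raphson refinement step instead of the
   slower full-precision square root instruction.  */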
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
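
/* Worked example: for V4SFmode the parallel [1 0 3 2] packs two bits
   per element, giving mask 0xb1 and a return value of 0xb2 after the
   +1 adjustment.  */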
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
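
/* Worked example: for V4DFmode the parallel [2 3 0 1] (swap the two
   128-bit lanes) has half starts 2 and 0, lane selectors 1 and 0,
   hence mask 0x01, returned as 0x02.  */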
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (DImode,
						gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_double_mode (mode, &operand, 1, operands, operands + 1);
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[1]));
	    emit_insn (
			gen_rtx_SET (VOIDmode,
				     gen_rtx_MEM (SImode,
						  gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				     operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (
		      gen_rtx_SET (VOIDmode,
				   gen_rtx_MEM (GET_MODE (operand),
						gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				   operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
31400 ix86_free_from_memory (enum machine_mode mode
)
31402 if (!ix86_using_red_zone ())
31406 if (mode
== DImode
|| TARGET_64BIT
)
31410 /* Use LEA to deallocate stack space. In peephole2 it will be converted
31411 to pop or add instruction if registers are available. */
31412 emit_insn (gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
31413 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
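
/* ix86_free_from_memory thus emits e.g. "leal 4(%esp), %esp" on
   ia32; peephole2 may later turn that into "addl $4, %esp" or a pop
   when a scratch register happens to be free.  */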
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  mode = GET_MODE (x);
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;

  if (X87_FLOAT_MODE_P (mode))
    {
      if (regclass == FP_TOP_SSE_REGS)
	return FP_TOP_REG;
      else if (regclass == FP_SECOND_SSE_REGS)
	return FP_SECOND_REG;
      else
	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
	  || rclass == LEGACY_REGS
	  || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
	regno = REGNO (x);
      else
	regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	                               (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
	reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
	reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
	reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
	reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;
  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
	{
	  if (!in)
	    return ix86_cost->int_store[0];
	  if (TARGET_PARTIAL_REG_DEPENDENCY
	      && optimize_function_for_speed_p (cfun))
	    cost = ix86_cost->movzbl_load;
	  else
	    cost = ix86_cost->int_load[0];
	  if (in == 2)
	    return MAX (cost, ix86_cost->int_store[0]);
	  return cost;
	}
      else
	{
	  if (in == 2)
	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	  if (in)
	    return ix86_cost->movzbl_load;
	  else
	    return ix86_cost->int_store[0] + 4;
	}
      break;
    case 2:
      if (in == 2)
	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      if (in == 2)
	cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
      else if (in)
	cost = ix86_cost->int_load[2];
      else
	cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
	  > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
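/* For instance, copying an SImode value between a general register and
   an SSE register hits the MMX/SSE-vs-integer test above and costs at
   least 8, which deliberately discourages the allocator from bouncing
   values between the two register files.  */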
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

static bool
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  OImode move is available only when AVX is
	 enabled.  */
      return ((TARGET_AVX && mode == OImode)
	      || VALID_AVX256_REG_MODE (mode)
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

static bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (enum machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
	  || (TARGET_80387 && mode == XFmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
	  || (TARGET_80387 && mode == XCmode)
	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
	units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX && VALID_AVX256_REG_MODE (mode))
	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
	units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
}
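/* Example: on a 32-bit target (UNITS_PER_WORD == 4) a DImode register
   copy is done in two 4-byte pieces, so the formula above yields
   COSTS_N_INSNS ((8 + 4 - 1) / 4) == COSTS_N_INSNS (2).  */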
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
		bool speed)
{
  enum rtx_code code = (enum rtx_code) code_i;
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
	{
	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
	  return true;
	}
      return false;
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;

    case CONST_DOUBLE:
      if (mode == VOIDmode)
	{
	  *total = 0;
	  return true;
	}
      switch (standard_80387_constant_p (x))
	{
	default: /* Other constants */
	  break;
	}
      if (SSE_FLOAT_MODE_P (mode))
	{
	  switch (standard_sse_constant_p (x))
	    {
	    case 1:  /* 0: xor eliminates false dependency */
	      *total = 0;
	      return true;
	    default: /* -1: cmp contains false dependency */
	      *total = 1;
	      return true;
	    }
	}
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
	 it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
		+ (flag_pic != 0 && !TARGET_64BIT)
		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      return true;
    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      break;

    case SIGN_EXTEND:
      *total = cost->movsx;
      break;
    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
	  && CONST_INT_P (XEXP (x, 1)))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  /* V*QImode is emulated with 1-11 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int count = 11;

	      if (TARGET_XOP && mode == V16QImode)
		{
		  /* For XOP we use vpshab, which requires a broadcast of the
		     value to the variable shift insn.  For constants this
		     means a V16Q const in mem; even when we can perform the
		     shift with one insn set the cost to prefer paddb.  */
		  if (CONSTANT_P (XEXP (x, 1)))
		    {
		      *total = (cost->fabs
				+ rtx_cost (XEXP (x, 0), code, 0, speed)
				+ (speed ? 2 : COSTS_N_BYTES (16)));
		      return true;
		    }
		  count = 3;
		}
	      else if (TARGET_SSSE3)
		count = 7;
	      *total = cost->fabs * count;
	    }
	  else
	    *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else
	    *total = cost->shift_var;
	}
      break;
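    /* Example of the double-word case above: on 32-bit, a DImode shift
       by a constant greater than 32 is costed shift_const
       + COSTS_N_INSNS (2), while any other constant double-word shift
       is costed as two single-word constant shifts.  */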
    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4);

	/* ??? SSE scalar/vector cost should be used here.  */
	/* ??? Bald assumption that fma has the same cost as fmul.  */
	*total = cost->fmul;
	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 2, speed);
	return true;
      }
    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE scalar cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fmul;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* V*QImode is emulated with 7-13 insns.  */
	  if (mode == V16QImode || mode == V32QImode)
	    {
	      int extra = 11;

	      if (TARGET_XOP && mode == V16QImode)
		extra = 5;
	      else if (TARGET_SSSE3)
		extra = 6;
	      *total = cost->fmul * 2 + cost->fabs * extra;
	    }
	  /* V*DImode is emulated with 5-8 insns.  */
	  else if (mode == V2DImode || mode == V4DImode)
	    {
	      if (TARGET_XOP && mode == V2DImode)
		*total = cost->fmul * 2 + cost->fabs * 3;
	      else
		*total = cost->fmul * 3 + cost->fabs * 5;
	    }
	  /* Without sse4.1, we don't have PMULLD; it's emulated with 7
	     insns, including two PMULUDQ.  */
	  else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
	    *total = cost->fmul * 2 + cost->fabs * 5;
	  else
	    *total = cost->fmul;
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
	        nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, outer_code, opno, speed)
		    + rtx_cost (op1, outer_code, opno, speed));

	  return true;
	}
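      /* The nbits loop above is a population count: value &= value - 1
	 clears one set bit per iteration, so a multiply by 10 (binary
	 1010) gives nbits == 2 and is charged two mult_bit steps.  */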
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fdiv;
      else
	*total = cost->divide[MODE_INDEX (mode)];
      break;
    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = cost->lea;
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      /* FALLTHRU */
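    /* The three PLUS shapes handled above correspond to what a single
       lea can encode: e.g. (plus (plus (mult reg 4) reg) const) is one
       "leal const(%base,%index,4), %dst", so only cost->lea is charged
       for the addition itself.  */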
    case AND:
    case IOR:
    case XOR:
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	{
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	{
	  /* ??? Should be SSE vector operation cost.  */
	  /* At least for published AMD latencies, this really is the same
	     as the latency for a simple fpu operation like fabs.  */
	  *total = cost->fabs;
	}
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	*total = cost->add * 2;
      else
	*total = cost->add;
      break;
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
		    + rtx_cost (const1_rtx, outer_code, opno, speed));
	  return true;
	}
      break;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      break;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fabs;
      break;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fsqrt;
      break;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      break;

    case VEC_SELECT:
    case VEC_EXTRACT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = cost->fabs;
      break;

    default:
      break;
    }

  return false;
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* MMX registers.  */
   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */

static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
32970 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED
,
32971 HOST_WIDE_INT delta ATTRIBUTE_UNUSED
,
32972 HOST_WIDE_INT vcall_offset
, const_tree function
)
32974 /* 64-bit can handle anything. */
32978 /* For 32-bit, everything's fine if we have one free register. */
32979 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
32982 /* Need a free register for vcall_offset. */
32986 /* Need a free register for GOT references. */
32987 if (flag_pic
&& !targetm
.binds_local_p (function
))
32990 /* Otherwise ok. */
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;

  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	tmp_regno = AX_REG;
      else
	tmp_regno = CX_REG;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	fnaddr = legitimize_pic_address (fnaddr,
					 gen_rtx_REG (Pmode, tmp_regno));

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					 : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;

  len = get_attr_length (insn);

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	  || asm_noperands (PATTERN (insn)) >= 0)
	return 0;

      /* Otherwise trust get_attr_length.  */
      return len;
    }

  l = get_attr_length_address (insn);
  if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
    l = 4;
  if (l)
    return 1 + l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
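/* Worked example for the padding computed above: if the current window
   holds nbytes == 14 including a 2-byte fourth jump, then
   padsize == 15 - 14 + 2 == 3, enough to push that jump past the
   16-byte boundary.  */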
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR)
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
	&& REGNO (recog_data.operand[i]) > BX_REG)
       return true;
  return false;
}
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
   unsigned int regno;
   if (!REG_P (*p))
     return 0;
   regno = REGNO (*p);
   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
		       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
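/* Example: for a constant of -4 the function rewrites it to 4 and
   returns true, letting the caller emit `subl $4,%eax' instead of
   `addl $-4,%eax'.  Per the exception above, -128 is left alone (it
   fits a sign-extended byte immediate while +128 would not), and +128
   is negated to -128 for the same reason.  */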
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
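/* The negative path above computes i0 = (in >> 1) | (in & 1): the input
   halved, with the dropped low bit folded back in so that the final
   doubling (out = f0 + f0) rounds the conversion correctly even though
   the halved value lost a bit.  */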
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_operand_p;
  bool testing_p;
};

static bool canonicalize_perm (struct expand_vec_perm_d *d);
static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
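/* Example: for o == V16QImode this returns V8HImode - the same 16-byte
   size, half as many elements, each twice as wide.  */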
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
				   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
	rtx insn, dup;

	/* First attempt to recognize VAL as-is.  */
	dup = gen_rtx_VEC_DUPLICATE (mode, val);
	insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
	if (recog_memoized (insn) < 0)
	  {
	    rtx seq;
	    /* If that fails, force VAL into a register.  */

	    start_sequence ();
	    XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
	    seq = get_insns ();
	    end_sequence ();
	    if (seq)
	      emit_insn_before (seq, insn);

	    ok = recog_memoized (insn) >= 0;
	    gcc_assert (ok);
	  }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
	return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
	{
	  rtx x;

	  val = gen_lowpart (SImode, val);
	  x = gen_rtx_TRUNCATE (HImode, val);
	  x = gen_rtx_VEC_DUPLICATE (mode, x);
	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
	  return true;
	}
      goto widen;

    case V8QImode:
      if (!mmx_ok)
	return false;
      goto widen;

    case V8HImode:
    case V16QImode:
      if (TARGET_SSE2)
	{
	  struct expand_vec_perm_d dperm;
	  rtx tmp1, tmp2;

	  memset (&dperm, 0, sizeof (dperm));
	  dperm.target = target;
	  dperm.vmode = mode;
	  dperm.nelt = GET_MODE_NUNITS (mode);
	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
	  dperm.one_operand_p = true;

	  /* Extend to SImode using a paradoxical SUBREG.  */
	  tmp1 = gen_reg_rtx (SImode);
	  emit_move_insn (tmp1, gen_lowpart (SImode, val));

	  /* Insert the SImode value as low element of a V4SImode vector.  */
	  tmp2 = gen_lowpart (V4SImode, dperm.op0);
	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

	  ok = (expand_vec_perm_1 (&dperm)
		|| expand_vec_perm_broadcast_1 (&dperm));
	  gcc_assert (ok);
	  return ok;
	}
      goto widen;

    widen:
      {
	/* Replicate the value once into the next wider mode and recurse.  */
	enum machine_mode smode, wsmode, wvmode;
	rtx x;

	smode = GET_MODE_INNER (mode);
	wvmode = get_mode_wider_vector (mode);
	wsmode = GET_MODE_INNER (wvmode);

	val = convert_modes (wsmode, smode, val, true);
	x = expand_simple_binop (wsmode, ASHIFT, val,
				 GEN_INT (GET_MODE_BITSIZE (smode)),
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

	x = gen_lowpart (wvmode, target);
	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
	gcc_assert (ok);
	return ok;
      }

    case V16HImode:
    case V32QImode:
      {
	enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
	rtx x = gen_reg_rtx (hvmode);

	ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
	gcc_assert (ok);

	x = gen_rtx_VEC_CONCAT (mode, x, x);
	emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
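/* The widening step above turns e.g. a V8QImode broadcast of byte B
   into a V4HImode broadcast of (B << 8) | B, halving the element count
   each time until a directly supported duplicate pattern is reached.  */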
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
				     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
			&& !(TARGET_INTER_UNIT_MOVES
			     && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
	return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
	return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
	new_target = gen_reg_rtx (mode);
      else
	new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
	{
	  /* We need to shuffle the value to the correct position, so
	     create a new pseudo to store the intermediate result.  */

	  /* With SSE2, we can use the integer shuffle insns.  */
	  if (mode != V4SFmode && TARGET_SSE2)
	    {
	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
					    GEN_INT (1),
					    GEN_INT (one_var == 1 ? 0 : 1),
					    GEN_INT (one_var == 2 ? 0 : 1),
					    GEN_INT (one_var == 3 ? 0 : 1)));
	      if (target != new_target)
		emit_move_insn (target, new_target);
	      return true;
	    }

	  /* Otherwise convert the intermediate result to V4SFmode and
	     use the SSE1 shuffle instructions.  */
	  if (mode != V4SFmode)
	    {
	      tmp = gen_reg_rtx (V4SFmode);
	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
	    }
	  else
	    tmp = new_target;

	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
					  GEN_INT (1),
					  GEN_INT (one_var == 1 ? 0 : 1),
					  GEN_INT (one_var == 2 ? 0+4 : 1+4),
					  GEN_INT (one_var == 3 ? 0+4 : 1+4)));

	  if (mode != V4SFmode)
	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
	  else if (tmp != target)
	    emit_move_insn (target, tmp);
	}
      else if (target != new_target)
	emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
	return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
	return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
						var, one_var))
	gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
				 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
	 the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
	return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
	break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
	 the variable value with its adjacent constant value, and
	 promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
	{
	  var = convert_modes (HImode, QImode, var, true);
	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
	  x = GEN_INT (INTVAL (x) & 0xff);
	}
      else
	{
	  var = convert_modes (HImode, QImode, var, true);
	  x = gen_int_mode (INTVAL (x) << 8, HImode);
	}
      if (x != const0_rtx)
	var = expand_simple_binop (HImode, IOR, var, x, var,
				   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
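/* Example of the QImode trick above: to set element 5 of a V16QImode
   vector, the variable byte is shifted into the high half of an HImode
   value, merged with its neighbour (element 4) in the low half, and
   stored as element 2 (5 >> 1) of the corresponding V8HImode vector.  */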
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
				rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
	{
	  /* ... component-mode selection ... */
	default:
	  gcc_unreachable ();
	}

      if (!register_operand (ops[1], cmode))
	ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
	ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_CONCAT (mode, ops[0],
						  ops[1])));
      break;

    case 4:
      switch (mode)
	{
	  /* ... component-mode selection ... */
	default:
	  gcc_unreachable ();
	}
      goto half;

    case 8:
      switch (mode)
	{
	  /* ... component-mode selection ... */
	default:
	  gcc_unreachable ();
	}
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
	{
	  first[j] = gen_reg_rtx (cmode);
	  v = gen_rtvec (2, ops[i - 1], ops[i]);
	  ix86_expand_vector_init (false, first[j],
				   gen_rtx_PARALLEL (cmode, v));
	}

      n >>= 1;
      if (n > 2)
	{
	  gcc_assert (hmode != VOIDmode);
	  for (i = j = 0; i < n; i += 2, j++)
	    {
	      second[j] = gen_reg_rtx (hmode);
	      ix86_expand_vector_init_concat (hmode, second[j],
					      &first[i], 2);
	    }
	  n >>= 1;
	  ix86_expand_vector_init_concat (mode, target, second, n);
	}
      else
	ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
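/* Example: an 8-element build first concatenates pairs into four
   half-width vectors, then recurses on those (n == 4), and finally on
   two (n == 2), forming a balanced tree of VEC_CONCATs.  */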
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
                                    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
                               gen_rtx_VEC_DUPLICATE (V4SImode,
                                                      op0),
                               CONST0_RTX (V4SImode),
                               const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
                                force_reg (inner_mode,
                                           ops[i + i + 1]),
                                const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
        {
          op0 = gen_reg_rtx (second_imode);
          emit_insn (gen_interleave_second_low (op0, ops[i],
                                                ops[i + 1]));

          /* Cast the SECOND_IMODE vector to the THIRD_IMODE
             vector.  */
          ops[j] = gen_reg_rtx (third_imode);
          emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
        }
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
                                            ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
         mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
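/* An illustrative, non-compiled sketch of the interleave idea above,
   modelled on a 4 x 16-bit vector held in a 64-bit word: each scalar
   pair is first placed in the low lane of its own "register", and
   interleaving the low lanes then concatenates the halves into the
   fully initialized vector.  All names are local to this sketch.  */
#if 0
static unsigned long long
init_v4hi_by_interleave_sketch (const unsigned short e[4])
{
  /* Step 1: each pair lands in the low 32 bits of its own word
     (the gen_load_even path above).  */
  unsigned long long v0 = (unsigned long long) e[0]
                          | ((unsigned long long) e[1] << 16);
  unsigned long long v1 = (unsigned long long) e[2]
                          | ((unsigned long long) e[3] << 16);

  /* Step 2: interleave the low 32-bit lanes (gen_interleave_*_low),
     concatenating the two halves into the final vector.  */
  return (v0 & 0xffffffffULL) | (v1 << 32);
}
#endif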
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
                                          n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
                                          &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
        break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
        break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
         move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
        break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

  {
    int i, j, n_elts, n_words, n_elt_per_word;
    enum machine_mode inner_mode;
    rtx words[4], shift;

    inner_mode = GET_MODE_INNER (mode);
    n_elts = GET_MODE_NUNITS (mode);
    n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
    n_elt_per_word = n_elts / n_words;
    shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

    for (i = 0; i < n_words; ++i)
      {
        rtx word = NULL_RTX;

        for (j = 0; j < n_elt_per_word; ++j)
          {
            rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
            elt = convert_modes (word_mode, inner_mode, elt, true);

            if (j == 0)
              word = elt;
            else
              {
                word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                            word, 1, OPTAB_LIB_WIDEN);
                word = expand_simple_binop (word_mode, IOR, word, elt,
                                            word, 1, OPTAB_LIB_WIDEN);
              }
          }

        words[i] = word;
      }

    if (n_words == 1)
      emit_move_insn (target, gen_lowpart (mode, words[0]));
    else if (n_words == 2)
      {
        rtx tmp = gen_reg_rtx (mode);
        emit_clobber (tmp);
        emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
        emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
        emit_move_insn (target, tmp);
      }
    else if (n_words == 4)
      {
        rtx tmp = gen_reg_rtx (V4SImode);
        gcc_assert (word_mode == SImode);
        vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
        ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
        emit_move_insn (target, gen_lowpart (mode, tmp));
      }
    else
      gcc_unreachable ();
  }
}

/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
            || GET_CODE (x) == CONST_DOUBLE
            || GET_CODE (x) == CONST_FIXED))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
          && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, one_var),
                                                  one_var))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = {
        { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
        { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
        { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
        { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
        { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
        { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
      };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = {
        { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
        { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
        { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
        { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
        { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
        { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
      };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
        break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
        tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      else
        tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const1_rtx, const0_rtx,
                                          GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
                                          GEN_INT (elt), GEN_INT (elt),
                                          GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SFmode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DFmode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V32QImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V16QImode);
          if (elt < 16)
            emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 15);
          return;
        }
      break;

    case V16HImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V8HImode);
          if (elt < 8)
            emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 7);
          return;
        }
      break;

    case V8SImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SImode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DImode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
        tem = gen_sse_movhlps (dest, src, src);
      else
        tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
                                   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
                                gen_lowpart (V1TImode, src),
                                GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufps256 (dest, src, src,
                                 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
        tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
                                 gen_lowpart (V4DImode, src),
                                 gen_lowpart (V4DImode, src),
                                 const1_rtx);
      else
        tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
                                  gen_lowpart (V2TImode, src),
                                  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}

/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
        dst = dest;
      else
        dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
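/* An illustrative, non-compiled sketch of the halving reduction
   performed by the loop above, modelled on a plain array with
   addition standing in for FN.  All names are local to this sketch.  */
#if 0
static int
reduce_plus_sketch (int v[], int n /* assumed a power of two */)
{
  int i, half;

  /* Each pass folds the upper half onto the lower half, so a vector
     of N elements needs log2(N) applications of the binary op.  */
  for (half = n / 2; half >= 1; half /= 2)
    for (i = 0; i < half; i++)
      v[i] = v[i] + v[i + half];	/* emit_reduc_half + fn */
  return v[0];
}
#endif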
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}

/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  return clobbers;
}

/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}

/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}

/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
          && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
          if (REGNO (operands[0]) == FIRST_STACK_REG)
            return output_387_ffreep (operands, 0);
          return "fstp\t%y0";
        }
      if (STACK_TOP_P (operands[0]))
        return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        return "fstp%Z0\t%y0";
      else
        {
          /* There is no non-popping store to memory for XFmode.
             So if we need one, follow the store with a load.  */
          if (GET_MODE (operands[0]) == XFmode)
            return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
          else
            return "fst%Z0\t%y0";
        }
    }
  else
    gcc_unreachable ();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}

/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
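/* An illustrative, non-compiled sketch of the branch above.  The
   threshold 0.2928932188... is 1 - sqrt(2)/2: below it fyl2xp1 is
   usable and avoids the cancellation in forming 1 + x; above it the
   plain fyl2x path on the computed 1 + x is used.  log1pl/logl stand
   in for the two x87 instructions; names are local to this sketch.  */
#if 0
#include <math.h>
static long double
log1p_sketch (long double x)
{
  if (fabsl (x) < 0.29289321881345247561810596348408353L)
    return log1pl (x);		/* fyl2xp1 path: no accuracy loss */
  else
    return logl (1.0L + x);	/* fyl2x path on the computed 1 + x */
}
#endif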
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
                          gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
                                          UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
                              gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
        rtx tmp0 = gen_reg_rtx (XFmode);

        emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

        emit_insn (gen_rtx_SET (VOIDmode, res,
                                gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
                                                UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
                              gen_rtx_EQ (VOIDmode, flags, const0_rtx),
                              gen_rtx_LABEL_REF (VOIDmode, jump_label),
                              pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
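/* An illustrative, non-compiled sketch of the identity implemented
   above: round(a) = sgn(a) * floor(fabs(a) + 0.5).  signbit() plays
   the role of the fxam sign test, so -0.5 rounds to -1 and the sign
   of zero is preserved.  Names are local to this sketch.  */
#if 0
#include <math.h>
static double
i387_round_sketch (double a)
{
  double r = floor (fabs (a) + 0.5);
  return signbit (a) ? -r : r;
}
#endif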
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                          UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
                          gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, a, x1)));
}
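/* An illustrative, non-compiled sketch of the refinement emitted
   above.  With x0 ~= 1/b from the hardware estimate, one
   Newton-Raphson step x1 = 2*x0 - b*x0*x0 roughly doubles the number
   of correct bits, and a/b is then a * x1.  Names are local to this
   sketch.  */
#if 0
static float
swdiv_sketch (float a, float b, float rcp_estimate /* ~ 1.0f / b */)
{
  float x0 = rcp_estimate;	/* rcpss */
  float e0 = (x0 * b) * x0;	/* b * x0 * x0 */
  float e1 = x0 + x0;		/* 2 * x0 */
  float x1 = e1 - e0;		/* refined reciprocal */
  return a * x1;
}
#endif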
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
                         bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
                                          UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
                              gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
                              gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e2, e3)));
}
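/* An illustrative, non-compiled sketch of the rsqrt refinement
   emitted above.  With x0 ~= 1/sqrt(a), the value
   -0.5 * x0 * (a*x0*x0 - 3) is one Newton-Raphson step for 1/sqrt(a);
   multiplying by e0 = a*x0 instead of x0 yields sqrt(a).  Names are
   local to this sketch.  */
#if 0
static float
rsqrt_sketch (float a, float x0 /* ~ 1.0f / sqrtf (a) */, int recip)
{
  float e0 = x0 * a;			/* a * x0 */
  float e1 = e0 * x0;			/* a * x0 * x0 */
  float e2 = e1 - 3.0f;			/* (a*x0*x0 - 3) */
  float e3 = -0.5f * (recip ? x0 : e0);
  return e2 * e3;			/* rsqrt(a) or sqrt(a) */
}
#endif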
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */

/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}

/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}

/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign-bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
      else
        vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
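/* An illustrative, non-compiled sketch of the mask algebra above on
   scalar bit patterns.  ABS_VALUE is assumed non-negative, so its
   sign bit is clear and an OR suffices to transplant SIGN's sign bit.
   Names are local to this sketch.  */
#if 0
#include <stdint.h>
static uint32_t
copysign_to_positive_sketch (uint32_t abs_value, uint32_t sign)
{
  uint32_t signbit_mask = 0x80000000u;	/* ix86_build_signbit_mask */
  uint32_t sgn = sign & signbit_mask;	/* isolate the sign bit */
  return abs_value | sgn;		/* transplant it */
}
#endif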
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}

/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */

static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}

/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */

static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
                   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */

static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
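/* An illustrative, non-compiled sketch of why 2**52 (2**23 for float)
   rounds to integer.  Once a double's magnitude reaches 2**52 its ulp
   is 1.0, so adding TWO52 rounds away the fraction bits in the
   addition and the subtraction is exact, leaving rint(x).  The
   volatile blocks constant folding so the rounding really happens at
   run time.  Names are local to this sketch.  */
#if 0
static double
rint_positive_sketch (double x /* assumed 0 <= x < 2**52 */)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  volatile double t = x + two52;	/* fraction rounded off here */
  return t - two52;			/* exact subtraction */
}
#endif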
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */

void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}

/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
   storing into OPERAND0.  */

void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
       xi = (long)op1;
       xi -= (double)xi > op1 ? 1 : 0;
       op0 = xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}

/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */

void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
       xa = fabs (operand1);
       if (!isless (xa, 2**52))
         return operand1;
       xa = xa + 2**52 - 2**52;
       return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
         return x;
       xa = xa + TWO52 - TWO52;
       x2 = copysign (xa, x);
       Compensate: if x2 landed on the wrong side of x, nudge it by
       one toward the requested direction.
       return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
         return x;
       x2 = (double)(long)x;
       Compensate: nudge x2 by one if it landed on the wrong side of x.
       if (HONOR_SIGNED_ZEROS (mode))
         return copysign (x2, x);
       return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
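/* An illustrative, non-compiled sketch of the compare-and-compensate
   step used by the floor expansion above: conversion through an
   integer truncates toward zero, so for negative non-integral inputs
   the result lands one too high and is nudged down.  Names are local
   to this sketch.  */
#if 0
static double
floor_sketch (double x /* assumed |x| < 2**52 */)
{
  double x2 = (double) (long long) x;	/* truncate toward zero */
  if (x2 > x)				/* only possible for x < 0 */
    x2 -= 1.0;				/* the "xa - (xa > x ? 1 : 0)" step */
  return x2;
}
#endif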
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */

void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
       double xa = fabs (x), xa2, x2;
       if (!isless (xa, TWO52))
         return x;
       Using the absolute value and copying back sign makes
       -0.0 -> -0.0 correct.
       xa2 = xa + TWO52 - TWO52;
       dxa = xa2 - xa;
       if (dxa <= -0.5)
         xa2 += 1;
       else if (dxa > 0.5)
         xa2 -= 1;
       x2 = copysign (xa2, x);
       return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
         return x;
       x2 = (double)(long)x;
       if (HONOR_SIGNED_ZEROS (mode))
         return copysign (x2, x);
       return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0, without relying on DImode truncation.  */

void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
         return x;
       xa2 = xa + TWO52 - TWO52;
       Compensate: if xa2 overshot xa, subtract one.
       x2 = copysign (xa2, x);
       return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */

void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
       double xa = fabs (x);
       if (!isless (xa, TWO52))
         return x;
       xa = (double)(long)(xa + nextafter (0.5, 0.0));
       return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
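/* An illustrative, non-compiled sketch of why the expansion above adds
   nextafter (0.5, 0.0) rather than 0.5: PRED_HALF = 0.5 - 2**(-p-1)
   (p = mantissa precision) is the largest representable value below
   one half, and using it keeps inputs just below 0.5 from being
   rounded up by the addition before the truncation.  Names are local
   to this sketch.  */
#if 0
static double
round_half_sketch (double x /* assumed |x| < 2**52 */)
{
  const double pred_half = 0.49999999999999994; /* nextafter (0.5, 0.0) */
  double xa = x < 0 ? -x : x;
  double r = (double) (long long) (xa + pred_half);	/* truncate */
  return x < 0 ? -r : r;
}
#endif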
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */

void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}

/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true, true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype,
                                 int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return ix86_cost->vec_stmt_cost;

      case vec_construct:
        elements = TYPE_VECTOR_SUBPARTS (vectype);
        return elements / 2 + 1;

      default:
        gcc_unreachable ();
    }
}
/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
   insn every time.  */

static GTY(()) rtx vselect_insn;

/* Initialize vselect_insn.  */

static void
init_vselect_insn (void)
{
  unsigned i;
  rtx x;

  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
  for (i = 0; i < MAX_VECT_LEN; ++i)
    XVECEXP (x, 0, i) = const0_rtx;
  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
                                                        const0_rtx), x);
  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
  start_sequence ();
  vselect_insn = emit_insn (x);
  end_sequence ();
}

/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm,
                unsigned nelt, bool testing_p)
{
  unsigned int i;
  rtx x, save_vconcat;
  int icode;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
  PUT_NUM_ELEM (XVEC (x, 0), nelt);
  for (i = 0; i < nelt; ++i)
    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
  SET_DEST (PATTERN (vselect_insn)) = target;
  icode = recog_memoized (vselect_insn);

  if (icode >= 0 && !testing_p)
    emit_insn (copy_rtx (PATTERN (vselect_insn)));

  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
  INSN_CODE (vselect_insn) = -1;

  return icode >= 0;
}

/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt,
                        bool testing_p)
{
  enum machine_mode v2mode;
  rtx x;
  bool ok;

  if (vselect_insn == NULL_RTX)
    init_vselect_insn ();

  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
  PUT_MODE (x, v2mode);
  XEXP (x, 0) = op0;
  XEXP (x, 1) = op1;
  ok = expand_vselect (target, x, perm, nelt, testing_p);
  XEXP (x, 0) = const0_rtx;
  XEXP (x, 1) = const0_rtx;
  return ok;
}
36516 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
	return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
	mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
	 an immediate argument, rather than pblendvb with a vector
	 argument.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  {
	  use_pblendvb:
	    for (i = 0; i < nelt; ++i)
	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

	  finish_pblendvb:
	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
	    vperm = force_reg (vmode, vperm);

	    if (GET_MODE_SIZE (vmode) == 16)
	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
	    else
	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
	    return true;
	  }

      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
	 with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
	if (d->perm[i] + 2 != d->perm[i + 2])
	  break;
      if (i < 32)
	{
	  /* See if bytes move the same in both lanes.  If yes,
	     vpblendw with immediate can be used.  */
	  for (i = 0; i < 16; i += 2)
	    if (d->perm[i] + 16 != d->perm[i + 16])
	      goto use_pblendvb;

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i * 2] >= 32) << i;
	  vmode = V16HImode;
	  goto do_subreg;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  break;
      if (i < 16)
	{
	  /* See if words move the same in both lanes.  If not,
	     vpblendvb must be used.  */
	  for (i = 0; i < 8; i++)
	    if (d->perm[i] + 8 != d->perm[i + 8])
	      {
		/* Use vpblendvb.  */
		for (i = 0; i < 32; ++i)
		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

		vmode = V32QImode;
		nelt = 32;
		target = gen_lowpart (vmode, target);
		op0 = gen_lowpart (vmode, op0);
		op1 = gen_lowpart (vmode, op1);
		goto finish_pblendvb;
	      }

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i] >= 16) << i;
	  break;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
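
/* As a rough illustration of the mask computation above: for V8HImode and
   the blend selector { 0, 9, 2, 11, 4, 13, 6, 15 }, bit I of MASK is set
   exactly when perm[I] >= nelt, so mask == 0xaa and the VEC_MERGE emitted
   below matches what is effectively pblendw $0xaa between the two
   operands.  */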
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
	e -= (8 + 4);
      else if (e >= 4)
	e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
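
/* Illustrative sketch: for the in-lane selector { 3, 2, 1, 0, 7, 6, 5, 4 }
   the loop above reduces every index into the range 0-3 and builds the
   V8SImode control vector { 3, 2, 1, 0, 3, 2, 1, 0 }, which is the form
   the variable-mask vpermilps consumes (each control element selects
   within its own 128-bit lane).  */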
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
	  return false;

  return true;
}
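
/* For instance, the V16QImode selector { 4, 5, 6, 7, 0, 1, 2, 3, ... }
   moves whole aligned 4-byte chunks, so with VMODE == V4SImode every
   chunk-leading index is 4-aligned and the following indexes are
   consecutive; the function then reports that the shuffle can instead be
   done as a (cheaper) V4SImode permutation.  */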
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (!d->one_operand_p)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
	{
	  if (TARGET_AVX2
	      && valid_perm_using_mode_p (V2TImode, d))
	    {
	      if (d->testing_p)
		return true;

	      /* Use vperm2i128 insn.  The pattern uses
		 V4DImode instead of V2TImode.  */
	      target = gen_lowpart (V4DImode, d->target);
	      op0 = gen_lowpart (V4DImode, d->op0);
	      op1 = gen_lowpart (V4DImode, d->op1);
	      rperm[0]
		= GEN_INT ((d->perm[0] / (nelt / 2))
			   | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
	      return true;
	    }
	  return false;
	}
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
	{
	  if (!TARGET_SSSE3)
	    return false;
	}
      else if (GET_MODE_SIZE (d->vmode) == 32)
	{
	  if (!TARGET_AVX2)
	    return false;

	  /* V4DImode should be already handled through
	     expand_vselect by vpermq instruction.  */
	  gcc_assert (d->vmode != V4DImode);

	  vmode = V32QImode;
	  if (d->vmode == V8SImode
	      || d->vmode == V16HImode
	      || d->vmode == V32QImode)
	    {
	      /* First see if vpermq can be used for
		 V8SImode/V16HImode/V32QImode.  */
	      if (valid_perm_using_mode_p (V4DImode, d))
		{
		  for (i = 0; i < 4; i++)
		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
		  return expand_vselect (gen_lowpart (V4DImode, d->target),
					 gen_lowpart (V4DImode, d->op0),
					 perm, 4, d->testing_p);
		}

	      /* Next see if vpermd can be used.  */
	      if (valid_perm_using_mode_p (V8SImode, d))
		vmode = V8SImode;
	    }
	  /* Or if vpermps can be used.  */
	  else if (d->vmode == V8SFmode)
	    vmode = V8SImode;

	  if (vmode == V32QImode)
	    {
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 2))
		  return false;
	    }
	}
      else
	return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (!d->one_operand_p)
	mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
	mask = nelt - 1;
      else
	mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
	{
	  unsigned j, e = d->perm[i] & mask;
	  for (j = 0; j < eltsz; ++j)
	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
	}
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->one_operand_p)
    {
      if (vmode == V16QImode)
	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else if (vmode == V8SFmode)
	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
      else
	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
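
/* Worked example of the byte-mask expansion above (a sketch): for
   4-byte elements, each element index E is widened to ELTSZ == 4
   consecutive byte indexes E*4, E*4+1, E*4+2, E*4+3, which is the
   per-byte control form that pshufb/vpshufb/vpperm consume.  */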
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->one_operand_p)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
	{
	  perm2[i] = d->perm[i] & mask;
	  if (perm2[i] != i)
	    identity_perm = false;
	  if (perm2[i])
	    broadcast_perm = false;
	}

      if (identity_perm)
	{
	  if (!d->testing_p)
	    emit_move_insn (d->target, d->op0);
	  return true;
	}
      else if (broadcast_perm && TARGET_AVX2)
	{
	  /* Use vpbroadcast{b,w,d}.  */
	  rtx (*gen) (rtx, rtx) = NULL;
	  switch (d->vmode)
	    {
	    case V32QImode:
	      gen = gen_avx2_pbroadcastv32qi_1;
	      break;
	    case V16HImode:
	      gen = gen_avx2_pbroadcastv16hi_1;
	      break;
	    case V8SImode:
	      gen = gen_avx2_pbroadcastv8si_1;
	      break;
	    case V16QImode:
	      gen = gen_avx2_pbroadcastv16qi;
	      break;
	    case V8HImode:
	      gen = gen_avx2_pbroadcastv8hi;
	      break;
	    case V8SFmode:
	      gen = gen_avx2_vec_dupv8sf_1;
	      break;
	    /* For other modes prefer other shuffles this function creates.  */
	    default:
	      break;
	    }
	  if (gen != NULL)
	    {
	      if (!d->testing_p)
		emit_insn (gen (d->target, d->op0));
	      return true;
	    }
	}

      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
	return true;

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	{
	  perm2[i] = d->perm[i] & mask;
	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
	}
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				  d->testing_p))
	return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
	{
	  for (i = 0; i < nelt; i += 4)
	    {
	      perm2[i + 0] = d->perm[i + 0] & mask;
	      perm2[i + 1] = d->perm[i + 1] & mask;
	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
	    }

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
				      d->testing_p))
	    return true;
	}
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
			      d->testing_p))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
				  d->testing_p))
	return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
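
/* Note on the ordering above: the fixed-immediate forms (vec_select and
   vec_select+concat) are attempted before the variable-mask fallbacks
   (blend, vpermil, pshufb), since the latter generally require an extra
   constant-pool load for the control vector even when they count as a
   single shuffle insn.  */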
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || !d->one_operand_p)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
	min = e;
      if (e > max)
	max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
				  gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;
  d->one_operand_p = true;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
	in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
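
/* Sketch of the idea: for V8HImode and the selector
   { 5, 6, 7, 8, 9, 10, 11, 12 } we get min == 5, so palignr shifts the
   op1:op0 concatenation down by 5 elements (5 * 16 bits); the residual
   selector then becomes the identity { 0, 1, ..., 7 } and the degenerate
   case above fires with no further shuffle needed.  */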
static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->one_operand_p)
	return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
	return false;
      /* For 32-byte modes allow even d->one_operand_p.
	 The lack of cross-lane shuffling in some instructions
	 might prevent a single insn shuffle.  */
      dfinal = *d;
      dfinal.testing_p = true;
      /* If expand_vec_perm_interleave3 can expand this into
	 a 3 insn sequence, give up and let it be expanded as
	 3 insn sequence.  While that is one insn longer,
	 it doesn't need a memory operand and in the common
	 case that both interleave low and high permutations
	 with the same operands are adjacent needs 4 insns
	 for both after CSE.  */
      if (expand_vec_perm_interleave3 (&dfinal))
	return false;
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements are all from the low halves, use interleave low;
	 similarly for interleave high.  If the elements are from
	 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode
	 shuffle.  */
      if ((contents & (h1 | h3)) == contents)
	{
	  /* punpckl* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h2 | h4)) == contents)
	{
	  /* punpckh* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i * 2;
	      remap[i + nelt + nelt2] = i * 2 + 1;
	      dremap.perm[i * 2] = i + nelt2;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h1 | h4)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i;
	      remap[i + nelt + nelt2] = i + nelt2;
	      dremap.perm[i] = i;
	      dremap.perm[i + nelt2] = i + nelt + nelt2;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 0;
	      dremap.perm[1] = 3;
	    }
	}
      else if ((contents & (h2 | h3)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i;
	      remap[i + nelt] = i + nelt2;
	      dremap.perm[i] = i + nelt2;
	      dremap.perm[i + nelt2] = i + nelt;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 1;
	      dremap.perm[1] = 2;
	    }
	}
      else
	return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
	q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
	  {
	    nonzero_halves[nzcnt] = i;
	    ++nzcnt;
	  }

      if (nzcnt == 1)
	{
	  gcc_assert (d->one_operand_p);
	  nonzero_halves[1] = nonzero_halves[0];
	  same_halves = true;
	}
      else if (d->one_operand_p)
	{
	  gcc_assert (nonzero_halves[0] == 0);
	  gcc_assert (nonzero_halves[1] == 1);
	}

      if (nzcnt <= 2)
	{
	  if (d->perm[0] / nelt2 == nonzero_halves[1])
	    {
	      /* Attempt to increase the likelihood that dfinal
		 shuffle will be intra-lane.  */
	      char tmph = nonzero_halves[0];
	      nonzero_halves[0] = nonzero_halves[1];
	      nonzero_halves[1] = tmph;
	    }

	  /* vperm2f128 or vperm2i128.  */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
	      remap[i + nonzero_halves[0] * nelt2] = i;
	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
	    }

	  if (d->vmode != V8SFmode
	      && d->vmode != V4DFmode
	      && d->vmode != V8SImode)
	    {
	      dremap.vmode = V8SImode;
	      dremap.nelt = 8;
	      for (i = 0; i < 4; ++i)
		{
		  dremap.perm[i] = i + nonzero_halves[0] * 4;
		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
		}
	    }
	}
      else if (d->one_operand_p)
	return false;
      else if (TARGET_AVX2
	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
	{
	  /* vpunpckl* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      remap[i + nelt2] = i * 2 + nelt2;
	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	      dremap.perm[i * 2 + nelt2] = i + nelt2;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
	    }
	}
      else if (TARGET_AVX2
	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
	{
	  /* vpunpckh* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i + nelt4] = i * 2;
	      remap[i + nelt + nelt4] = i * 2 + 1;
	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i + nelt4;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
	    }
	}
      else
	return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
	 same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
	{
	  gcc_assert (e < nelt2);
	  dfinal.perm[i] = e + nelt2;
	}
      else
	dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
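
/* CONTENTS is a 2*nelt-bit occupancy map: bit K is set when element K of
   the op0:op1 concatenation is referenced by the selector.  E.g. for
   V4SImode and { 0, 4, 1, 5 }, contents == 0x33 == (h1 | h3): only the two
   low halves are used, so the punpckl* branch above is taken.  */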
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
	&& (d->vmode == V32QImode || d->vmode == V16HImode)
	&& d->one_operand_p))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
	  return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  dremap.one_operand_p = true;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0)
	  dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
	dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
	j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
	;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
	dfinal.perm[i] |= nelt4;
      else
	gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
   a vector permutation using two instructions, vperm2f128 resp.
   vperm2i128 followed by any single in-lane permutation.  */

static bool
expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
  bool ok;

  if (!TARGET_AVX
      || GET_MODE_SIZE (d->vmode) != 32
      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
    return false;

  dsecond = *d;
  dsecond.one_operand_p = false;
  dsecond.testing_p = true;

  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
     immediate.  For perm < 16 the second permutation uses
     d->op0 as first operand, for perm >= 16 it uses d->op1
     as first operand.  The second operand is the result of
     vperm2[fi]128.  */
  for (perm = 0; perm < 32; perm++)
    {
      /* Ignore permutations which do not move anything cross-lane.  */
      if (perm < 16)
	{
	  /* The second shuffle for e.g. V4DFmode has
	     0123 and ABCD operands.
	     Ignore AB23, as 23 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (1 << 2)) continue;
	  /* And 01CD, as 01 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 0) continue;
	  /* And 4567, as then the vperm2[fi]128 doesn't change
	     anything on the original 4567 second operand.  */
	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
	}
      else
	{
	  /* The second shuffle for e.g. V4DFmode has
	     4567 and ABCD operands.
	     Ignore AB67, as 67 is already in the second lane
	     of the first operand.  */
	  if ((perm & 0xc) == (3 << 2)) continue;
	  /* And 45CD, as 45 is in the first lane of the first
	     operand.  */
	  if ((perm & 3) == 2) continue;
	  /* And 0123, as then the vperm2[fi]128 doesn't change
	     anything on the original 0123 first operand.  */
	  if ((perm & 0xf) == (1 << 2)) continue;
	}

      for (i = 0; i < nelt; i++)
	{
	  j = d->perm[i] / nelt2;
	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
	  else
	    break;
	}

      if (i == nelt)
	{
	  start_sequence ();
	  ok = expand_vec_perm_1 (&dsecond);
	  end_sequence ();
	}
      else
	ok = false;

      if (ok)
	{
	  if (d->testing_p)
	    return true;

	  /* Found a usable second shuffle.  dfirst will be
	     vperm2f128 on d->op0 and d->op1.  */
	  dsecond.testing_p = false;
	  dfirst = *d;
	  dfirst.target = gen_reg_rtx (d->vmode);
	  for (i = 0; i < nelt; i++)
	    dfirst.perm[i] = (i & (nelt2 - 1))
			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;

	  ok = expand_vec_perm_1 (&dfirst);
	  gcc_assert (ok);

	  /* And dsecond is some single insn shuffle, taking
	     d->op0 and result of vperm2f128 (if perm < 16) or
	     d->op1 and result of vperm2f128 (otherwise).  */
	  dsecond.op1 = dfirst.target;
	  if (perm >= 16)
	    dsecond.op0 = dfirst.op1;

	  ok = expand_vec_perm_1 (&dsecond);
	  gcc_assert (ok);

	  return true;
	}

      /* For one operand, the only useful vperm2f128 permutation is 0x10.  */
      if (d->one_operand_p)
	return false;
    }

  return false;
}
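
/* Roughly, the immediate finally used is ((perm << 2) | perm) & 0x33, i.e.
   the candidate 2-bit lane selectors replicated into the two fields that
   vperm2[fi]128 expects: bits 0-1 pick the source lane of the low result
   lane and bits 4-5 the source lane of the high one.  */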
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->one_operand_p)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv32qi;
      else
	gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv16hi;
      else
	gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8si;
      else
	gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4di;
      else
	gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8sf;
      else
	gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4df;
      else
	gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
   a single vector permutation using a single intra-lane vector
   permutation, vperm2f128 swapping the lanes and vblend* insn blending
   the non-swapped and swapped vectors together.  */

static bool
expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond;
  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
  rtx seq;
  bool ok;
  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;

  if (!TARGET_AVX
      || TARGET_AVX2
      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
      || !d->one_operand_p)
    return false;

  dfirst = *d;
  for (i = 0; i < nelt; i++)
    dfirst.perm[i] = 0xff;
  for (i = 0, msk = 0; i < nelt; i++)
    {
      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
	return false;
      dfirst.perm[j] = d->perm[i];
      if (j != i)
	msk |= (1 << i);
    }
  for (i = 0; i < nelt; i++)
    if (dfirst.perm[i] == 0xff)
      dfirst.perm[i] = i;

  if (!d->testing_p)
    dfirst.target = gen_reg_rtx (dfirst.vmode);

  start_sequence ();
  ok = expand_vec_perm_1 (&dfirst);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  emit_insn (seq);

  dsecond = *d;
  dsecond.op0 = dfirst.target;
  dsecond.op1 = dfirst.target;
  dsecond.one_operand_p = true;
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  for (i = 0; i < nelt; i++)
    dsecond.perm[i] = i ^ nelt2;

  ok = expand_vec_perm_1 (&dsecond);
  gcc_assert (ok);

  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
   permutation using two vperm2f128, followed by a vshufpd insn blending
   the two vectors together.  */

static bool
expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dfirst, dsecond, dthird;
  bool ok;

  if (!TARGET_AVX || (d->vmode != V4DFmode))
    return false;

  if (d->testing_p)
    return true;

  dfirst = *d;
  dsecond = *d;
  dthird = *d;

  dfirst.perm[0] = (d->perm[0] & ~1);
  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
  dfirst.perm[2] = (d->perm[2] & ~1);
  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
  dsecond.perm[0] = (d->perm[1] & ~1);
  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
  dsecond.perm[2] = (d->perm[3] & ~1);
  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
  dthird.perm[0] = (d->perm[0] % 2);
  dthird.perm[1] = (d->perm[1] % 2) + 4;
  dthird.perm[2] = (d->perm[2] % 2) + 2;
  dthird.perm[3] = (d->perm[3] % 2) + 6;

  dfirst.target = gen_reg_rtx (dfirst.vmode);
  dsecond.target = gen_reg_rtx (dsecond.vmode);
  dthird.op0 = dfirst.target;
  dthird.op1 = dsecond.target;
  dthird.one_operand_p = false;

  canonicalize_perm (&dfirst);
  canonicalize_perm (&dsecond);

  ok = expand_vec_perm_1 (&dfirst)
       && expand_vec_perm_1 (&dsecond)
       && expand_vec_perm_1 (&dthird);

  gcc_assert (ok);

  return true;
}
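
/* Decomposition sketch: each dfirst/dsecond selector rounds the requested
   V4DF element down to an even/odd pair, so both intermediates are pure
   128-bit lane moves that vperm2f128 can perform; dthird then picks the
   low or high double out of each lane (perm[i] % 2), which is exactly a
   vshufpd blend of the two intermediates.  */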
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (!d->one_operand_p);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
/* Implement arbitrary permutation of one V32QImode and V16QImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || !d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
				  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode and V16QImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
	int mask = odd ? 0xdd : 0x88;

	t1 = gen_reg_rtx (V8SFmode);
	t2 = gen_reg_rtx (V8SFmode);
	t3 = gen_reg_rtx (V8SFmode);

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
				      GEN_INT (mask)));

	/* Shuffle the lanes around to produce:
	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
					    GEN_INT (0x3)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

	/* Shuffle the lanes around to produce:
	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
					    GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  /* We need 2*log2(N)-1 operations to achieve odd/even
	     with interleave.  */
	  t1 = gen_reg_rtx (V8HImode);
	  t2 = gen_reg_rtx (V8HImode);
	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
	  if (odd)
	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
	  else
	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
	  emit_insn (t3);
	}
      break;

    case V16QImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  t1 = gen_reg_rtx (V16QImode);
	  t2 = gen_reg_rtx (V16QImode);
	  t3 = gen_reg_rtx (V16QImode);
	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
	  if (odd)
	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
	  else
	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
	  emit_insn (t3);
	}
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V4DFmode;
	  d_copy.target = gen_lowpart (V4DFmode, d->target);
	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now a vpunpck[lh]qdq will produce the result required.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V8SFmode;
	  d_copy.target = gen_lowpart (V8SFmode, d->target);
	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now a vpunpck[lh]qdq will produce
	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
					   gen_lowpart (V4DImode, t1),
					   gen_lowpart (V4DImode, t2));
      else
	t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
					  gen_lowpart (V4DImode, t1),
					  gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
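
/* Why the interleave fallbacks above need 2*log2(N)-1 insns (a rough
   argument): each round of punpckl/punpckh on the previous results halves
   the stride between the surviving even (or odd) elements, so log2(N)
   rounds bring them together, with the low/high pick in the final round
   chosen by ODD.  For V8HImode that is 5 interleaves, for V16QImode 7.  */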
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
	 use the vbroadcast instruction.  They expand to two insns
	 if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then use pshufd.  */
      do
	{
	  rtx dest;
	  rtx (*gen) (rtx, rtx, rtx)
	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
				 : gen_vec_interleave_lowv8hi;

	  if (elt >= nelt2)
	    {
	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
				       : gen_vec_interleave_highv8hi;
	      elt -= nelt2;
	    }
	  nelt2 /= 2;

	  dest = gen_reg_rtx (vmode);
	  emit_insn (gen (dest, op0, op0));
	  vmode = get_mode_wider_vector (vmode);
	  op0 = gen_lowpart (vmode, dest);
	}
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4,
			   d->testing_p);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
	 vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (!d->one_operand_p)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode and V16QImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->one_operand_p
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from the
     other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
	{
	  op = gen_reg_rtx (V32QImode);
	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	  l[i] = op;
	}
      else if (h[i])
	l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  if (expand_vec_perm_vperm2f128 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_2vperm2f128_vshuf (d))
    return true;

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  if (expand_vec_perm_vperm2f128_vblend (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
/* If a permutation only uses one operand, make it clear.  Returns true
   if the permutation references both operands.  */

static bool
canonicalize_perm (struct expand_vec_perm_d *d)
{
  int i, which, nelt = d->nelt;

  for (i = which = 0; i < nelt; ++i)
    which |= (d->perm[i] < nelt ? 1 : 2);

  d->one_operand_p = true;
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d->op0, d->op1))
	{
	  d->one_operand_p = false;
	  break;
	}
      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* FALLTHRU */

    case 2:
      for (i = 0; i < nelt; ++i)
	d->perm[i] &= nelt - 1;
      d->op0 = d->op1;
      break;

    case 1:
      d->op1 = d->op0;
      break;
    }

  return (which == 3);
}
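
/* Example of the folding above: with nelt == 4, identical operands and the
   selector { 0, 5, 2, 7 }, WHICH is 3 but op0 == op1, so the selector is
   flattened to { 0, 1, 2, 3 } and the permutation is handled as a
   one-operand shuffle; the return value still tells the caller that the
   original selector referenced both inputs.  */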
bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt;
  bool two_args;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  two_args = canonicalize_perm (&d);

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the selector says both arguments are needed, but the operands are the
     same, the above tried to expand with one_operand_p and flattened selector.
     If that didn't work, retry without one_operand_p; we succeeded with that
     during testing.  */
  if (two_args && d.one_operand_p)
    {
      d.one_operand_p = false;
      memcpy (d.perm, perm, sizeof (perm));
      return ix86_expand_vec_perm_const_1 (&d);
    }

  return false;
}
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (nelt == 2)
	return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  d.one_operand_p = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_operand_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
static void
ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt, base;
  bool ok;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.one_operand_p = false;
  d.testing_p = false;

  base = high_p ? nelt / 2 : 0;
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + base;
      d.perm[i * 2 + 1] = i + base + nelt;
    }

  /* Note that for AVX this isn't one instruction.  */
  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
/* Expand a vector operation CODE for a V*QImode in terms of the
   same operation on V*HImode.  */

void
ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
{
  enum machine_mode qimode = GET_MODE (dest);
  enum machine_mode himode;
  rtx (*gen_il) (rtx, rtx, rtx);
  rtx (*gen_ih) (rtx, rtx, rtx);
  rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
  struct expand_vec_perm_d d;
  bool ok, full_interleave;
  bool uns_p = false;
  int i;

  switch (qimode)
    {
    case V16QImode:
      himode = V8HImode;
      gen_il = gen_vec_interleave_lowv16qi;
      gen_ih = gen_vec_interleave_highv16qi;
      break;
    case V32QImode:
      himode = V16HImode;
      gen_il = gen_avx2_interleave_lowv32qi;
      gen_ih = gen_avx2_interleave_highv32qi;
      break;
    default:
      gcc_unreachable ();
    }

  op2_l = op2_h = op2;
  switch (code)
    {
    case MULT:
      /* Unpack data such that we've got a source byte in each low byte of
	 each word.  We don't care what goes into the high byte of each word.
	 Rather than trying to get zero in there, most convenient is to let
	 it be a copy of the low byte.  */
      op2_l = gen_reg_rtx (qimode);
      op2_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op2_l, op2, op2));
      emit_insn (gen_ih (op2_h, op2, op2));

      op1_l = gen_reg_rtx (qimode);
      op1_h = gen_reg_rtx (qimode);
      emit_insn (gen_il (op1_l, op1, op1));
      emit_insn (gen_ih (op1_h, op1, op1));
      full_interleave = qimode == V16QImode;
      break;

    case ASHIFT:
    case LSHIFTRT:
      uns_p = true;
      /* FALLTHRU */
    case ASHIFTRT:
      op1_l = gen_reg_rtx (himode);
      op1_h = gen_reg_rtx (himode);
      ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
      ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
      full_interleave = true;
      break;
    default:
      gcc_unreachable ();
    }

  /* Perform the operation.  */
  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
			       1, OPTAB_DIRECT);
  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
			       1, OPTAB_DIRECT);
  gcc_assert (res_l && res_h);

  /* Merge the data back into the right place.  */
  d.target = dest;
  d.op0 = gen_lowpart (qimode, res_l);
  d.op1 = gen_lowpart (qimode, res_h);
  d.vmode = qimode;
  d.nelt = GET_MODE_NUNITS (qimode);
  d.one_operand_p = false;
  d.testing_p = false;

  if (full_interleave)
    {
      /* For SSE2, we used a full interleave, so the desired
	 results are in the even elements.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2;
    }
  else
    {
      /* For AVX, the interleave used above was not cross-lane.  So the
	 extraction is evens but with the second and third quarter swapped.
	 Happily, that is even one insn shorter than even extraction.  */
      for (i = 0; i < 32; ++i)
	d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
    }

  ok = ix86_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_fmt_ee (code, qimode, op1, op2));
}
void
ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
			       bool uns_p, bool odd_p)
{
  enum machine_mode mode = GET_MODE (op1);
  enum machine_mode wmode = GET_MODE (dest);
  rtx x;

  /* We only play even/odd games with vectors of SImode.  */
  gcc_assert (mode == V4SImode || mode == V8SImode);

  /* If we're looking for the odd results, shift those members down to
     the even slots.  For some cpus this is faster than a PSHUFD.  */
  if (odd_p)
    {
      /* For XOP use vpmacsdqh, but only for smult, as it is only
	 signed.  */
      if (TARGET_XOP && mode == V4SImode && !uns_p)
	{
	  x = force_reg (wmode, CONST0_RTX (wmode));
	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
	  return;
	}

      x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
      op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
			  x, NULL, 1, OPTAB_DIRECT);
      op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
			  x, NULL, 1, OPTAB_DIRECT);
      op1 = gen_lowpart (mode, op1);
      op2 = gen_lowpart (mode, op2);
    }

  if (mode == V8SImode)
    {
      if (uns_p)
	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
      else
	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
    }
  else if (uns_p)
    x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
  else if (TARGET_SSE4_1)
    x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
  else
    {
      rtx s1, s2, t0, t1, t2;

      /* The easiest way to implement this without PMULDQ is to go through
	 the motions as if we are performing a full 64-bit multiply.  With
	 the exception that we need to do less shuffling of the elements.  */

      /* Compute the sign-extension, aka highparts, of the two operands.  */
      s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op1, pc_rtx, pc_rtx);
      s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
				op2, pc_rtx, pc_rtx);

      /* Multiply LO(A) * HI(B), and vice-versa.  */
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
      emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));

      /* Multiply LO(A) * LO(B).  */
      t0 = gen_reg_rtx (wmode);
      emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));

      /* Combine and shift the highparts into place.  */
      t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
      t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
			 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
      return;
    }
  emit_insn (x);
}
void
ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
			    bool uns_p, bool high_p)
{
  enum machine_mode wmode = GET_MODE (dest);
  enum machine_mode mode = GET_MODE (op1);
  rtx t1, t2, t3, t4, mask;

  switch (mode)
    {
    case V4SImode:
      t1 = gen_reg_rtx (mode);
      t2 = gen_reg_rtx (mode);
      if (TARGET_XOP && !uns_p)
	{
	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
	     shuffle the elements once so that all elements are in the right
	     place for immediate use: { A C B D }.  */
	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
					const1_rtx, GEN_INT (3)));
	}
      else
	{
	  /* Put the elements into place for the multiply.  */
	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
	  high_p = false;
	}

      ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
      break;

    case V8SImode:
      /* Shuffle the elements between the lanes.  After this we
	 have { A B E F | C D G H } for each operand.  */
      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));
      emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
				      const0_rtx, const2_rtx,
				      const1_rtx, GEN_INT (3)));

      /* Shuffle the elements within the lanes.  After this we
	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
      t3 = gen_reg_rtx (V8SImode);
      t4 = gen_reg_rtx (V8SImode);
      mask = GEN_INT (high_p
		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
      emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
      emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));

      ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
      break;

    case V8HImode:
    case V16HImode:
      t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
			 uns_p, OPTAB_DIRECT);
      t2 = expand_binop (mode,
			 uns_p ? umul_highpart_optab : smul_highpart_optab,
			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
      gcc_assert (t1 && t2);

      ix86_expand_vec_interleave (gen_lowpart (mode, dest), t1, t2, high_p);
      break;

    case V16QImode:
    case V32QImode:
      t1 = gen_reg_rtx (wmode);
      t2 = gen_reg_rtx (wmode);
      ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
      ix86_expand_sse_unpack (t2, op2, uns_p, high_p);

      emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
      break;

    default:
      gcc_unreachable ();
    }
}
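/* Editorial sketch (not part of GCC): how the 8-bit pshufd selector built
   for MASK above packs four 2-bit lane indices, lowest field first.
   high_p selects { 2, 2, 3, 3 } = 0xfa, otherwise { 0, 0, 1, 1 } = 0x50.  */
#if 0
static unsigned char
pshufd_selector (unsigned e0, unsigned e1, unsigned e2, unsigned e3)
{
  return e0 | (e1 << 2) | (e2 << 4) | (e3 << 6);
}
#endif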
void
ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
{
  rtx res_1, res_2;

  res_1 = gen_reg_rtx (V4SImode);
  res_2 = gen_reg_rtx (V4SImode);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_1),
				 op1, op2, true, false);
  ix86_expand_mul_widen_evenodd (gen_lowpart (V2DImode, res_2),
				 op1, op2, true, true);

  /* Move the results in element 2 down to element 1; we don't care
     what goes in elements 2 and 3.  Then we can merge the parts
     back together with an interleave.

     Note that two other sequences were tried:
     (1) Use interleaves at the start instead of psrldq, which allows
     us to use a single shufps to merge things back at the end.
     (2) Use shufps here to combine the two vectors, then pshufd to
     put the elements in the correct order.
     In both cases the cost of the reformatting stall was too high
     and the overall sequence slower.  */

  emit_insn (gen_sse2_pshufd_1 (res_1, res_1, const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  emit_insn (gen_sse2_pshufd_1 (res_2, res_2, const0_rtx, const2_rtx,
				const0_rtx, const0_rtx));
  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));

  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
}
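/* Editorial scalar model (not part of GCC) of the lane bookkeeping above:
   the even/odd widening multiplies leave the truncated products of lanes
   0/2 and 1/3 in the even dword slots; pshufd plus punpckldq then restore
   the original lane order.  */
#if 0
#include <stdint.h>

static void
mulv4si_model (uint32_t d[4], const uint32_t a[4], const uint32_t b[4])
{
  uint32_t even[2] = { a[0] * b[0], a[2] * b[2] };	/* res_1, lanes 0,2 */
  uint32_t odd[2] = { a[1] * b[1], a[3] * b[3] };	/* res_2, lanes 0,2 */
  d[0] = even[0]; d[1] = odd[0];	/* vec_interleave_lowv4si */
  d[2] = even[1]; d[3] = odd[1];
}
#endif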
void
ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx t1, t2, t3, t4, t5, t6;

  if (TARGET_XOP && mode == V2DImode)
    {
      /* op1: A,B,C,D, op2: E,F,G,H */
      op1 = gen_lowpart (V4SImode, op1);
      op2 = gen_lowpart (V4SImode, op2);

      t1 = gen_reg_rtx (V4SImode);
      t2 = gen_reg_rtx (V4SImode);
      t3 = gen_reg_rtx (V2DImode);
      t4 = gen_reg_rtx (V2DImode);

      /* t1: B,A,D,C */
      emit_insn (gen_sse2_pshufd_1 (t1, op1,
				    GEN_INT (1),
				    GEN_INT (0),
				    GEN_INT (3),
				    GEN_INT (2)));

      /* t2: (B*E),(A*F),(D*G),(C*H) */
      emit_insn (gen_mulv4si3 (t2, t1, op2));

      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
      emit_insn (gen_xop_phadddq (t3, t2));

      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));

      /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
      emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
    }
  else
    {
      enum machine_mode nmode;
      rtx (*umul) (rtx, rtx, rtx);

      if (mode == V2DImode)
	{
	  umul = gen_vec_widen_umult_even_v4si;
	  nmode = V4SImode;
	}
      else if (mode == V4DImode)
	{
	  umul = gen_vec_widen_umult_even_v8si;
	  nmode = V8SImode;
	}
      else
	gcc_unreachable ();

      /* Multiply low parts.  */
      t1 = gen_reg_rtx (mode);
      emit_insn (umul (t1, gen_lowpart (nmode, op1),
		       gen_lowpart (nmode, op2)));

      /* Shift input vectors right 32 bits so we can multiply high parts.  */
      t6 = GEN_INT (32);
      t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
      t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);

      /* Multiply high parts by low parts.  */
      t4 = gen_reg_rtx (mode);
      t5 = gen_reg_rtx (mode);
      emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
      emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));

      /* Combine and shift the highparts back.  */
      t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
      t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);

      /* Combine high and low parts.  */
      force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
    }

  set_unique_reg_note (get_last_insn (), REG_EQUAL,
		       gen_rtx_MULT (mode, op1, op2));
}
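/* Editorial scalar model (not part of GCC): the identity behind the
   generic path above -- a 64x64->64 multiply assembled from three
   32x32->64 unsigned multiplies; carries above bit 63 wrap away.  */
#if 0
#include <stdint.h>

static uint64_t
mul64_model (uint64_t a, uint64_t b)
{
  uint64_t t1 = (uint64_t) (uint32_t) a * (uint32_t) b;		/* LO*LO */
  uint64_t t4 = (uint64_t) (uint32_t) (a >> 32) * (uint32_t) b; /* HI(a)*LO(b) */
  uint64_t t5 = (uint64_t) (uint32_t) (b >> 32) * (uint32_t) a; /* HI(b)*LO(a) */
  return t1 + ((t4 + t5) << 32);
}
#endif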
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
	enum machine_mode srcmode, dstmode;
	rtx (*pinsr)(rtx, rtx, rtx, rtx);

	srcmode = mode_for_size (size, MODE_INT, 0);

	switch (srcmode)
	  {
	  case QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V16QImode;
	    pinsr = gen_sse4_1_pinsrb;
	    break;

	  case HImode:
	    if (!TARGET_SSE2)
	      return false;
	    dstmode = V8HImode;
	    pinsr = gen_sse2_pinsrw;
	    break;

	  case SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V4SImode;
	    pinsr = gen_sse4_1_pinsrd;
	    break;

	  case DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V2DImode;
	    pinsr = gen_sse4_1_pinsrq;
	    break;

	  default:
	    return false;
	  }

	dst = gen_lowpart (dstmode, dst);
	src = gen_lowpart (srcmode, src);

	pos /= size;

	emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
	return true;
      }

    default:
      return false;
    }
}
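/* Editorial sketch (not part of GCC): the one-hot immediate built from
   1 << pos mirrors the vec_merge mask the pinsr patterns use -- only the
   selected lane takes the scalar, all others pass through unchanged.  */
#if 0
#include <stdint.h>

static void
vec_merge_model (uint32_t v[4], uint32_t x, unsigned mask)
{
  unsigned i;
  for (i = 0; i < 4; i++)
    if (mask & (1u << i))
      v[i] = x;
}
#endif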
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;

  switch (idx)
    {
    default:
      break;

    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      return 1;

    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      return 1;
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#undef BIG
#define BIG 100
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
enum dispatch_group
{
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};

/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kind of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   the table.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};

/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};

/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s
{
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;

/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;		   /* Number of insn in the window.  */
  int num_uops;		   /* Number of uops in the window.  */
  int window_size;	   /* Number of bytes in the window.  */
  int window_num;	   /* Window number, either 0 or 1.  */
  int num_imm;		   /* Number of immediates in an insn.  */
  int num_imm_32;	   /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;	   /* Number of 64 bit immediates in an insn.  */
  int imm_size;		   /* Total immediates in the window.  */
  int num_loads;	   /* Total memory loads in the window.  */
  int num_stores;	   /* Total memory stores in the window.  */
  int violation;	   /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;

/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}

/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
	  || type == TYPE_ICMP
	  || type == TYPE_FCMP
	  || GET_CODE (PATTERN (insn)) == COMPARE);
}

/* Return true if a dispatch violation encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}

/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
		  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}

/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
	init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
	(imm_values->imm32)++;
      else
	(imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
	{
	  (imm_values->imm)++;
	  (imm_values->imm32)++;
	}
      break;

    default:
      break;
    }

  return 0;
}

/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
		(rtx_function) find_constant_1, (void *) imm_values);
}

/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}

/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return false;
}
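/* Worked example (editorial sketch, not part of GCC): an insn carrying
   one 32-bit and one 64-bit immediate reports *imm = 2, *imm32 = 1,
   *imm64 = 1, for a payload of 1*4 + 1*8 = 12 bytes -- the quantity
   count_num_restricted compares against MAX_IMM_SIZE below.  */
#if 0
static int
imm_payload_bytes (int num_imm32, int num_imm64)
{
  return num_imm32 * 4 + num_imm64 * 8;	/* mirrors get_num_immediates */
}
#endif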
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}

/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand,
				     &num_imm32_operand, &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand
		     > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2
		     > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uops.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed allowable, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached; do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
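/* Editorial sketch (not part of GCC): the size test that closes window 1
   above.  Windows 0 and 1 share a 48-byte budget, and a combined size of
   exactly 32 bytes also ends the pair early.  */
#if 0
static int
window1_closes (int w0_bytes, int w1_bytes, int insn_bytes)
{
  int sum = w0_bytes + w1_bytes;
  return sum == 32 || insn_bytes + sum >= 48;
}
#endif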
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file,
	   "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file,
	       "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}

/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}

/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file,
	   "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}

/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}

/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((TARGET_BDVER1 || TARGET_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
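/* Editorial sketch (not part of GCC): with a reassociation width of 2 the
   reassoc pass can rebalance ((a + b) + c) + d into (a + b) + (c + d),
   letting the two inner additions issue in parallel on a 2-wide machine
   such as Atom.  */
#if 0
static int
sum4_rebalanced (int a, int b, int c, int d)
{
  return (a + b) + (c + d);
}
#endif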
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}

/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
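/* Editorial sketch (not part of GCC): the value returned above is a
   bitmask of vector byte sizes the vectorizer may try; 32 | 16 means
   attempt 256-bit vectors first, then fall back to 128-bit.  */
#if 0
static int
size_allowed (unsigned int sizes_mask, unsigned int bytes)
{
  return (sizes_mask & bytes) != 0;	/* e.g. size_allowed (32 | 16, 32) */
}
#endif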
/* Implement targetm.vectorize.init_cost.  */

static void *
ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}

/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		    struct _stmt_vec_info *stmt_info, int misalign,
		    enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype,
						       misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}

/* Implement targetm.vectorize.finish_cost.  */

static void
ix86_finish_cost (void *data, unsigned *prologue_cost,
		  unsigned *body_cost, unsigned *epilogue_cost)
{
  unsigned *cost = (unsigned *) data;

  *prologue_cost = cost[vect_prologue];
  *body_cost = cost[vect_body];
  *epilogue_cost = cost[vect_epilogue];
}

/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
  unsigned HOST_WIDE_INT strong;

  if (val & ~(unsigned HOST_WIDE_INT) (IX86_HLE_ACQUIRE | IX86_HLE_RELEASE
				       | MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
	       "Unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
  if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "HLE_RELEASE not used with RELEASE or stronger memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}
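/* Usage sketch (editorial, not part of GCC): how the HLE bits validated
   above combine with a C11 memory model at user level; pairing
   HLE_ACQUIRE with a weaker model is what draws the warning.  */
#if 0
static int lock;

static void
hle_lock_unlock (void)
{
  while (__atomic_exchange_n (&lock, 1,
			      __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
    ;
  /* critical section */
  __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}
#endif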
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"