/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "diagnostic.h"
enum upper_128bits_state
{
  unknown = 0,
  unused,
  used
};

typedef struct block_info_def
{
  /* State of the upper 128bits of AVX registers at exit.  */
  enum upper_128bits_state state;
  /* TRUE if state of the upper 128bits of AVX registers is unchanged
     in this block.  */
  bool unchanged;
  /* TRUE if block has been processed.  */
  bool processed;
  /* TRUE if block has been scanned.  */
  bool scanned;
  /* Previous state of the upper 128bits of AVX registers at entry.  */
  enum upper_128bits_state prev;
} *block_info;

#define BLOCK_INFO(B) ((block_info) (B)->aux)
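
/* Illustrative note: the aux field is allocated per basic block by
   alloc_aux_for_blocks (sizeof (struct block_info_def)) in
   move_or_delete_vzeroupper below, so an access such as

     BLOCK_INFO (bb)->state

   reads the tracked AVX state of BB straight out of bb->aux.  */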
enum call_avx256_state
{
  /* Callee returns 256bit AVX register.  */
  callee_return_avx256 = -1,
  /* Callee returns and passes 256bit AVX register.  */
  callee_return_pass_avx256,
  /* Callee passes 256bit AVX register.  */
  callee_pass_avx256,
  /* Callee neither returns nor passes 256bit AVX register, or no
     256bit AVX register in function return.  */
  call_no_avx256,
  /* vzeroupper intrinsic.  */
  vzeroupper_intrinsic
};
/* Check if a 256bit AVX register is referenced in stores.  */

static void
check_avx256_stores (rtx dest, const_rtx set, void *data)
{
  if ((REG_P (dest)
       && VALID_AVX256_REG_MODE (GET_MODE (dest)))
      || (GET_CODE (set) == SET
          && REG_P (SET_SRC (set))
          && VALID_AVX256_REG_MODE (GET_MODE (SET_SRC (set)))))
    {
      enum upper_128bits_state *state
        = (enum upper_128bits_state *) data;
      *state = used;
    }
}
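
/* A minimal usage sketch (illustrative, mirroring the call in
   move_or_delete_vzeroupper_2 below): check_avx256_stores has the
   signature of a note_stores callback, so scanning one insn looks like

     enum upper_128bits_state state = unused;
     note_stores (PATTERN (insn), check_avx256_stores, &state);

   after which STATE is USED if the insn stores to, or copies from,
   a 256bit AVX register.  */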
/* Helper function for move_or_delete_vzeroupper_1.  Look for vzeroupper
   in basic block BB.  Delete it if upper 128bit AVX registers are
   unused.  If it isn't deleted, move it to just before a jump insn.

   STATE is the state of the upper 128bits of AVX registers at entry.  */

static void
move_or_delete_vzeroupper_2 (basic_block bb,
                             enum upper_128bits_state state)
{
  rtx insn, bb_end;
  rtx vzeroupper_insn = NULL_RTX;
  rtx pat;
  int avx256;
  bool unchanged;

  if (BLOCK_INFO (bb)->unchanged)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] unchanged: upper 128bits: %d\n",
                 bb->index, state);

      BLOCK_INFO (bb)->state = state;
      return;
    }

  if (BLOCK_INFO (bb)->scanned && BLOCK_INFO (bb)->prev == state)
    {
      if (dump_file)
        fprintf (dump_file, " [bb %i] scanned: upper 128bits: %d\n",
                 bb->index, BLOCK_INFO (bb)->state);
      return;
    }

  BLOCK_INFO (bb)->prev = state;

  if (dump_file)
    fprintf (dump_file, " [bb %i] entry: upper 128bits: %d\n",
             bb->index, state);

  unchanged = true;

  /* BB_END changes when it is deleted.  */
  bb_end = BB_END (bb);
  insn = BB_HEAD (bb);
  while (insn != bb_end)
    {
      insn = NEXT_INSN (insn);

      if (!NONDEBUG_INSN_P (insn))
        continue;

      /* Move vzeroupper before jump/call.  */
      if (JUMP_P (insn) || CALL_P (insn))
        {
          if (!vzeroupper_insn)
            continue;

          if (PREV_INSN (insn) != vzeroupper_insn)
            {
              if (dump_file)
                {
                  fprintf (dump_file, "Move vzeroupper after:\n");
                  print_rtl_single (dump_file, PREV_INSN (insn));
                  fprintf (dump_file, "before:\n");
                  print_rtl_single (dump_file, insn);
                }
              reorder_insns_nobb (vzeroupper_insn, vzeroupper_insn,
                                  PREV_INSN (insn));
            }
          vzeroupper_insn = NULL_RTX;
          continue;
        }

      pat = PATTERN (insn);

      /* Check insn for vzeroupper intrinsic.  */
      if (GET_CODE (pat) == UNSPEC_VOLATILE
          && XINT (pat, 1) == UNSPECV_VZEROUPPER)
        {
          if (dump_file)
            {
              /* Found vzeroupper intrinsic.  */
              fprintf (dump_file, "Found vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
        }
      else
        {
          /* Check insn for vzeroall intrinsic.  */
          if (GET_CODE (pat) == PARALLEL
              && GET_CODE (XVECEXP (pat, 0, 0)) == UNSPEC_VOLATILE
              && XINT (XVECEXP (pat, 0, 0), 1) == UNSPECV_VZEROALL)
            {
              state = unused;
              unchanged = false;

              /* Delete pending vzeroupper insertion.  */
              if (vzeroupper_insn)
                {
                  delete_insn (vzeroupper_insn);
                  vzeroupper_insn = NULL_RTX;
                }
            }
          else if (state != used)
            {
              note_stores (pat, check_avx256_stores, &state);
              if (state == used)
                unchanged = false;
            }
          continue;
        }

      /* Process vzeroupper intrinsic.  */
      avx256 = INTVAL (XVECEXP (pat, 0, 0));

      if (state == unused)
        {
          /* Since the upper 128bits are cleared, callee must not pass
             256bit AVX register.  We only need to check if callee
             returns 256bit AVX register.  */
          if (avx256 == callee_return_avx256)
            {
              state = used;
              unchanged = false;
            }

          /* Remove unnecessary vzeroupper since upper 128bits are
             cleared.  */
          if (dump_file)
            {
              fprintf (dump_file, "Delete redundant vzeroupper:\n");
              print_rtl_single (dump_file, insn);
            }
          delete_insn (insn);
        }
      else
        {
          /* Set state to UNUSED if callee doesn't return 256bit AVX
             register.  */
          if (avx256 != callee_return_pass_avx256)
            state = unused;

          if (avx256 == callee_return_pass_avx256
              || avx256 == callee_pass_avx256)
            {
              /* Must remove vzeroupper since callee passes in 256bit
                 AVX register.  */
              if (dump_file)
                {
                  fprintf (dump_file, "Delete callee pass vzeroupper:\n");
                  print_rtl_single (dump_file, insn);
                }
              delete_insn (insn);
            }
          else
            {
              vzeroupper_insn = insn;
              unchanged = false;
            }
        }
    }

  BLOCK_INFO (bb)->state = state;
  BLOCK_INFO (bb)->unchanged = unchanged;
  BLOCK_INFO (bb)->scanned = true;

  if (dump_file)
    fprintf (dump_file, " [bb %i] exit: %s: upper 128bits: %d\n",
             bb->index, unchanged ? "unchanged" : "changed",
             state);
}
/* Helper function for move_or_delete_vzeroupper.  Process vzeroupper
   in BLOCK and check its predecessor blocks.  Treat UNKNOWN state
   as UNUSED if UNKNOWN_IS_UNUSED is true.  Return TRUE if the exit
   state is changed.  */

static bool
move_or_delete_vzeroupper_1 (basic_block block, bool unknown_is_unused)
{
  edge e;
  edge_iterator ei;
  enum upper_128bits_state state, old_state, new_state;
  bool seen_unknown;

  if (dump_file)
    fprintf (dump_file, " Process [bb %i]: status: %d\n",
             block->index, BLOCK_INFO (block)->processed);

  if (BLOCK_INFO (block)->processed)
    return false;

  state = unused;

  /* Check all predecessor edges of this block.  */
  seen_unknown = false;
  FOR_EACH_EDGE (e, ei, block->preds)
    {
      switch (BLOCK_INFO (e->src)->state)
        {
        case unknown:
          if (!unknown_is_unused)
            seen_unknown = true;
          break;
        case unused:
          break;
        case used:
          state = used;
          break;
        }
    }

  if (seen_unknown)
    state = unknown;

  old_state = BLOCK_INFO (block)->state;
  move_or_delete_vzeroupper_2 (block, state);
  new_state = BLOCK_INFO (block)->state;

  if (state != unknown || new_state == used)
    BLOCK_INFO (block)->processed = true;

  /* Need to rescan if the upper 128bits of AVX registers are changed
     to USED at exit.  */
  if (new_state != old_state)
    {
      if (new_state == used)
        cfun->machine->rescan_vzeroupper_p = 1;
      return true;
    }
  else
    return false;
}
/* Go through the instruction stream looking for vzeroupper.  Delete
   it if upper 128bit AVX registers are unused.  If it isn't deleted,
   move it to just before a jump insn.  */

static void
move_or_delete_vzeroupper (void)
{
  edge e;
  edge_iterator ei;
  basic_block bb;
  fibheap_t worklist, pending, fibheap_swap;
  sbitmap visited, in_worklist, in_pending, sbitmap_swap;
  int *bb_order;
  int *rc_order;
  int i;

  /* Set up block info for each basic block.  */
  alloc_aux_for_blocks (sizeof (struct block_info_def));

  /* Process outgoing edges of entry point.  */
  if (dump_file)
    fprintf (dump_file, "Process outgoing edges of entry point\n");

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs)
    {
      move_or_delete_vzeroupper_2 (e->dest,
                                   cfun->machine->caller_pass_avx256_p
                                   ? used : unused);
      BLOCK_INFO (e->dest)->processed = true;
    }

  /* Compute reverse completion order of depth first search of the CFG
     so that the data-flow runs faster.  */
  rc_order = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
  bb_order = XNEWVEC (int, last_basic_block);
  pre_and_rev_post_order_compute (NULL, rc_order, false);
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_order[rc_order[i]] = i;
  free (rc_order);

  worklist = fibheap_new ();
  pending = fibheap_new ();
  visited = sbitmap_alloc (last_basic_block);
  in_worklist = sbitmap_alloc (last_basic_block);
  in_pending = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_worklist);

  /* Don't check outgoing edges of entry point.  */
  sbitmap_ones (in_pending);
  FOR_EACH_BB (bb)
    if (BLOCK_INFO (bb)->processed)
      RESET_BIT (in_pending, bb->index);
    else
      {
        move_or_delete_vzeroupper_1 (bb, false);
        fibheap_insert (pending, bb_order[bb->index], bb);
      }

  if (dump_file)
    fprintf (dump_file, "Check remaining basic blocks\n");

  while (!fibheap_empty (pending))
    {
      fibheap_swap = pending;
      pending = worklist;
      worklist = fibheap_swap;
      sbitmap_swap = in_pending;
      in_pending = in_worklist;
      in_worklist = sbitmap_swap;

      sbitmap_zero (visited);

      cfun->machine->rescan_vzeroupper_p = 0;

      while (!fibheap_empty (worklist))
        {
          bb = (basic_block) fibheap_extract_min (worklist);
          RESET_BIT (in_worklist, bb->index);
          gcc_assert (!TEST_BIT (visited, bb->index));
          if (!TEST_BIT (visited, bb->index))
            {
              SET_BIT (visited, bb->index);

              if (move_or_delete_vzeroupper_1 (bb, false))
                FOR_EACH_EDGE (e, ei, bb->succs)
                  {
                    if (e->dest == EXIT_BLOCK_PTR
                        || BLOCK_INFO (e->dest)->processed)
                      continue;

                    if (TEST_BIT (visited, e->dest->index))
                      {
                        if (!TEST_BIT (in_pending, e->dest->index))
                          {
                            /* Send E->DEST to next round.  */
                            SET_BIT (in_pending, e->dest->index);
                            fibheap_insert (pending,
                                            bb_order[e->dest->index],
                                            e->dest);
                          }
                      }
                    else if (!TEST_BIT (in_worklist, e->dest->index))
                      {
                        /* Add E->DEST to current round.  */
                        SET_BIT (in_worklist, e->dest->index);
                        fibheap_insert (worklist, bb_order[e->dest->index],
                                        e->dest);
                      }
                  }
            }
        }

      if (!cfun->machine->rescan_vzeroupper_p)
        break;
    }

  free (bb_order);
  fibheap_delete (worklist);
  fibheap_delete (pending);
  sbitmap_free (visited);
  sbitmap_free (in_worklist);
  sbitmap_free (in_pending);

  if (dump_file)
    fprintf (dump_file, "Process remaining basic blocks\n");

  FOR_EACH_BB (bb)
    move_or_delete_vzeroupper_1 (bb, true);

  free_aux_for_blocks ();
}
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
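
/* For example, MODE_INDEX (SImode) evaluates to 2, so an access such as
   ix86_cost->mult_init[MODE_INDEX (SImode)] picks the SImode entry of
   the 5-element multiply cost arrays initialized in the tables below,
   and any mode other than QI/HI/SI/DI falls through to index 4, the
   "other" slot.  */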
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
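
/* Quick arithmetic check of the scale, assuming COSTS_N_INSNS (N) is
   (N) * 4 as noted above: a one-insn, 2-byte add costs
   COSTS_N_INSNS (1) == 4 when tuning for speed and
   COSTS_N_BYTES (2) == 4 when tuning for size, so the two cost scales
   line up for the common single-instruction case.  */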
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
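
/* Reading the stringop entries in the tables below: each entry pairs an
   algorithm for unknown block sizes with a list of {max_size, algorithm}
   pairs for known sizes, where a max_size of -1 covers all remaining
   sizes.  Each cost table carries one such entry per ABI (32bit and
   64bit); DUMMY_STRINGOP_ALGS fills the slot for a configuration the
   given tuning never uses.  */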
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),          /* cost of an add instruction */
  COSTS_N_BYTES (3),          /* cost of a lea instruction */
  COSTS_N_BYTES (2),          /* variable shift costs */
  COSTS_N_BYTES (3),          /* constant shift costs */
  {COSTS_N_BYTES (3),         /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),         /* HI */
   COSTS_N_BYTES (3),         /* SI */
   COSTS_N_BYTES (3),         /* DI */
   COSTS_N_BYTES (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),         /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),         /* HI */
   COSTS_N_BYTES (3),         /* SI */
   COSTS_N_BYTES (3),         /* DI */
   COSTS_N_BYTES (5)},        /* other */
  COSTS_N_BYTES (3),          /* cost of movsx */
  COSTS_N_BYTES (3),          /* cost of movzx */
  0,                          /* "large" insn */
  2,                          /* cost for loading QImode using movzbl */
  {2, 2, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 2, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 2},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {2, 2, 2},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  3,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {3, 3},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  3,                          /* cost of moving SSE register */
  {3, 3, 3},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {3, 3, 3},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  0,                          /* size of l1 cache */
  0,                          /* size of l2 cache */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_BYTES (2),          /* cost of FADD and FSUB insns. */
  COSTS_N_BYTES (2),          /* cost of FMUL instruction. */
  COSTS_N_BYTES (2),          /* cost of FDIV instruction. */
  COSTS_N_BYTES (2),          /* cost of FABS instruction. */
  COSTS_N_BYTES (2),          /* cost of FCHS instruction. */
  COSTS_N_BYTES (2),          /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  1,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  1,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
/* Processor costs (relative to an add) */

struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (3),          /* variable shift costs */
  COSTS_N_INSNS (2),          /* constant shift costs */
  {COSTS_N_INSNS (6),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),         /* HI */
   COSTS_N_INSNS (6),         /* SI */
   COSTS_N_INSNS (6),         /* DI */
   COSTS_N_INSNS (6)},        /* other */
  COSTS_N_INSNS (1),          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),        /* HI */
   COSTS_N_INSNS (23),        /* SI */
   COSTS_N_INSNS (23),        /* DI */
   COSTS_N_INSNS (23)},       /* other */
  COSTS_N_INSNS (3),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  15,                         /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {2, 4, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 4, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {8, 8, 8},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {8, 8, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 8, 16},                 /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 8, 16},                 /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  0,                          /* size of l1 cache */
  0,                          /* size of l2 cache */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_INSNS (23),         /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (27),         /* cost of FMUL instruction. */
  COSTS_N_INSNS (88),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (22),         /* cost of FABS instruction. */
  COSTS_N_INSNS (24),         /* cost of FCHS instruction. */
  COSTS_N_INSNS (122),        /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (3),          /* variable shift costs */
  COSTS_N_INSNS (2),          /* constant shift costs */
  {COSTS_N_INSNS (12),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),        /* HI */
   COSTS_N_INSNS (12),        /* SI */
   COSTS_N_INSNS (12),        /* DI */
   COSTS_N_INSNS (12)},       /* other */
  1,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),        /* HI */
   COSTS_N_INSNS (40),        /* SI */
   COSTS_N_INSNS (40),        /* DI */
   COSTS_N_INSNS (40)},       /* other */
  COSTS_N_INSNS (3),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  15,                         /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {2, 4, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 4, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {8, 8, 8},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {8, 8, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 8, 16},                 /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 8, 16},                 /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  4,                          /* size of l1 cache.  486 has 8kB cache
                                 shared for code and data, so 4kB is
                                 not really precise.  */
  4,                          /* size of l2 cache */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_INSNS (8),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (16),         /* cost of FMUL instruction. */
  COSTS_N_INSNS (73),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (3),          /* cost of FABS instruction. */
  COSTS_N_INSNS (3),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (83),         /* cost of FSQRT instruction. */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (4),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (11),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),        /* HI */
   COSTS_N_INSNS (11),        /* SI */
   COSTS_N_INSNS (11),        /* DI */
   COSTS_N_INSNS (11)},       /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),        /* HI */
   COSTS_N_INSNS (25),        /* SI */
   COSTS_N_INSNS (25),        /* DI */
   COSTS_N_INSNS (25)},       /* other */
  COSTS_N_INSNS (3),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  8,                          /* "large" insn */
  6,                          /* cost for loading QImode using movzbl */
  {2, 4, 2},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 4, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  8,                          /* cost of moving MMX register */
  {8, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {8, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 8, 16},                 /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 8, 16},                 /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  8,                          /* size of l1 cache. */
  8,                          /* size of l2 cache */
  0,                          /* size of prefetch block */
  0,                          /* number of parallel prefetches */
  COSTS_N_INSNS (3),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (3),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (39),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),          /* cost of FABS instruction. */
  COSTS_N_INSNS (1),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (70),         /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (4),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (4),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (4)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),        /* HI */
   COSTS_N_INSNS (17),        /* SI */
   COSTS_N_INSNS (17),        /* DI */
   COSTS_N_INSNS (17)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  2,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 2, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {2, 2},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {2, 2},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {2, 2, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {2, 2, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
  8,                          /* size of l1 cache. */
  256,                        /* size of l2 cache */
  32,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  COSTS_N_INSNS (3),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (56),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),          /* cost of FABS instruction. */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (56),         /* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (2),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (7),         /* SI */
   COSTS_N_INSNS (7),         /* DI */
   COSTS_N_INSNS (7)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),        /* HI */
   COSTS_N_INSNS (39),        /* SI */
   COSTS_N_INSNS (39),        /* DI */
   COSTS_N_INSNS (39)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  1,                          /* cost for loading QImode using movzbl */
  {1, 1, 1},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {1, 1, 1},                  /* cost of storing integer registers */
  1,                          /* cost of reg,reg fld/fst */
  {1, 1, 1},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 6, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */

  1,                          /* cost of moving MMX register */
  {1, 1},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {1, 1},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  1,                          /* cost of moving SSE register */
  {1, 1, 1},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {1, 1, 1},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  1,                          /* MMX or SSE register to integer */
  64,                         /* size of l1 cache. */
  128,                        /* size of l2 cache. */
  32,                         /* size of prefetch block */
  1,                          /* number of parallel prefetches */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (11),         /* cost of FMUL instruction. */
  COSTS_N_INSNS (47),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),          /* cost of FABS instruction. */
  COSTS_N_INSNS (1),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (54),         /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (3),         /* DI */
   COSTS_N_INSNS (3)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),        /* HI */
   COSTS_N_INSNS (18),        /* SI */
   COSTS_N_INSNS (18),        /* DI */
   COSTS_N_INSNS (18)},       /* other */
  COSTS_N_INSNS (2),          /* cost of movsx */
  COSTS_N_INSNS (2),          /* cost of movzx */
  8,                          /* "large" insn */
  3,                          /* cost for loading QImode using movzbl */
  {4, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 3, 2},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {6, 6, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 4},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {2, 2},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {2, 2},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {2, 2, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {2, 2, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  6,                          /* MMX or SSE register to integer */
  32,                         /* size of l1 cache. */
  32,                         /* size of l2 cache.  Some models
                                 have integrated l2 cache, but
                                 optimizing for k6 is not important
                                 enough to worry about that.  */
  32,                         /* size of prefetch block */
  1,                          /* number of parallel prefetches */
  COSTS_N_INSNS (2),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (2),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (56),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),          /* cost of FABS instruction. */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (56),         /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (5),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),         /* HI */
   COSTS_N_INSNS (5),         /* SI */
   COSTS_N_INSNS (5),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 4},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 6},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  64,                         /* size of l1 cache. */
  256,                        /* size of l2 cache. */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  5,                          /* Branch cost */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (24),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),          /* cost of FABS instruction. */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction. */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 3, 6},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  64,                         /* size of l1 cache. */
  512,                        /* size of l2 cache. */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                        /* number of parallel prefetches */
  3,                          /* Branch cost */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),          /* cost of FABS instruction. */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction. */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks,
     libcall can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                          /* scalar_stmt_cost. */
  2,                          /* scalar load_cost. */
  2,                          /* scalar_store_cost. */
  5,                          /* vec_stmt_cost. */
  0,                          /* vec_to_scalar_cost. */
  2,                          /* scalar_to_vec_cost. */
  2,                          /* vec_align_load_cost. */
  3,                          /* vec_unalign_load_cost. */
  3,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  2,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 3},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
                              /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                   MOVD reg32, xmmreg Double FADD 3 */
  64,                         /* size of l1 cache. */
  512,                        /* size of l2 cache. */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                        /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),          /* cost of FABS instruction. */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction. */

  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     libcall can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                          /* scalar_stmt_cost. */
  2,                          /* scalar load_cost. */
  2,                          /* scalar_store_cost. */
  6,                          /* vec_stmt_cost. */
  0,                          /* vec_to_scalar_cost. */
  2,                          /* scalar_to_vec_cost. */
  2,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  2,                          /* vec_store_cost. */
  2,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (4),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (4),         /* SI */
   COSTS_N_INSNS (6),         /* DI */
   COSTS_N_INSNS (6)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {5, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {5, 5, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 4},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 4},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 4},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  2,                          /* MMX or SSE register to integer */
                              /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                   MOVD reg32, xmmreg Double FADD 3 */
  16,                         /* size of l1 cache. */
  2048,                       /* size of l2 cache. */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                        /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (6),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (42),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),          /* cost of FABS instruction. */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (52),         /* cost of FSQRT instruction. */

  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     libcall can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                          /* scalar_stmt_cost. */
  4,                          /* scalar load_cost. */
  4,                          /* scalar_store_cost. */
  6,                          /* vec_stmt_cost. */
  0,                          /* vec_to_scalar_cost. */
  2,                          /* scalar_to_vec_cost. */
  4,                          /* vec_align_load_cost. */
  4,                          /* vec_unalign_load_cost. */
  4,                          /* vec_store_cost. */
  2,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (4),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (4),         /* SI */
   COSTS_N_INSNS (6),         /* DI */
   COSTS_N_INSNS (6)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {5, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {5, 5, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {4, 4},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 4},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 4},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  2,                          /* MMX or SSE register to integer */
                              /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                   MOVD reg32, xmmreg Double FADD 3 */
  16,                         /* size of l1 cache. */
  2048,                       /* size of l2 cache. */
  64,                         /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                        /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (6),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (42),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),          /* cost of FABS instruction. */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (52),         /* cost of FSQRT instruction. */

  /* BDVER2 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     libcall can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  6,                          /* scalar_stmt_cost. */
  4,                          /* scalar load_cost. */
  4,                          /* scalar_store_cost. */
  6,                          /* vec_stmt_cost. */
  0,                          /* vec_to_scalar_cost. */
  2,                          /* scalar_to_vec_cost. */
  4,                          /* vec_align_load_cost. */
  4,                          /* vec_unalign_load_cost. */
  4,                          /* vec_store_cost. */
  2,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs btver1_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (2),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (5)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),        /* HI */
   COSTS_N_INSNS (51),        /* SI */
   COSTS_N_INSNS (83),        /* DI */
   COSTS_N_INSNS (83)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  4,                          /* cost for loading QImode using movzbl */
  {3, 4, 3},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {3, 4, 3},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {4, 4, 12},                 /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {3, 3},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {4, 4},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {4, 4, 3},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {4, 4, 5},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  3,                          /* MMX or SSE register to integer */
                              /* On K8:
                                   MOVD reg64, xmmreg Double FSTORE 4
                                   MOVD reg32, xmmreg Double FSTORE 4
                                 On AMDFAM10:
                                   MOVD reg64, xmmreg Double FADD 3
                                   MOVD reg32, xmmreg Double FADD 3 */
  32,                         /* size of l1 cache. */
  512,                        /* size of l2 cache. */
  64,                         /* size of prefetch block */
  100,                        /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (4),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),          /* cost of FABS instruction. */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),         /* cost of FSQRT instruction. */

  /* BTVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks,
     libcall can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                          /* scalar_stmt_cost. */
  2,                          /* scalar load_cost. */
  2,                          /* scalar_store_cost. */
  6,                          /* vec_stmt_cost. */
  0,                          /* vec_to_scalar_cost. */
  2,                          /* scalar_to_vec_cost. */
  2,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  2,                          /* vec_store_cost. */
  2,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (3),          /* cost of a lea instruction */
  COSTS_N_INSNS (4),          /* variable shift costs */
  COSTS_N_INSNS (4),          /* constant shift costs */
  {COSTS_N_INSNS (15),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),        /* HI */
   COSTS_N_INSNS (15),        /* SI */
   COSTS_N_INSNS (15),        /* DI */
   COSTS_N_INSNS (15)},       /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),        /* HI */
   COSTS_N_INSNS (56),        /* SI */
   COSTS_N_INSNS (56),        /* DI */
   COSTS_N_INSNS (56)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  16,                         /* "large" insn */
  2,                          /* cost for loading QImode using movzbl */
  {4, 5, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {2, 3, 2},                  /* cost of storing integer registers */
  2,                          /* cost of reg,reg fld/fst */
  {2, 2, 6},                  /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 6},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {2, 2},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {2, 2},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  12,                         /* cost of moving SSE register */
  {12, 12, 12},               /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {2, 2, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  10,                         /* MMX or SSE register to integer */
  8,                          /* size of l1 cache. */
  256,                        /* size of l2 cache. */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  2,                          /* Branch cost */
  COSTS_N_INSNS (5),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (7),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (43),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),          /* cost of FABS instruction. */
  COSTS_N_INSNS (2),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (43),         /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1),          /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (10),        /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),        /* HI */
   COSTS_N_INSNS (10),        /* SI */
   COSTS_N_INSNS (10),        /* DI */
   COSTS_N_INSNS (10)},       /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),        /* HI */
   COSTS_N_INSNS (66),        /* SI */
   COSTS_N_INSNS (66),        /* DI */
   COSTS_N_INSNS (66)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  16,                         /* "large" insn */
  17,                         /* MOVE_RATIO */
  4,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  3,                          /* cost of reg,reg fld/fst */
  {12, 12, 12},               /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {4, 4, 4},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  6,                          /* cost of moving MMX register */
  {12, 12},                   /* cost of loading MMX registers
                                 in SImode and DImode */
  {12, 12},                   /* cost of storing MMX registers
                                 in SImode and DImode */
  6,                          /* cost of moving SSE register */
  {12, 12, 12},               /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {12, 12, 12},               /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  8,                          /* MMX or SSE register to integer */
  8,                          /* size of l1 cache. */
  1024,                       /* size of l2 cache. */
  128,                        /* size of prefetch block */
  8,                          /* number of parallel prefetches */
  1,                          /* Branch cost */
  COSTS_N_INSNS (6),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (40),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (3),          /* cost of FABS instruction. */
  COSTS_N_INSNS (3),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (44),         /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),          /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,      /* cost of a lea instruction */
  COSTS_N_INSNS (1),          /* variable shift costs */
  COSTS_N_INSNS (1),          /* constant shift costs */
  {COSTS_N_INSNS (3),         /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),         /* HI */
   COSTS_N_INSNS (3),         /* SI */
   COSTS_N_INSNS (4),         /* DI */
   COSTS_N_INSNS (2)},        /* other */
  0,                          /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),        /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),        /* HI */
   COSTS_N_INSNS (42),        /* SI */
   COSTS_N_INSNS (74),        /* DI */
   COSTS_N_INSNS (74)},       /* other */
  COSTS_N_INSNS (1),          /* cost of movsx */
  COSTS_N_INSNS (1),          /* cost of movzx */
  8,                          /* "large" insn */
  17,                         /* MOVE_RATIO */
  4,                          /* cost for loading QImode using movzbl */
  {4, 4, 4},                  /* cost of loading integer registers
                                 in QImode, HImode and SImode.
                                 Relative to reg-reg move (2).  */
  {4, 4, 4},                  /* cost of storing integer registers */
  4,                          /* cost of reg,reg fld/fst */
  {12, 12, 12},               /* cost of loading fp registers
                                 in SFmode, DFmode and XFmode */
  {6, 6, 8},                  /* cost of storing fp registers
                                 in SFmode, DFmode and XFmode */
  2,                          /* cost of moving MMX register */
  {8, 8},                     /* cost of loading MMX registers
                                 in SImode and DImode */
  {8, 8},                     /* cost of storing MMX registers
                                 in SImode and DImode */
  2,                          /* cost of moving SSE register */
  {8, 8, 8},                  /* cost of loading SSE registers
                                 in SImode, DImode and TImode */
  {8, 8, 8},                  /* cost of storing SSE registers
                                 in SImode, DImode and TImode */
  5,                          /* MMX or SSE register to integer */
  32,                         /* size of l1 cache. */
  256,                        /* size of l2 cache. */
  64,                         /* size of prefetch block */
  6,                          /* number of parallel prefetches */
  3,                          /* Branch cost */
  COSTS_N_INSNS (8),          /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),          /* cost of FMUL instruction. */
  COSTS_N_INSNS (20),         /* cost of FDIV instruction. */
  COSTS_N_INSNS (8),          /* cost of FABS instruction. */
  COSTS_N_INSNS (8),          /* cost of FCHS instruction. */
  COSTS_N_INSNS (40),         /* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                          /* scalar_stmt_cost. */
  1,                          /* scalar load_cost. */
  1,                          /* scalar_store_cost. */
  1,                          /* vec_stmt_cost. */
  1,                          /* vec_to_scalar_cost. */
  1,                          /* scalar_to_vec_cost. */
  1,                          /* vec_align_load_cost. */
  2,                          /* vec_unalign_load_cost. */
  1,                          /* vec_store_cost. */
  3,                          /* cond_taken_branch_cost. */
  1,                          /* cond_not_taken_branch_cost. */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (2)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /* HI */
   COSTS_N_INSNS (42),                  /* SI */
   COSTS_N_INSNS (74),                  /* DI */
   COSTS_N_INSNS (74)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to a perhaps more appropriate value of 5.  */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Core 2 and K8.  */

static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /* HI */
   COSTS_N_INSNS (3),                   /* SI */
   COSTS_N_INSNS (4),                   /* DI */
   COSTS_N_INSNS (2)},                  /* other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /* HI */
   COSTS_N_INSNS (42),                  /* SI */
   COSTS_N_INSNS (74),                  /* DI */
   COSTS_N_INSNS (74)},                 /* other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar_load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
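
/* Editor's illustrative sketch (not part of the original file): passes read
   the active cost table through this pointer, e.g. ix86_cost->add when
   pricing an addition; the values are pre-scaled by COSTS_N_INSNS so they
   stay comparable across the tables above.  */
#if 0
static int
example_add_cost (void)
{
  return ix86_cost->add;
}
#endif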
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
#define m_CORE2_32 (1<<PROCESSOR_CORE2_32)
#define m_CORE2_64 (1<<PROCESSOR_CORE2_64)
#define m_COREI7_32 (1<<PROCESSOR_COREI7_32)
#define m_COREI7_64 (1<<PROCESSOR_COREI7_64)
#define m_COREI7 (m_COREI7_32 | m_COREI7_64)
#define m_CORE2I7_32 (m_CORE2_32 | m_COREI7_32)
#define m_CORE2I7_64 (m_CORE2_64 | m_COREI7_64)
#define m_CORE2I7 (m_CORE2I7_32 | m_CORE2I7_64)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_BDVER2 (1<<PROCESSOR_BDVER2)
#define m_BDVER (m_BDVER1 | m_BDVER2)
#define m_BTVER1 (1<<PROCESSOR_BTVER1)
#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
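
/* Editor's illustrative sketch (not part of the original file): every m_*
   macro is the bit for one PROCESSOR_* enumerator, so testing whether a
   tuning mask covers the selected processor is a single AND.  */
#if 0
static bool
example_mask_has (unsigned int mask, enum processor_type proc)
{
  return (mask & (1u << proc)) != 0;    /* e.g. m_GENERIC vs. PROCESSOR_GENERIC64 */
}
#endif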
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like a good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro base chips.  */
  m_386 | m_CORE2I7_64 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_P4_NOCONA | m_CORE2I7 | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE2I7 | m_K6 | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_K8
  | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However
     in the current implementation the partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee saving sequences.  Because
     this option pays back little on PPro based chips and is in conflict
     with the partial reg dependencies used by Athlon/P4 based chips, it is
     better to leave it off for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2I7 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_CORE2I7 | m_ATOM | m_K6 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_386 | m_486 | m_PENT | m_CORE2I7 | m_ATOM | m_K6_GEODE
  | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_386 | m_486 | m_PENT),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_P4_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
     over esp addition.  */
  m_386 | m_486 | m_PENT | m_PPRO,

  /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
     over esp addition.  */
  m_PENT,

  /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
     over esp subtraction.  */
  m_386 | m_486 | m_PENT | m_K6_GEODE,

  /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
     over esp subtraction.  */
  m_PENT | m_K6_GEODE,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies.  */
  ~(m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GEODE | m_AMD_MULTIPLE
    | m_GENERIC),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit so as to allow register renaming on 128bit
     SSE units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMDFAM10 | m_BDVER
  | m_GENERIC,

  /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
  m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER1,

  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
  m_COREI7 | m_BDVER,

  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  m_BDVER,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined.  */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_P4_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_PPRO | m_CORE2I7 | m_ATOM | m_ATHLON_K8 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10 | m_BDVER),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window.  */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PENT | m_PPRO | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE
  | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC,

  /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function.  */
  m_ATOM,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_CORE2I7_64 | m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but 386 and 486 do HImode multiply
     faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
     vector path on AMD machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines.  */
  m_CORE2I7_64 | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER1 | m_GENERIC64,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_CORE2I7 | m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
  m_AMDFAM10,

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */
  m_BDVER,

  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */
  m_ATOM,

  /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
     instructions.  */
  ~m_ATOM,

  /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
     at -O3.  For the moment, the prefetching seems badly tuned for Intel
     chips.  */
  m_K6_GEODE | m_AMD_MULTIPLE,

  /* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
     the auto-vectorizer.  */
  m_BDVER,

  /* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
     during reassociation of integer computation.  */
  m_ATOM,

  /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
     during reassociation of fp computation.  */
  m_ATOM,
};
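
/* Editor's illustrative sketch (not part of the original file): this is
   exactly how ix86_option_override_internal below turns the table above
   into the ix86_tune_features[] flags -- mask every entry with the bit of
   the processor being tuned for.  */
#if 0
static void
example_fill_tune_features (enum processor_type tune)
{
  unsigned int tune_mask = 1u << tune;
  int i;

  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & tune_mask);
}
#endif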
/* Feature tests against the various architecture variations.  */
unsigned char ix86_arch_features[X86_ARCH_LAST];

/* Feature tests against the various architecture variations, used to create
   ix86_arch_features based on the processor mask.  */
static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro.  */
  ~(m_386 | m_486 | m_PENT | m_K6),

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
static const unsigned int x86_accumulate_outgoing_args
  = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE2I7 | m_AMD_MULTIPLE | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_AMD_MULTIPLE
    | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_load
  = m_COREI7 | m_GENERIC;

static const unsigned int x86_avx256_split_unaligned_store
  = m_COREI7 | m_BDVER | m_GENERIC;

/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,               /* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
/* The "default" register map used in 64bit mode.  */

int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,               /* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,       /* fp regs */
  -1, -1, -1, -1, -1,                   /* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,       /* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,         /* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,       /* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
        8  for %eip    (no gcc equivalent)
        9  for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to understand
   that it should say that a variable lives in %st(0) (when asked via an
   `=' command) if we said it was in DWARF regno 11, but SDB still
   prints garbage when asked for the value of the variable in question
   (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,               /* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,       /* fp regs */
  -1, 9, -1, -1, -1,                    /* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,       /* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* extended SSE registers */
};
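
/* Editor's illustrative sketch (not part of the original file): mapping a
   gcc register number to its SVR4 DWARF number is a plain table lookup;
   -1 marks registers without a DWARF encoding.  Per the comment above,
   gcc regno 7 (%esp) maps to DWARF regno 4.  */
#if 0
static int
example_svr4_dwarf_regno (int gcc_regno)
{
  return svr4_dbx_register_map[gcc_regno];
}
#endif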
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
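
/* Editor's illustrative sketch (not part of the original file): the N-th
   integer argument of a call is passed in the N-th slot of the matching
   table, so argument 0 lands in %rdi (DI_REG) for the SysV ABI but in
   %rcx (CX_REG) for the MS ABI, which only has four such slots.  */
#if 0
static int
example_int_arg_regno (int n, bool ms_abi)
{
  return ms_abi ? x86_64_ms_abi_int_parameter_registers[n]
                : x86_64_int_parameter_registers[n];
}
#endif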
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                        <- ARG_POINTER
   saved pc

   saved static chain                   if ix86_static_chain_on_stack

   saved frame pointer                  if frame_pointer_needed
                                        <- HARD_FRAME_POINTER
   [saved regs]
                                        <- reg_save_offset
   [padding0]

   [saved SSE regs]
                                        <- sse_reg_save_offset
   [padding1]          |
                       |                <- FRAME_POINTER
   [va_arg registers]  |
                       |
   [frame]             |
                       |
   [padding2]          | = to_allocate
                                        <- STACK_POINTER
  */

struct ix86_frame
{
  int nsseregs;
  int nregs;
  int va_arg_size;
  int red_zone_size;
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;

  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
  HOST_WIDE_INT hfp_save_offset;
  HOST_WIDE_INT reg_save_offset;
  HOST_WIDE_INT sse_reg_save_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* true if sse prefetch instruction is not NOOP.  */
int x86_prefetch_sse;

/* -mstackrealign option */
static const char ix86_force_align_arg_pointer_string[]
  = "force_align_arg_pointer";

static rtx (*ix86_gen_leave) (void);
static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
static unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
static GTY(()) tree sysv_va_list_type_node;
static GTY(()) tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init = 0;
static struct machine_function *ix86_init_machine_status (void);
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (enum machine_mode,
                                                const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
                                                 rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT);
static tree ix86_canonical_va_list_type (tree);
static void predict_jump (int);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_MAX
};

static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
                                 const char *, enum fpmath_unit, bool);
static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
static void ix86_function_specific_save (struct cl_target_option *);
static void ix86_function_specific_restore (struct cl_target_option *);
static void ix86_function_specific_print (FILE *, int,
                                          struct cl_target_option *);
static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
static bool ix86_valid_target_attribute_inner_p (tree, char *[],
                                                 struct gcc_options *);
static bool ix86_can_inline_p (tree, tree);
static void ix86_set_current_function (tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

static enum calling_abi ix86_function_abi (const_tree);

#ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif

/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Whether -mtune= or -march= were specified */
static int ix86_tune_defaulted;
static int ix86_arch_specified;

/* Vectorization library interface and handlers.  */
static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);

static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;   /* Processor costs */
  const int align_loop;                 /* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
  {&i386_cost, 4, 3, 4, 3, 4},
  {&i486_cost, 16, 15, 16, 15, 16},
  {&pentium_cost, 16, 7, 16, 7, 16},
  {&pentiumpro_cost, 16, 15, 16, 10, 16},
  {&geode_cost, 0, 0, 0, 0, 0},
  {&k6_cost, 32, 7, 32, 7, 32},
  {&athlon_cost, 16, 7, 16, 7, 16},
  {&pentium4_cost, 0, 0, 0, 0, 0},
  {&k8_cost, 16, 7, 16, 7, 16},
  {&nocona_cost, 0, 0, 0, 0, 0},
  /* Core 2 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core 2 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  /* Core i7 32-bit.  */
  {&generic32_cost, 16, 10, 16, 10, 16},
  /* Core i7 64-bit.  */
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&generic32_cost, 16, 7, 16, 7, 16},
  {&generic64_cost, 16, 10, 16, 10, 16},
  {&amdfam10_cost, 32, 24, 32, 7, 32},
  {&bdver1_cost, 32, 24, 32, 7, 32},
  {&bdver2_cost, 32, 24, 32, 7, 32},
  {&btver1_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 15, 16, 7, 16}
};
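
/* Editor's illustrative sketch (not part of the original file): the
   per-processor alignment defaults above are consumed by indexing the
   table with the active tuning, as the option-override code does.  */
#if 0
static int
example_default_loop_align (void)
{
  return processor_target_table[ix86_tune].align_loop;
}
#endif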
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
{
  "generic",
  "i386",
  "i486",
  "pentium",
  "pentium-mmx",
  "pentiumpro",
  "pentium2",
  "pentium3",
  "pentium4",
  "pentium-m",
  "prescott",
  "nocona",
  "core2",
  "corei7",
  "atom",
  "geode",
  "k6",
  "k6-2",
  "k6-3",
  "athlon",
  "athlon-4",
  "k8",
  "amdfam10",
  "bdver1",
  "bdver2",
  "btver1"
};
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Return a string that documents the current -m options.  The caller is
   responsible for freeing the string.  */

static char *
ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
                    const char *tune, enum fpmath_unit fpmath,
                    bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;         /* option string */
    HOST_WIDE_INT mask;         /* isa mask options */
  };

  /* This table is ordered so that options like -msse4.2 that imply
     preceding options are matched first.  */
  static struct ix86_target_opts isa_opts[] =
  {
    { "-m64", OPTION_MASK_ISA_64BIT },
    { "-mfma4", OPTION_MASK_ISA_FMA4 },
    { "-mfma", OPTION_MASK_ISA_FMA },
    { "-mxop", OPTION_MASK_ISA_XOP },
    { "-mlwp", OPTION_MASK_ISA_LWP },
    { "-msse4a", OPTION_MASK_ISA_SSE4A },
    { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
    { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
    { "-mssse3", OPTION_MASK_ISA_SSSE3 },
    { "-msse3", OPTION_MASK_ISA_SSE3 },
    { "-msse2", OPTION_MASK_ISA_SSE2 },
    { "-msse", OPTION_MASK_ISA_SSE },
    { "-m3dnow", OPTION_MASK_ISA_3DNOW },
    { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
    { "-mmmx", OPTION_MASK_ISA_MMX },
    { "-mabm", OPTION_MASK_ISA_ABM },
    { "-mbmi", OPTION_MASK_ISA_BMI },
    { "-mbmi2", OPTION_MASK_ISA_BMI2 },
    { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
    { "-mtbm", OPTION_MASK_ISA_TBM },
    { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
    { "-mmovbe", OPTION_MASK_ISA_MOVBE },
    { "-mcrc32", OPTION_MASK_ISA_CRC32 },
    { "-maes", OPTION_MASK_ISA_AES },
    { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
    { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
    { "-mrdrnd", OPTION_MASK_ISA_RDRND },
    { "-mf16c", OPTION_MASK_ISA_F16C },
  };

  /* Flag options.  */
  static struct ix86_target_opts flag_opts[] =
  {
    { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
    { "-m80387", MASK_80387 },
    { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
    { "-malign-double", MASK_ALIGN_DOUBLE },
    { "-mcld", MASK_CLD },
    { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
    { "-mieee-fp", MASK_IEEE_FP },
    { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
    { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
    { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
    { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
    { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
    { "-mno-push-args", MASK_NO_PUSH_ARGS },
    { "-mno-red-zone", MASK_NO_RED_ZONE },
    { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
    { "-mrecip", MASK_RECIP },
    { "-mrtd", MASK_RTD },
    { "-msseregparm", MASK_SSEREGPARM },
    { "-mstack-arg-probe", MASK_STACK_PROBE },
    { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
    { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
    { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
    { "-mvzeroupper", MASK_VZEROUPPER },
    { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD },
    { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE },
    { "-mprefer-avx128", MASK_PREFER_AVX128 },
  };

  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];

  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;

  memset (opts, '\0', sizeof (opts));

  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }

  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
        {
          opts[num++][0] = isa_opts[i].option;
          isa &= ~ isa_opts[i].mask;
        }
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
               isa);
    }

  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
        {
          opts[num++][0] = flag_opts[i].option;
          flags &= ~ flag_opts[i].mask;
        }
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }

  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      switch ((int) fpmath)
        {
        case FPMATH_387:
          opts[num++][1] = "387";
          break;

        case FPMATH_SSE:
          opts[num++][1] = "sse";
          break;

        case FPMATH_387 | FPMATH_SSE:
          opts[num++][1] = "sse+387";
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* Any options?  */
  if (num == 0)
    return NULL;

  gcc_assert (num < ARRAY_SIZE (opts));

  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
        if (opts[i][j])
          len += strlen (opts[i][j]);
    }

  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
        len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      /* Separate from the previous option.  */
      if (i != 0)
        {
          *ptr++ = ' ';
          line_len++;
        }

      if (add_nl_p && line_len + len2[0] + len2[1] > 70)
        {
          /* Start a new line.  */
          *ptr++ = '\n';
          line_len = 0;
        }

      for (j = 0; j < 2; j++)
        if (opts[i][j])
          {
            memcpy (ptr, opts[i][j], len2[j]);
            ptr += len2[j];
            line_len += len2[j];
          }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
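
/* Editor's illustrative sketch (not part of the original file): a typical
   use mirrors ix86_debug_options below -- build the string, print it, free
   it.  The "corei7" arch argument here is purely hypothetical.  */
#if 0
static void
example_dump_target_string (void)
{
  char *s = ix86_target_string (ix86_isa_flags, target_flags,
                                "corei7", NULL, FPMATH_SSE, false);
  if (s)
    {
      fprintf (stderr, "%s\n", s);
      free (s);
    }
}
#endif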
/* Return true if profiling code should be emitted before the prologue,
   otherwise false.  Note: for x86 the "hotfix" (MS hot-patch) prologue
   case is handled with sorry ().  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
static void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
                                   ix86_arch_string, ix86_tune_string,
                                   ix86_fpmath, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
/* Override various settings based on options.  If MAIN_ARGS_P, the
   options are from the command line, otherwise they are from
   attributes.  */

static void
ix86_option_override_internal (bool main_args_p)
{
  int i;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
#define PTA_3DNOW               (HOST_WIDE_INT_1 << 0)
#define PTA_3DNOW_A             (HOST_WIDE_INT_1 << 1)
#define PTA_64BIT               (HOST_WIDE_INT_1 << 2)
#define PTA_ABM                 (HOST_WIDE_INT_1 << 3)
#define PTA_AES                 (HOST_WIDE_INT_1 << 4)
#define PTA_AVX                 (HOST_WIDE_INT_1 << 5)
#define PTA_BMI                 (HOST_WIDE_INT_1 << 6)
#define PTA_CX16                (HOST_WIDE_INT_1 << 7)
#define PTA_F16C                (HOST_WIDE_INT_1 << 8)
#define PTA_FMA                 (HOST_WIDE_INT_1 << 9)
#define PTA_FMA4                (HOST_WIDE_INT_1 << 10)
#define PTA_FSGSBASE            (HOST_WIDE_INT_1 << 11)
#define PTA_LWP                 (HOST_WIDE_INT_1 << 12)
#define PTA_LZCNT               (HOST_WIDE_INT_1 << 13)
#define PTA_MMX                 (HOST_WIDE_INT_1 << 14)
#define PTA_MOVBE               (HOST_WIDE_INT_1 << 15)
#define PTA_NO_SAHF             (HOST_WIDE_INT_1 << 16)
#define PTA_PCLMUL              (HOST_WIDE_INT_1 << 17)
#define PTA_POPCNT              (HOST_WIDE_INT_1 << 18)
#define PTA_PREFETCH_SSE        (HOST_WIDE_INT_1 << 19)
#define PTA_RDRND               (HOST_WIDE_INT_1 << 20)
#define PTA_SSE                 (HOST_WIDE_INT_1 << 21)
#define PTA_SSE2                (HOST_WIDE_INT_1 << 22)
#define PTA_SSE3                (HOST_WIDE_INT_1 << 23)
#define PTA_SSE4_1              (HOST_WIDE_INT_1 << 24)
#define PTA_SSE4_2              (HOST_WIDE_INT_1 << 25)
#define PTA_SSE4A               (HOST_WIDE_INT_1 << 26)
#define PTA_SSSE3               (HOST_WIDE_INT_1 << 27)
#define PTA_TBM                 (HOST_WIDE_INT_1 << 28)
#define PTA_XOP                 (HOST_WIDE_INT_1 << 29)
#define PTA_AVX2                (HOST_WIDE_INT_1 << 30)
#define PTA_BMI2                (HOST_WIDE_INT_1 << 31)
/* if this reaches 64, need to widen struct pta flags below */
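
/* Editor's illustrative sketch (not part of the original file): PTA_*
   values are single bits in a HOST_WIDE_INT -- hence the worry above about
   running out at 64 -- and testing one is a single AND, exactly as the
   -march/-mtune loops below do.  */
#if 0
static bool
example_pta_has (unsigned HOST_WIDE_INT flags, unsigned HOST_WIDE_INT pta_bit)
{
  return (flags & pta_bit) != 0;        /* e.g. flags & PTA_SSE2 */
}
#endif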
  static struct pta
    {
      const char *const name;           /* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned HOST_WIDE_INT flags;
    }
  const processor_alias_table[] =
    {
2944 {"i386", PROCESSOR_I386
, CPU_NONE
, 0},
2945 {"i486", PROCESSOR_I486
, CPU_NONE
, 0},
2946 {"i586", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2947 {"pentium", PROCESSOR_PENTIUM
, CPU_PENTIUM
, 0},
2948 {"pentium-mmx", PROCESSOR_PENTIUM
, CPU_PENTIUM
, PTA_MMX
},
2949 {"winchip-c6", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
},
2950 {"winchip2", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2951 {"c3", PROCESSOR_I486
, CPU_NONE
, PTA_MMX
| PTA_3DNOW
},
2952 {"c3-2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
| PTA_SSE
},
2953 {"i686", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2954 {"pentiumpro", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, 0},
2955 {"pentium2", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
, PTA_MMX
},
2956 {"pentium3", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2958 {"pentium3m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2960 {"pentium-m", PROCESSOR_PENTIUMPRO
, CPU_PENTIUMPRO
,
2961 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2962 {"pentium4", PROCESSOR_PENTIUM4
, CPU_NONE
,
2963 PTA_MMX
|PTA_SSE
| PTA_SSE2
},
2964 {"pentium4m", PROCESSOR_PENTIUM4
, CPU_NONE
,
2965 PTA_MMX
| PTA_SSE
| PTA_SSE2
},
2966 {"prescott", PROCESSOR_NOCONA
, CPU_NONE
,
2967 PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
},
2968 {"nocona", PROCESSOR_NOCONA
, CPU_NONE
,
2969 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2970 | PTA_CX16
| PTA_NO_SAHF
},
2971 {"core2", PROCESSOR_CORE2_64
, CPU_CORE2
,
2972 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2973 | PTA_SSSE3
| PTA_CX16
},
2974 {"corei7", PROCESSOR_COREI7_64
, CPU_COREI7
,
2975 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2976 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_CX16
},
2977 {"corei7-avx", PROCESSOR_COREI7_64
, CPU_COREI7
,
2978 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2979 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2980 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
},
2981 {"core-avx-i", PROCESSOR_COREI7_64
, CPU_COREI7
,
2982 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2983 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
2984 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2985 | PTA_RDRND
| PTA_F16C
},
2986 {"core-avx2", PROCESSOR_COREI7_64
, CPU_COREI7
,
2987 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2988 | PTA_SSSE3
| PTA_SSE4_1
| PTA_SSE4_2
| PTA_AVX
| PTA_AVX2
2989 | PTA_CX16
| PTA_POPCNT
| PTA_AES
| PTA_PCLMUL
| PTA_FSGSBASE
2990 | PTA_RDRND
| PTA_F16C
| PTA_BMI
| PTA_BMI2
| PTA_LZCNT
2991 | PTA_FMA
| PTA_MOVBE
},
2992 {"atom", PROCESSOR_ATOM
, CPU_ATOM
,
2993 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
2994 | PTA_SSSE3
| PTA_CX16
| PTA_MOVBE
},
2995 {"geode", PROCESSOR_GEODE
, CPU_GEODE
,
2996 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
|PTA_PREFETCH_SSE
},
2997 {"k6", PROCESSOR_K6
, CPU_K6
, PTA_MMX
},
2998 {"k6-2", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
2999 {"k6-3", PROCESSOR_K6
, CPU_K6
, PTA_MMX
| PTA_3DNOW
},
3000 {"athlon", PROCESSOR_ATHLON
, CPU_ATHLON
,
3001 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3002 {"athlon-tbird", PROCESSOR_ATHLON
, CPU_ATHLON
,
3003 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_PREFETCH_SSE
},
3004 {"athlon-4", PROCESSOR_ATHLON
, CPU_ATHLON
,
3005 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3006 {"athlon-xp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3007 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3008 {"athlon-mp", PROCESSOR_ATHLON
, CPU_ATHLON
,
3009 PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
},
3010 {"x86-64", PROCESSOR_K8
, CPU_K8
,
3011 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_NO_SAHF
},
3012 {"k8", PROCESSOR_K8
, CPU_K8
,
3013 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3014 | PTA_SSE2
| PTA_NO_SAHF
},
3015 {"k8-sse3", PROCESSOR_K8
, CPU_K8
,
3016 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3017 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3018 {"opteron", PROCESSOR_K8
, CPU_K8
,
3019 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3020 | PTA_SSE2
| PTA_NO_SAHF
},
3021 {"opteron-sse3", PROCESSOR_K8
, CPU_K8
,
3022 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3023 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3024 {"athlon64", PROCESSOR_K8
, CPU_K8
,
3025 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3026 | PTA_SSE2
| PTA_NO_SAHF
},
3027 {"athlon64-sse3", PROCESSOR_K8
, CPU_K8
,
3028 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3029 | PTA_SSE2
| PTA_SSE3
| PTA_NO_SAHF
},
3030 {"athlon-fx", PROCESSOR_K8
, CPU_K8
,
3031 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3032 | PTA_SSE2
| PTA_NO_SAHF
},
3033 {"amdfam10", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3034 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3035 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3036 {"barcelona", PROCESSOR_AMDFAM10
, CPU_AMDFAM10
,
3037 PTA_64BIT
| PTA_MMX
| PTA_3DNOW
| PTA_3DNOW_A
| PTA_SSE
3038 | PTA_SSE2
| PTA_SSE3
| PTA_SSE4A
| PTA_CX16
| PTA_ABM
},
3039 {"bdver1", PROCESSOR_BDVER1
, CPU_BDVER1
,
3040 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3041 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3042 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
| PTA_FMA4
3043 | PTA_XOP
| PTA_LWP
},
3044 {"bdver2", PROCESSOR_BDVER2
, CPU_BDVER2
,
3045 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3046 | PTA_SSE4A
| PTA_CX16
| PTA_ABM
| PTA_SSSE3
| PTA_SSE4_1
3047 | PTA_SSE4_2
| PTA_AES
| PTA_PCLMUL
| PTA_AVX
3048 | PTA_XOP
| PTA_LWP
| PTA_BMI
| PTA_TBM
| PTA_F16C
3050 {"btver1", PROCESSOR_BTVER1
, CPU_GENERIC64
,
3051 PTA_64BIT
| PTA_MMX
| PTA_SSE
| PTA_SSE2
| PTA_SSE3
3052 | PTA_SSSE3
| PTA_SSE4A
|PTA_ABM
| PTA_CX16
},
3053 {"generic32", PROCESSOR_GENERIC32
, CPU_PENTIUMPRO
,
3054 0 /* flags are only used for -march switch. */ },
3055 {"generic64", PROCESSOR_GENERIC64
, CPU_GENERIC64
,
3056 PTA_64BIT
/* flags are only used for -march switch. */ },
  /* -mrecip options.  */
  static struct
    {
      const char *string;       /* option name */
      unsigned int mask;        /* mask bits to set */
    }
  const recip_options[] =
    {
      { "all",      RECIP_MASK_ALL },
      { "none",     RECIP_MASK_NONE },
      { "div",      RECIP_MASK_DIV },
      { "sqrt",     RECIP_MASK_SQRT },
      { "vec-div",  RECIP_MASK_VEC_DIV },
      { "vec-sqrt", RECIP_MASK_VEC_SQRT },
    };
= ARRAY_SIZE (processor_alias_table
);
3077 /* Set up prefix/suffix so the error messages refer to either the command
3078 line argument, or the attribute(target). */
3087 prefix
= "option(\"";
3092 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3093 SUBTARGET_OVERRIDE_OPTIONS
;
3096 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3097 SUBSUBTARGET_OVERRIDE_OPTIONS
;
  if (TARGET_X32)
    ix86_isa_flags |= OPTION_MASK_ISA_64BIT;

  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
  /* Need to check -mtune=generic first.  */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
          || !strcmp (ix86_tune_string, "i686")
          /* As special support for cross compilers we read -mtune=native
             as -mtune=generic.  With native compilers we won't see the
             -mtune=native, as it was changed by the driver.  */
          || !strcmp (ix86_tune_string, "native"))
        {
          if (TARGET_64BIT)
            ix86_tune_string = "generic64";
          else
            ix86_tune_string = "generic32";
        }
      /* If this call is for setting the option attribute, allow the
         generic32/generic64 that was previously set.  */
      else if (!main_args_p
               && (!strcmp (ix86_tune_string, "generic32")
                   || !strcmp (ix86_tune_string, "generic64")))
        ;
      else if (!strncmp (ix86_tune_string, "generic", 7))
        error ("bad value (%s) for %stune=%s %s",
               ix86_tune_string, prefix, suffix, sw);
      else if (!strcmp (ix86_tune_string, "x86-64"))
        warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
                 "%stune=k8%s or %stune=generic%s instead as appropriate",
                 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
        ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
        {
          ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
          ix86_tune_defaulted = 1;
        }

      /* ix86_tune_string is set to ix86_arch_string or defaulted.  We
         need to use a sensible tune option.  */
      if (!strcmp (ix86_tune_string, "generic")
          || !strcmp (ix86_tune_string, "x86-64")
          || !strcmp (ix86_tune_string, "i686"))
        {
          if (TARGET_64BIT)
            ix86_tune_string = "generic64";
          else
            ix86_tune_string = "generic32";
        }
    }
  if (ix86_stringop_alg == rep_prefix_8_byte && !TARGET_64BIT)
    {
      /* rep; movq isn't available in 32-bit code.  */
      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
      ix86_stringop_alg = no_stringop;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;

  if (!global_options_set.x_ix86_abi)
    ix86_abi = DEFAULT_ABI;
  if (global_options_set.x_ix86_cmodel)
    {
      switch (ix86_cmodel)
        {
        case CM_SMALL:
        case CM_SMALL_PIC:
          if (flag_pic)
            ix86_cmodel = CM_SMALL_PIC;
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "small", "32");
          break;

        case CM_MEDIUM:
        case CM_MEDIUM_PIC:
          if (flag_pic)
            ix86_cmodel = CM_MEDIUM_PIC;
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "medium", "32");
          else if (TARGET_X32)
            error ("code model %qs not supported in x32 mode",
                   "medium");
          break;

        case CM_LARGE:
        case CM_LARGE_PIC:
          if (flag_pic)
            ix86_cmodel = CM_LARGE_PIC;
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "large", "32");
          else if (TARGET_X32)
            error ("code model %qs not supported in x32 mode",
                   "large");
          break;

        case CM_32:
          if (flag_pic)
            error ("code model %s does not support PIC mode", "32");
          if (TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "32", "64");
          break;

        case CM_KERNEL:
          if (flag_pic)
            {
              error ("code model %s does not support PIC mode", "kernel");
              ix86_cmodel = CM_32;
            }
          if (!TARGET_64BIT)
            error ("code model %qs not supported in the %s bit mode",
                   "kernel", "32");
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
         use of rip-relative addressing.  This eliminates fixups that
         would otherwise be needed if this object is to be placed in a
         DLL, and is essentially just as efficient as direct addressing.  */
      if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
        ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
      else if (TARGET_64BIT)
        ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
        ix86_cmodel = CM_32;
    }
  if (TARGET_MACHO && ix86_asm_dialect == ASM_INTEL)
    {
      error ("-masm=intel not supported in this configuration");
      ix86_asm_dialect = ASM_ATT;
    }
  if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
           (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3256 for (i
= 0; i
< pta_size
; i
++)
3257 if (! strcmp (ix86_arch_string
, processor_alias_table
[i
].name
))
3259 ix86_schedule
= processor_alias_table
[i
].schedule
;
3260 ix86_arch
= processor_alias_table
[i
].processor
;
3261 /* Default cpu tuning to the architecture. */
3262 ix86_tune
= ix86_arch
;
3264 if (TARGET_64BIT
&& !(processor_alias_table
[i
].flags
& PTA_64BIT
))
3265 error ("CPU you selected does not support x86-64 "
3268 if (processor_alias_table
[i
].flags
& PTA_MMX
3269 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_MMX
))
3270 ix86_isa_flags
|= OPTION_MASK_ISA_MMX
;
3271 if (processor_alias_table
[i
].flags
& PTA_3DNOW
3272 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW
))
3273 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW
;
3274 if (processor_alias_table
[i
].flags
& PTA_3DNOW_A
3275 && !(ix86_isa_flags_explicit
& OPTION_MASK_ISA_3DNOW_A
))
3276 ix86_isa_flags
|= OPTION_MASK_ISA_3DNOW_A
;
3277 if (processor_alias_table
[i].flags & PTA_SSE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
	if (processor_alias_table[i].flags & PTA_SSSE3
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
	if (processor_alias_table[i].flags & PTA_SSE4_1
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
	if (processor_alias_table[i].flags & PTA_SSE4_2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
	if (processor_alias_table[i].flags & PTA_AVX
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX;
	if (processor_alias_table[i].flags & PTA_AVX2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
	  ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
	if (processor_alias_table[i].flags & PTA_FMA
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA;
	if (processor_alias_table[i].flags & PTA_SSE4A
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
	  ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
	if (processor_alias_table[i].flags & PTA_FMA4
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
	  ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
	if (processor_alias_table[i].flags & PTA_XOP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
	  ix86_isa_flags |= OPTION_MASK_ISA_XOP;
	if (processor_alias_table[i].flags & PTA_LWP
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
	  ix86_isa_flags |= OPTION_MASK_ISA_LWP;
	if (processor_alias_table[i].flags & PTA_ABM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
	  ix86_isa_flags |= OPTION_MASK_ISA_ABM;
	if (processor_alias_table[i].flags & PTA_BMI
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI;
	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
	if (processor_alias_table[i].flags & PTA_TBM
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
	if (processor_alias_table[i].flags & PTA_BMI2
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
	if (processor_alias_table[i].flags & PTA_CX16
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
	  ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
	if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
	  ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
	if (processor_alias_table[i].flags & PTA_MOVBE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
	  ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
	if (processor_alias_table[i].flags & PTA_AES
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
	  ix86_isa_flags |= OPTION_MASK_ISA_AES;
	if (processor_alias_table[i].flags & PTA_PCLMUL
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
	  ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
	if (processor_alias_table[i].flags & PTA_FSGSBASE
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
	  ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
	if (processor_alias_table[i].flags & PTA_RDRND
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
	  ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
	if (processor_alias_table[i].flags & PTA_F16C
	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
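  /* Illustrative sketch of the chain above (the alias entry shown here is
     a hypothetical paraphrase, not quoted from processor_alias_table):
     given an entry along the lines of

	 { "corei7", PROCESSOR_COREI7_64, CPU_COREI7,
	   PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
	   | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT },

     "-march=corei7" ORs the matching OPTION_MASK_ISA_* bits into
     ix86_isa_flags.  An explicit "-mno-sse4.2" sets the corresponding
     bit in ix86_isa_flags_explicit first, so the table entry cannot
     re-enable it: explicit user options always win over -march
     defaults.  */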
  if (!strcmp (ix86_arch_string, "generic"))
    error ("generic CPU can be used only for %stune=%s %s",
	   prefix, suffix, sw);
  else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
    error ("bad value (%s) for %sarch=%s %s",
	   ix86_arch_string, prefix, suffix, sw);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
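  /* For example (a sketch; the feature index is only illustrative): with
     ix86_arch == PROCESSOR_PENTIUMPRO the loop above computes

	 ix86_arch_features[X86_ARCH_CMOV]
	   = !!(initial_ix86_arch_features[X86_ARCH_CMOV]
		& (1u << PROCESSOR_PENTIUMPRO));

     i.e. each architecture feature is kept as a bitmask of processors
     that have it, and is tested here against the single bit selected by
     -march.  */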
  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_schedule = processor_alias_table[i].schedule;
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT)
	  {
	    if (!(processor_alias_table[i].flags & PTA_64BIT))
	      {
		if (ix86_tune_defaulted)
		  {
		    ix86_tune_string = "x86-64";
		    for (i = 0; i < pta_size; i++)
		      if (! strcmp (ix86_tune_string,
				    processor_alias_table[i].name))
			break;
		    ix86_schedule = processor_alias_table[i].schedule;
		    ix86_tune = processor_alias_table[i].processor;
		  }
		else
		  error ("CPU you selected does not support x86-64 "
			 "instruction set");
	      }
	  }
	else
	  {
	    /* Adjust tuning when compiling for 32-bit ABI.  */
	    switch (ix86_tune)
	      {
	      case PROCESSOR_GENERIC64:
		ix86_tune = PROCESSOR_GENERIC32;
		ix86_schedule = CPU_PENTIUMPRO;
		break;

	      case PROCESSOR_CORE2_64:
		ix86_tune = PROCESSOR_CORE2_32;
		break;

	      case PROCESSOR_COREI7_64:
		ix86_tune = PROCESSOR_COREI7_32;
		break;

	      default:
		break;
	      }
	  }
	/* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;
	break;
      }

  if (ix86_tune_specified && i == pta_size)
    error ("bad value (%s) for %stune=%s %s",
	   ix86_tune_string, prefix, suffix, sw);
  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

#ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif

#ifndef USE_X86_64_FRAME_POINTER
#define USE_X86_64_FRAME_POINTER 0
#endif
  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (optimize > 1 && !global_options_set.x_flag_zee)
	flag_zee = 1;
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
      if (flag_asynchronous_unwind_tables == 2)
	flag_unwind_tables = flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (optimize >= 1 && !global_options_set.x_flag_omit_frame_pointer)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (global_options_set.x_ix86_regparm)
    {
      if (TARGET_64BIT)
	warning (0, "-mregparm is ignored in 64-bit mode");
      if (ix86_regparm > REGPARM_MAX)
	{
	  error ("-mregparm=%d is not between 0 and %d",
		 ix86_regparm, REGPARM_MAX);
	  ix86_regparm = 0;
	}
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip
	= processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip
	= processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    align_functions = processor_target_table[ix86_tune].align_func;

  /* Provide default for -mbranch-cost= value.  */
  if (!global_options_set.x_ix86_branch_cost)
    ix86_branch_cost = ix86_cost->branch_cost;
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;

      /* Enable by default the SSE and MMX builtins.  Do allow the user to
	 explicitly disable any of these.  In particular, disabling SSE and
	 MMX for kernel code is extremely useful.  */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);

      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;

      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;

      /* The i386 ABI does not specify a red zone.  It still makes sense to
	 use one when the programmer takes care to keep the stack from being
	 destroyed.  */
      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
	target_flags |= MASK_NO_RED_ZONE;
    }
  /* Keep nonleaf frame pointers.  */
  if (flag_omit_frame_pointer)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_finite_math_only)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  /* Likewise, if the target doesn't have a 387, or we've specified
     software floating point, don't use 387 inline intrinsics.  */
  if (!TARGET_80387)
    target_flags |= MASK_NO_FANCY_MATH_387;
  /* Turn on MMX builtins for -msse.  */
  if (TARGET_SSE)
    {
      ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
      x86_prefetch_sse = true;
    }

  /* Turn on popcnt instruction for -msse4.2 or -mabm.  */
  if (TARGET_SSE4_2 || TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;

  /* Turn on lzcnt instruction for -mabm.  */
  if (TARGET_ABM)
    ix86_isa_flags |= OPTION_MASK_ISA_LZCNT & ~ix86_isa_flags_explicit;
  /* Validate -mpreferred-stack-boundary= value or default it to
     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
  if (global_options_set.x_ix86_preferred_stack_boundary_arg)
    {
      int min = (TARGET_64BIT ? 4 : 2);
      int max = (TARGET_SEH ? 4 : 12);

      if (ix86_preferred_stack_boundary_arg < min
	  || ix86_preferred_stack_boundary_arg > max)
	{
	  if (min == max)
	    error ("-mpreferred-stack-boundary is not supported "
		   "for this target");
	  else
	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
		   ix86_preferred_stack_boundary_arg, min, max);
	}
      else
	ix86_preferred_stack_boundary
	  = (1 << ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
    }
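  /* Worked example: "-mpreferred-stack-boundary=4" gives
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the usual 16-byte
     stack alignment.  The 64-bit minimum of 4 reflects the psABI
     requirement of at least 16-byte alignment there, while 32-bit code
     may go down to 2 (4 bytes).  */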
  /* Set the default value for -mstackrealign.  */
  if (ix86_force_align_arg_pointer == -1)
    ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;

  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;

  /* Validate -mincoming-stack-boundary= value or default it to
     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
  if (global_options_set.x_ix86_incoming_stack_boundary_arg)
    {
      if (ix86_incoming_stack_boundary_arg < (TARGET_64BIT ? 4 : 2)
	  || ix86_incoming_stack_boundary_arg > 12)
	error ("-mincoming-stack-boundary=%d is not between %d and 12",
	       ix86_incoming_stack_boundary_arg, TARGET_64BIT ? 4 : 2);
      else
	{
	  ix86_user_incoming_stack_boundary
	    = (1 << ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
	  ix86_incoming_stack_boundary
	    = ix86_user_incoming_stack_boundary;
	}
    }
  /* Accept -msseregparm only if at least SSE support is enabled.  */
  if (TARGET_SSEREGPARM
      && ! TARGET_SSE)
    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);

  if (global_options_set.x_ix86_fpmath)
    {
      if (ix86_fpmath & FPMATH_SSE)
	{
	  if (!TARGET_SSE)
	    {
	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if ((ix86_fpmath & FPMATH_387) && !TARGET_80387)
	    {
	      warning (0, "387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	}
    }
  else
    ix86_fpmath = TARGET_FPMATH_DEFAULT;

  /* If the i387 is disabled, then do not return values in it.  */
  if (!TARGET_80387)
    target_flags &= ~MASK_FLOAT_RETURNS;

  /* Use external vectorized library in vectorizing intrinsics.  */
  if (global_options_set.x_ix86_veclibabi_type)
    switch (ix86_veclibabi_type)
      {
      case ix86_veclibabi_type_svml:
	ix86_veclib_handler = ix86_veclibabi_svml;
	break;

      case ix86_veclibabi_type_acml:
	ix86_veclib_handler = ix86_veclibabi_acml;
	break;

      default:
	gcc_unreachable ();
      }
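  /* For instance, under "-mveclibabi=svml -O2 -ftree-vectorize" the
     vectorizer consults ix86_veclib_handler when it vectorizes a loop
     containing a call such as sinf, and the handler hands back the name
     of the corresponding routine in Intel's short vector math library
     rather than an open-coded expansion; the ACML handler plays the
     same role for AMD's library.  */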
  if ((!USE_IX86_FRAME_POINTER
       || (x86_accumulate_outgoing_args & ix86_tune_mask))
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* ??? Unwind info is not correct around the CFG unless either a frame
     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
     unwind info generation to be aware of the CFG and propagating states
     around edges.  */
  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
       || flag_exceptions || flag_non_call_exceptions)
      && flag_omit_frame_pointer
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "unwind tables currently require either a frame pointer "
		 "or %saccumulate-outgoing-args%s for correctness",
		 prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* If stack probes are required, the space used for large function
     arguments on the stack must also be probed, so enable
     -maccumulate-outgoing-args so this happens in the prologue.  */
  if (TARGET_STACK_PROBE
      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
    {
      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
		 "for correctness", prefix, suffix);
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }
  /* For sane SSE instruction set generation we need the fcomi instruction.
     It is safe to enable all CMOVE instructions.  Also, the RDRAND
     intrinsic expands to a sequence that includes a conditional move.  */
  if (TARGET_SSE || TARGET_RDRND)
    TARGET_CMOVE = 1;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }

  /* When scheduling description is not available, disable scheduler pass
     so it won't slow down the compilation and make x87 code slower.  */
  if (!TARGET_SCHEDULE)
    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ix86_cost->simultaneous_prefetches,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
			 global_options.x_param_values,
			 global_options_set.x_param_values);
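  /* E.g. (the numbers are illustrative of a typical cost table, not
     quoted from one): tuning for a CPU whose cost table carries
     prefetch_block == 64 and l2_cache_size == 256 behaves as if the user
     had passed "--param l1-cache-line-size=64 --param l2-cache-size=256",
     except that maybe_set_param_value never overrides a --param the user
     gave explicitly on the command line.  */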
  /* Enable software prefetching at -O3 for CPUs where prefetching is
     helpful.  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
    flag_prefetch_loop_arrays = 1;

  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
     can be optimized to ap = __builtin_next_arg (0).  */
  if (!TARGET_64BIT && !flag_split_stack)
    targetm.expand_builtin_va_start = NULL;
  if (TARGET_64BIT)
    {
      ix86_gen_leave = gen_leave_rex64;
      ix86_gen_add3 = gen_adddi3;
      ix86_gen_sub3 = gen_subdi3;
      ix86_gen_sub3_carry = gen_subdi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
      ix86_gen_monitor = gen_sse3_monitor64;
      ix86_gen_andsp = gen_anddi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
    }
  else
    {
      ix86_gen_leave = gen_leave;
      ix86_gen_add3 = gen_addsi3;
      ix86_gen_sub3 = gen_subsi3;
      ix86_gen_sub3_carry = gen_subsi3_carry;
      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
      ix86_gen_monitor = gen_sse3_monitor;
      ix86_gen_andsp = gen_andsi3;
      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
    }

#ifdef USE_IX86_CLD
  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
  if (!TARGET_64BIT)
    target_flags |= MASK_CLD & ~target_flags_explicit;
#endif
  if (!TARGET_64BIT && flag_pic)
    {
      if (flag_fentry > 0)
	sorry ("-mfentry isn%'t supported for 32-bit in combination "
	       "with -fpic");
      flag_fentry = 0;
    }
  else if (TARGET_SEH)
    {
      if (flag_fentry == 0)
	sorry ("-mno-fentry isn%'t compatible with SEH");
      flag_fentry = 1;
    }
  else if (flag_fentry < 0)
    {
#if defined(PROFILE_BEFORE_PROLOGUE)
      flag_fentry = 1;
#else
      flag_fentry = 0;
#endif
    }
  if (TARGET_AVX)
    {
      /* When not optimizing for size, enable vzeroupper optimization for
	 TARGET_AVX with -fexpensive-optimizations and split 32-byte
	 AVX unaligned load/store.  */
      if (!optimize_size)
	{
	  if (flag_expensive_optimizations
	      && !(target_flags_explicit & MASK_VZEROUPPER))
	    target_flags |= MASK_VZEROUPPER;
	  if ((x86_avx256_split_unaligned_load & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
	  if ((x86_avx256_split_unaligned_store & ix86_tune_mask)
	      && !(target_flags_explicit & MASK_AVX256_SPLIT_UNALIGNED_STORE))
	    target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
	  /* Enable 128-bit AVX instruction generation
	     for the auto-vectorizer.  */
	  if (TARGET_AVX128_OPTIMAL
	      && !(target_flags_explicit & MASK_PREFER_AVX128))
	    target_flags |= MASK_PREFER_AVX128;
	}
    }
  else
    {
      /* Disable vzeroupper pass if TARGET_AVX is disabled.  */
      target_flags &= ~MASK_VZEROUPPER;
    }
  if (ix86_recip_name)
    {
      char *p = ASTRDUP (ix86_recip_name);
      char *q;
      unsigned int mask, i;
      bool invert;

      while ((q = strtok (p, ",")) != NULL)
	{
	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (!strcmp (q, "default"))
	    mask = RECIP_MASK_ALL;
	  else
	    {
	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
		if (!strcmp (q, recip_options[i].string))
		  {
		    mask = recip_options[i].mask;
		    break;
		  }

	      if (i == ARRAY_SIZE (recip_options))
		{
		  error ("unknown option for -mrecip=%s", q);
		  invert = false;
		  mask = RECIP_MASK_NONE;
		}
	    }

	  recip_mask_explicit |= mask;
	  if (invert)
	    recip_mask &= ~mask;
	  else
	    recip_mask |= mask;
	}
    }

  if (TARGET_RECIP)
    recip_mask |= RECIP_MASK_ALL & ~recip_mask_explicit;
  else if (target_flags_explicit & MASK_RECIP)
    recip_mask &= ~(RECIP_MASK_ALL & ~recip_mask_explicit);
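  /* Example (token names follow the recip_options table): with
     "-mrecip=all,!sqrt" the first token ORs every RECIP_MASK_* bit into
     recip_mask, and the "!sqrt" token then clears the sqrt bit again,
     so reciprocal approximations are used everywhere except for scalar
     square roots.  */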
  /* Save the initial options in case the user does function specific
     optimizations.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node ();
}
/* Return TRUE if VAL is passed in register with 256bit AVX modes.  */

static bool
function_pass_avx256_p (const_rtx val)
{
  if (!val)
    return false;

  if (REG_P (val) && VALID_AVX256_REG_MODE (GET_MODE (val)))
    return true;

  if (GET_CODE (val) == PARALLEL)
    {
      int i;
      rtx r;

      for (i = XVECLEN (val, 0) - 1; i >= 0; i--)
	{
	  r = XVECEXP (val, 0, i);
	  if (GET_CODE (r) == EXPR_LIST
	      && XEXP (r, 0)
	      && REG_P (XEXP (r, 0))
	      && (GET_MODE (XEXP (r, 0)) == OImode
		  || VALID_AVX256_REG_MODE (GET_MODE (XEXP (r, 0)))))
	    return true;
	}
    }

  return false;
}
/* Implement the TARGET_OPTION_OVERRIDE hook.  */

static void
ix86_option_override (void)
{
  ix86_option_override_internal (true);
}
/* Update register usage after having seen the compiler flags.  */

void
ix86_conditional_register_usage (void)
{
  int i;
  unsigned int j;

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (fixed_regs[i] > 1)
	fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
      if (call_used_regs[i] > 1)
	call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
    }

  /* The PIC register, if it exists, is fixed.  */
  j = PIC_OFFSET_TABLE_REGNUM;
  if (j != INVALID_REGNUM)
    fixed_regs[j] = call_used_regs[j] = 1;

  /* The 64-bit MS_ABI changes the set of call-used registers.  */
  if (TARGET_64BIT_MS_ABI)
    {
      call_used_regs[SI_REG] = 0;
      call_used_regs[DI_REG] = 0;
      call_used_regs[XMM6_REG] = 0;
      call_used_regs[XMM7_REG] = 0;
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	call_used_regs[i] = 0;
    }

  /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
     other call-clobbered regs for 64-bit.  */
  if (TARGET_64BIT)
    {
      CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
	if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	    && call_used_regs[i])
	  SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, squash the registers.  */
  if (! TARGET_MMX)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If SSE is disabled, squash the registers.  */
  if (! TARGET_SSE)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If the FPU is disabled, squash the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";

  /* If 32-bit, squash the 64-bit registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	reg_names[i] = "";
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	reg_names[i] = "";
    }
}
/* Save the current options.  */

static void
ix86_function_specific_save (struct cl_target_option *ptr)
{
  ptr->arch = ix86_arch;
  ptr->schedule = ix86_schedule;
  ptr->tune = ix86_tune;
  ptr->branch_cost = ix86_branch_cost;
  ptr->tune_defaulted = ix86_tune_defaulted;
  ptr->arch_specified = ix86_arch_specified;
  ptr->x_ix86_isa_flags_explicit = ix86_isa_flags_explicit;
  ptr->ix86_target_flags_explicit = target_flags_explicit;
  ptr->x_recip_mask_explicit = recip_mask_explicit;

  /* The fields are char but the variables are not; make sure the
     values fit in the fields.  */
  gcc_assert (ptr->arch == ix86_arch);
  gcc_assert (ptr->schedule == ix86_schedule);
  gcc_assert (ptr->tune == ix86_tune);
  gcc_assert (ptr->branch_cost == ix86_branch_cost);
}
/* Restore the current options.  */

static void
ix86_function_specific_restore (struct cl_target_option *ptr)
{
  enum processor_type old_tune = ix86_tune;
  enum processor_type old_arch = ix86_arch;
  unsigned int ix86_arch_mask, ix86_tune_mask;
  int i;

  ix86_arch = (enum processor_type) ptr->arch;
  ix86_schedule = (enum attr_cpu) ptr->schedule;
  ix86_tune = (enum processor_type) ptr->tune;
  ix86_branch_cost = ptr->branch_cost;
  ix86_tune_defaulted = ptr->tune_defaulted;
  ix86_arch_specified = ptr->arch_specified;
  ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
  target_flags_explicit = ptr->ix86_target_flags_explicit;
  recip_mask_explicit = ptr->x_recip_mask_explicit;

  /* Recreate the arch feature tests if the arch changed.  */
  if (old_arch != ix86_arch)
    {
      ix86_arch_mask = 1u << ix86_arch;
      for (i = 0; i < X86_ARCH_LAST; ++i)
	ix86_arch_features[i]
	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
    }

  /* Recreate the tune optimization tests.  */
  if (old_tune != ix86_tune)
    {
      ix86_tune_mask = 1u << ix86_tune;
      for (i = 0; i < X86_TUNE_LAST; ++i)
	ix86_tune_features[i]
	  = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
    }
}
/* Print the current options.  */

static void
ix86_function_specific_print (FILE *file, int indent,
			      struct cl_target_option *ptr)
{
  char *target_string
    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
			  NULL, NULL, ptr->x_ix86_fpmath, false);

  fprintf (file, "%*sarch = %d (%s)\n",
	   indent, "",
	   ptr->arch,
	   ((ptr->arch < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->arch]
	    : "<unknown>"));

  fprintf (file, "%*stune = %d (%s)\n",
	   indent, "",
	   ptr->tune,
	   ((ptr->tune < TARGET_CPU_DEFAULT_max)
	    ? cpu_names[ptr->tune]
	    : "<unknown>"));

  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);

  if (target_string)
    {
      fprintf (file, "%*s%s\n", indent, "", target_string);
      free (target_string);
    }
}
/* Inner function to process the attribute((target(...))): take one argument
   and set the current options from it.  If we have a list, recursively go
   over the list.  */

static bool
ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
				     struct gcc_options *enum_opts_set)
{
  char *next_optstr;
  bool ret = true;

#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }

  enum ix86_opt_type
  {
    ix86_opt_unknown,
    ix86_opt_yes,
    ix86_opt_no,
    ix86_opt_str,
    ix86_opt_enum,
    ix86_opt_isa
  };

  static const struct
  {
    const char *string;
    size_t len;
    enum ix86_opt_type type;
    int opt;
    int mask;
  } attrs[] = {
    /* isa options */
    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
    IX86_ATTR_ISA ("abm",	OPT_mabm),
    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
    IX86_ATTR_ISA ("aes",	OPT_maes),
    IX86_ATTR_ISA ("avx",	OPT_mavx),
    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
    IX86_ATTR_ISA ("sse",	OPT_msse),
    IX86_ATTR_ISA ("sse2",	OPT_msse2),
    IX86_ATTR_ISA ("sse3",	OPT_msse3),
    IX86_ATTR_ISA ("sse4",	OPT_msse4),
    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
    IX86_ATTR_ISA ("fma",	OPT_mfma),
    IX86_ATTR_ISA ("xop",	OPT_mxop),
    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
    IX86_ATTR_ISA ("f16c",	OPT_mf16c),

    /* enum options */
    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),

    /* string options */
    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),

    /* flag options */
    IX86_ATTR_YES ("cld",
		   OPT_mcld,
		   MASK_CLD),

    IX86_ATTR_NO ("fancy-math-387",
		  OPT_mfancy_math_387,
		  MASK_NO_FANCY_MATH_387),

    IX86_ATTR_YES ("ieee-fp",
		   OPT_mieee_fp,
		   MASK_IEEE_FP),

    IX86_ATTR_YES ("inline-all-stringops",
		   OPT_minline_all_stringops,
		   MASK_INLINE_ALL_STRINGOPS),

    IX86_ATTR_YES ("inline-stringops-dynamically",
		   OPT_minline_stringops_dynamically,
		   MASK_INLINE_STRINGOPS_DYNAMICALLY),

    IX86_ATTR_NO ("align-stringops",
		  OPT_mno_align_stringops,
		  MASK_NO_ALIGN_STRINGOPS),

    IX86_ATTR_YES ("recip",
		   OPT_mrecip,
		   MASK_RECIP),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      for (; args; args = TREE_CHAIN (args))
	if (TREE_VALUE (args)
	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
						     p_strings, enum_opts_set))
	  ret = false;

      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    gcc_unreachable ();

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      const char *opt_string;
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      enum ix86_opt_type type = ix86_opt_unknown;
      int mask = 0;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      opt = N_OPTS;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  type = attrs[i].type;
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((type != ix86_opt_str && type != ix86_opt_enum)
		  ? len == opt_len
		  : len > opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      mask = attrs[i].mask;
	      opt_string = attrs[i].string;
	      break;
	    }
	}

      /* Process the option.  */
      if (opt == N_OPTS)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  ret = false;
	}

      else if (type == ix86_opt_isa)
	{
	  struct cl_decoded_option decoded;

	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
	  ix86_handle_option (&global_options, &global_options_set,
			      &decoded, input_location);
	}

      else if (type == ix86_opt_yes || type == ix86_opt_no)
	{
	  if (type == ix86_opt_no)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    target_flags |= mask;
	  else
	    target_flags &= ~mask;
	}

      else if (type == ix86_opt_str)
	{
	  if (p_strings[opt])
	    {
	      error ("option(\"%s\") was already specified", opt_string);
	      ret = false;
	    }
	  else
	    p_strings[opt] = xstrdup (p + opt_len);
	}

      else if (type == ix86_opt_enum)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (&global_options, enum_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }

  return ret;
}
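/* Usage sketch (the user code is hypothetical, not part of this file):

       __attribute__((target("sse4.1,arch=atom")))
       int fast_path (int x);

   "sse4.1" matches an IX86_ATTR_ISA entry and is routed through
   ix86_handle_option exactly as -msse4.1 would be; "arch=atom" is an
   IX86_ATTR_STR entry whose value is stashed in
   p_strings[IX86_FUNCTION_SPECIFIC_ARCH] for
   ix86_valid_target_attribute_tree to apply later; "no-sse4.1" would
   flip opt_set_p and disable the ISA instead.  */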
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

static tree
ix86_valid_target_attribute_tree (tree args)
{
  const char *orig_arch_string = ix86_arch_string;
  const char *orig_tune_string = ix86_tune_string;
  enum fpmath_unit orig_fpmath_set = global_options_set.x_ix86_fpmath;
  int orig_tune_defaulted = ix86_tune_defaulted;
  int orig_arch_specified = ix86_arch_specified;
  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
  tree t = NULL_TREE;
  int i;
  struct cl_target_option *def
    = TREE_TARGET_OPTION (target_option_default_node);
  struct gcc_options enum_opts_set;

  memset (&enum_opts_set, 0, sizeof (enum_opts_set));

  /* Process each of the options on the chain.  */
  if (! ix86_valid_target_attribute_inner_p (args, option_strings,
					     &enum_opts_set))
    return NULL_TREE;

  /* If the changed options are different from the default, rerun
     ix86_option_override_internal, and then save the options away.
     The string options are attribute options, and will be undone
     when we copy the save structure.  */
  if (ix86_isa_flags != def->x_ix86_isa_flags
      || target_flags != def->x_target_flags
      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
      || enum_opts_set.x_ix86_fpmath)
    {
      /* If we are using the default tune= or arch=, undo the string assigned,
	 and use the default.  */
      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
	ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
      else if (!orig_arch_specified)
	ix86_arch_string = NULL;

      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
	ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
      else if (orig_tune_defaulted)
	ix86_tune_string = NULL;

      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
      if (enum_opts_set.x_ix86_fpmath)
	global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
      else if (!TARGET_64BIT && TARGET_SSE)
	{
	  ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
	  global_options_set.x_ix86_fpmath = (enum fpmath_unit) 1;
	}

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      ix86_option_override_internal (false);

      /* Add any builtin functions with the new isa if any.  */
      ix86_add_new_builtins (ix86_isa_flags);

      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node ();

      ix86_arch_string = orig_arch_string;
      ix86_tune_string = orig_tune_string;
      global_options_set.x_ix86_fpmath = orig_fpmath_set;

      /* Free up memory allocated to hold the strings.  */
      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
	free (option_strings[i]);
    }

  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
ix86_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct cl_target_option cur_target;
  bool ret = true;
  tree old_optimize = build_optimization_node ();
  tree new_target, new_optimize;
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting target
     options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* The target attributes may also change some optimization flags, so update
     the optimization options if necessary.  */
  cl_target_option_save (&cur_target, &global_options);
  new_target = ix86_valid_target_attribute_tree (args);
  new_optimize = build_optimization_node ();

  if (!new_target)
    ret = false;
  else if (fndecl)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));

  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  bool ret = false;
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    ret = true;

  /* If caller has no option attributes, but callee does then it is not ok to
     inline.  */
  else if (!caller_tree)
    ret = false;

  else
    {
      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

      /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
	 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE4 function.  */
      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
	  != callee_opts->x_ix86_isa_flags)
	ret = false;

      /* See if we have the same non-isa options.  */
      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
	ret = false;

      /* See if arch, tune, etc. are the same.  */
      else if (caller_opts->arch != callee_opts->arch)
	ret = false;

      else if (caller_opts->tune != callee_opts->tune)
	ret = false;

      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
	ret = false;

      else if (caller_opts->branch_cost != callee_opts->branch_cost)
	ret = false;

      else
	ret = true;
    }

  return ret;
}
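/* Concrete reading of the subset test above (the callee is hypothetical):
   a caller built with -msse4.2 also has OPTION_MASK_ISA_SSE2 set, so

       __attribute__((target("sse2"))) int decode (int);

   may be inlined into it, because (caller & callee) ISA masks equal the
   callee's mask.  The reverse direction fails the test: the SSE4.2-only
   bits of such a callee are not guaranteed by an SSE2 caller.  */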
/* Remember the last target of ix86_set_current_function.  */
static GTY(()) tree ix86_previous_fndecl;

/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
ix86_set_current_function (tree fndecl)
{
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != ix86_previous_fndecl)
    {
      tree old_tree = (ix86_previous_fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
		       : NULL_TREE);

      tree new_tree = (fndecl
		       ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		       : NULL_TREE);

      ix86_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;

      else if (new_tree)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  target_reinit ();
	}

      else if (old_tree)
	{
	  struct cl_target_option *def
	    = TREE_TARGET_OPTION (target_option_current_node);

	  cl_target_option_restore (&global_options, def);
	  target_reinit ();
	}
    }
}
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
    {
      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  */
      if (!size || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
	ATTRIBUTE_UNUSED;

static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags = 0;
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	case SECCAT_SRODATA:
	case SECCAT_SDATA:
	case SECCAT_SBSS:
	  gcc_unreachable ();
	case SECCAT_TEXT:
	case SECCAT_TDATA:
	case SECCAT_TBSS:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
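/* Section-name sketch (the variable is hypothetical): for

       int big[1 << 20];	/* compiled with -mcmodel=medium */

   whose size exceeds ix86_section_threshold, the code above categorizes
   the object as SECCAT_BSS and produces the unique section ".lbss.big"
   (or ".gnu.linkonce.lb.big" for a one-only definition without COMDAT
   support), keeping large objects out of the 32-bit-addressable .bss.  */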
#ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use the .largecomm opcode for
   large objects.  */
void
x86_elf_aligned_common (FILE *file,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    fputs (".largecomm\t", file);
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
			const char *name, unsigned HOST_WIDE_INT size,
			int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;

  /* If we are generating position-independent code, we cannot sibcall
     optimize any indirect call, or a direct call to a global function,
     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && (!decl || !targetm.binds_local_p (decl)))
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);		/* pointer expression */
      type = TREE_TYPE (type);			/* pointer type */
      type = TREE_TYPE (type);			/* function type */
      decl_or_type = type;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Disable sibcall if we need to generate vzeroupper after
	 callee returns.  */
      if (TARGET_VZEROUPPER
	  && cfun->machine->callee_return_avx256_p
	  && !cfun->machine->caller_return_avx256_p)
	return false;
    }
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions are indirect.  */
      if (!decl
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
	{
	  if (ix86_function_regparm (type, NULL) >= 3)
	    {
	      /* ??? Need to count the actual number of registers to be used,
		 not the possible number of registers.  Fix later.  */
	      return false;
	    }
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
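/* Example of the 32-bit indirect-call restriction above (the declaration
   is hypothetical):

       typedef int (__attribute__((regparm(3))) *fptr) (int, int, int);

   a tail call through an fptr is refused, because EAX, EDX and ECX may
   all carry arguments, leaving no call-clobbered register to hold the
   target address; with regparm <= 2 the sibcall remains possible.  */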
4886 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4887 and "sseregparm" calling convention attributes;
4888 arguments as in struct attribute_spec.handler. */
4891 ix86_handle_cconv_attribute (tree
*node
, tree name
,
4893 int flags ATTRIBUTE_UNUSED
,
4896 if (TREE_CODE (*node
) != FUNCTION_TYPE
4897 && TREE_CODE (*node
) != METHOD_TYPE
4898 && TREE_CODE (*node
) != FIELD_DECL
4899 && TREE_CODE (*node
) != TYPE_DECL
)
4901 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4903 *no_add_attrs
= true;
4907 /* Can combine regparm with all attributes but fastcall, and thiscall. */
4908 if (is_attribute_p ("regparm", name
))
4912 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4914 error ("fastcall and regparm attributes are not compatible");
4917 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4919 error ("regparam and thiscall attributes are not compatible");
4922 cst
= TREE_VALUE (args
);
4923 if (TREE_CODE (cst
) != INTEGER_CST
)
4925 warning (OPT_Wattributes
,
4926 "%qE attribute requires an integer constant argument",
4928 *no_add_attrs
= true;
4930 else if (compare_tree_int (cst
, REGPARM_MAX
) > 0)
4932 warning (OPT_Wattributes
, "argument to %qE attribute larger than %d",
4934 *no_add_attrs
= true;
4942 /* Do not warn when emulating the MS ABI. */
4943 if ((TREE_CODE (*node
) != FUNCTION_TYPE
4944 && TREE_CODE (*node
) != METHOD_TYPE
)
4945 || ix86_function_type_abi (*node
) != MS_ABI
)
4946 warning (OPT_Wattributes
, "%qE attribute ignored",
4948 *no_add_attrs
= true;
4952 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4953 if (is_attribute_p ("fastcall", name
))
4955 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4957 error ("fastcall and cdecl attributes are not compatible");
4959 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4961 error ("fastcall and stdcall attributes are not compatible");
4963 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node
)))
4965 error ("fastcall and regparm attributes are not compatible");
4967 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4969 error ("fastcall and thiscall attributes are not compatible");
4973 /* Can combine stdcall with fastcall (redundant), regparm and
4975 else if (is_attribute_p ("stdcall", name
))
4977 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
4979 error ("stdcall and cdecl attributes are not compatible");
4981 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
4983 error ("stdcall and fastcall attributes are not compatible");
4985 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
4987 error ("stdcall and thiscall attributes are not compatible");
4991 /* Can combine cdecl with regparm and sseregparm. */
4992 else if (is_attribute_p ("cdecl", name
))
4994 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
4996 error ("stdcall and cdecl attributes are not compatible");
4998 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5000 error ("fastcall and cdecl attributes are not compatible");
5002 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node
)))
5004 error ("cdecl and thiscall attributes are not compatible");
5007 else if (is_attribute_p ("thiscall", name
))
5009 if (TREE_CODE (*node
) != METHOD_TYPE
&& pedantic
)
5010 warning (OPT_Wattributes
, "%qE attribute is used for none class-method",
5012 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node
)))
5014 error ("stdcall and thiscall attributes are not compatible");
5016 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node
)))
5018 error ("fastcall and thiscall attributes are not compatible");
5020 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node
)))
5022 error ("cdecl and thiscall attributes are not compatible");
5026 /* Can combine sseregparm with all attributes. */
/* The transactional memory builtins are implicitly regparm or fastcall
   depending on the ABI.  Override the generic do-nothing attribute that
   these builtins were declared with, and replace it with one of the two
   attributes that we expect elsewhere.  */

static tree
ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree alt;

  /* In no case do we want to add the placeholder attribute.  */
  *no_add_attrs = true;

  /* The 64-bit ABI is unchanged for transactional memory.  */
  if (TARGET_64BIT)
    return NULL_TREE;

  /* ??? Is there a better way to validate 32-bit windows?  We have
     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
  if (CHECK_STACK_LIMIT > 0)
    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
  else
    {
      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
    }
  decl_attributes (node, alt, flags);

  return NULL_TREE;
}
/* This function determines from TYPE the calling convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
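/* Decoding sketch (the prototype is hypothetical): a 32-bit

       int __attribute__((stdcall, regparm(2))) f (int, int);

   decodes as IX86_CALLCVT_STDCALL | IX86_CALLCVT_REGPARM; the base
   calling convention is the stdcall bit, while the regparm bit tells
   ix86_function_regparm to fetch the count from the attribute.  Under
   -mrtd an unannotated prototype decodes as stdcall, except for stdarg
   functions, which must stay caller-popped cdecl.  */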
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL
      && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	{
	  int local_regparm, globals = 0, regno;

	  /* Make sure no regparm register is taken by a
	     fixed register variable.  */
	  for (local_regparm = 0; local_regparm < REGPARM_MAX;
	       local_regparm++)
	    if (fixed_regs[local_regparm])
	      break;

	  /* We don't want to use regparm(3) for nested functions as
	     these use a static chain pointer in the third argument.  */
	  if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
	    local_regparm = 2;

	  /* In 32-bit mode save a register for the split stack.  */
	  if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
	    local_regparm = 2;

	  /* Each fixed register usage increases register pressure,
	     so fewer registers should be used for argument passing.
	     This functionality can be overridden by an explicit
	     regparm value.  */
	  for (regno = 0; regno <= DI_REG; regno++)
	    if (fixed_regs[regno])
	      globals++;

	  local_regparm
	    = globals < local_regparm ? local_regparm - globals : 0;

	  if (local_regparm > regparm)
	    regparm = local_regparm;
	}
    }

  return regparm;
}

/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (decl && TARGET_SSE_MATH && optimize
      && !(profile_flag && !flag_fentry))
    {
      /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified.  */
      struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
      if (i && i->local && i->can_change_signature)
	return TARGET_SSE2 ? 2 : 1;
    }

  return 0;
}
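/* Worked example for the local-regparm promotion above (the function is
   hypothetical): a file-local

       static int scale (int a, int b) { return a * b; }

   whose address never escapes has i->local set, so its two arguments
   are moved into registers as if it were regparm(2).  Building the same
   file with -ffixed-ecx marks hard register 2 fixed, which stops the
   scan early and lowers the promoted count; an explicit regparm
   attribute always takes precedence over this analysis.  */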
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep the aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
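/* Arithmetic example (the prototype is hypothetical): for a 32-bit

       int __attribute__((stdcall)) sum (int a, int b);

   SIZE is 8 (two 4-byte stack arguments), so the function returns with
   "ret 8" and the caller pops nothing.  Declare the same function
   variadic and the stdcall bit is ignored, giving the cdecl "ret 0"
   with the caller doing the cleanup instead.  */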
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  const int *parm_regs;

  if (!TARGET_64BIT)
    {
      if (TARGET_MACHO)
	return (regno < REGPARM_MAX
		|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
      else
	return (regno < REGPARM_MAX
		|| (TARGET_MMX && MMX_REGNO_P (regno)
		    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
		|| (TARGET_SSE && SSE_REGNO_P (regno)
		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
    }

  if (TARGET_MACHO)
    {
      if (SSE_REGNO_P (regno) && TARGET_SSE)
	return true;
    }
  else
    {
      if (TARGET_SSE && SSE_REGNO_P (regno)
	  && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
	return true;
    }

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */

  /* RAX is used as hidden argument to va_arg functions.  */
  if (ix86_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (ix86_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;
  for (i = 0; i < (ix86_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
5399 ix86_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
5401 if (must_pass_in_stack_var_size_or_pad (mode
, type
))
5404 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5405 The layout_type routine is crafty and tries to trick us into passing
5406 currently unsupported vector types on the stack by using TImode. */
5407 return (!TARGET_64BIT
&& mode
== TImode
5408 && type
&& TREE_CODE (type
) != VECTOR_TYPE
);
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI in use.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 64;
  return 0;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
   call abi used.  */

enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
    {
      enum calling_abi abi = ix86_abi;
      if (abi == SYSV_ABI)
	{
	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
	    abi = MS_ABI;
	}
      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
	abi = SYSV_ABI;
      return abi;
    }
  return ix86_abi;
}
static bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "ms_hook_prologue is not compatible with nested function");
      else
	return true;
    }
  return false;
}
/* Returns value SYSV_ABI, MS_ABI dependent on fndecl.  */

static enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}

/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
   call abi used.  */

enum calling_abi
ix86_cfun_abi (void)
{
  if (! cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp */
	  asm_fprintf (asm_out_file, ASM_BYTE
		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
	}
    }
}
extern void init_regs (void);

/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */

void
ix86_call_abi_override (const_tree fndecl)
{
  if (fndecl == NULL_TREE)
    cfun->machine->call_abi = ix86_abi;
  else
    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* 64-bit MS and SYSV ABI have different sets of call-used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context since this is needed only during RTL expansion.  */

static void
ix86_maybe_switch_abi (void)
{
  if (TARGET_64BIT
      && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
    reinit_regs ();
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_local_info *i;
  tree fnret_type;

  memset (cum, 0, sizeof (*cum));

  /* Initialize for the current callee.  */
  if (caller)
    {
      cfun->machine->callee_pass_avx256_p = false;
      cfun->machine->callee_return_avx256_p = false;
    }

  if (fndecl)
    {
      i = cgraph_local_info (fndecl);
      cum->call_abi = ix86_function_abi (fndecl);
      fnret_type = TREE_TYPE (TREE_TYPE (fndecl));
    }
  else
    {
      i = NULL;
      cum->call_abi = ix86_function_type_abi (fntype);
      if (fntype)
	fnret_type = TREE_TYPE (fntype);
      else
	fnret_type = NULL;
    }

  if (TARGET_VZEROUPPER && fnret_type)
    {
      rtx fnret_value = ix86_function_value (fnret_type, fntype,
					     false);
      if (function_pass_avx256_p (fnret_value))
	{
	  /* The return value of this function uses 256bit AVX modes.  */
	  if (caller)
	    cfun->machine->callee_return_avx256_p = true;
	  else
	    cfun->machine->caller_return_avx256_p = true;
	}
    }

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */

  if (TARGET_64BIT && cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
    sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
	   "or subtarget optimization implying it");
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
		    ? X86_64_REGPARM_MAX
		    : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	{
	  cum->sse_nregs = (cum->call_abi == SYSV_ABI
			    ? X86_64_SSE_REGPARM_MAX
			    : X86_64_MS_SSE_REGPARM_MAX);
	}
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because the type might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
  if (i && i->local && i->can_change_signature)
    fntype = TREE_TYPE (fndecl);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx = 0;
	  cum->warn_sse = 0;
	  cum->warn_mmx = 0;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }
}
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.  */

static enum machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
{
  enum machine_mode mode = TYPE_MODE (type);

  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32)
	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
	  && TYPE_VECTOR_SUBPARTS (type) > 1)
	{
	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
	    mode = MIN_MODE_VECTOR_FLOAT;
	  else
	    mode = MIN_MODE_VECTOR_INT;

	  /* Get the mode which has this inner mode and number of units.  */
	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
		&& GET_MODE_INNER (mode) == innermode)
	      {
		if (size == 32 && !TARGET_AVX)
		  {
		    static bool warnedavx;

		    if (cum && !warnedavx && cum->warn_avx)
		      {
			warnedavx = true;
			warning (0, "AVX vector argument without AVX "
				 "enabled changes the ABI");
		      }
		    return TYPE_MODE (type);
		  }
		else
		  return mode;
	      }

	  gcc_unreachable ();
	}
    }

  return mode;
}
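/* Illustrative example (hypothetical user code): for

       typedef float v8sf __attribute__((vector_size (32)));

   the natural mode is V8SFmode.  Without AVX enabled the middle-end
   would fall back to a non-vector mode, but the loop above still finds
   V8SFmode, so the argument keeps its psABI slot and the warning above
   fires once per compilation.  */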
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
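/* Worked example (illustrative): for struct { float f; int i; } both
   fields land in the same eightbyte.  The float classifies as
   X86_64_SSESF_CLASS and the int as X86_64_INTEGERSI_CLASS; rule #4
   merges them to X86_64_INTEGERSI_CLASS, so the pair is passed
   together in one general-purpose register.  */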
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 256 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */
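/* Worked example (illustrative): struct { double d; int a; int b; }
   spans two eightbytes.  The first (d) classifies as SSE, the second
   (a and b merged) as INTEGER, so classify_argument returns 2 with an
   SSE class in classes[0] and an integer class in classes[1]; the
   struct travels in one SSE register plus one general-purpose
   register.  */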
static int
classify_argument (enum machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words
    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5836 /* Variable sized entities are always passed/returned in memory. */
5840 if (mode
!= VOIDmode
5841 && targetm
.calls
.must_pass_in_stack (mode
, type
))
5844 if (type
&& AGGREGATE_TYPE_P (type
))
5848 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
5850 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5854 for (i
= 0; i
< words
; i
++)
5855 classes
[i
] = X86_64_NO_CLASS
;
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal memory class, so handle it as a special case.  */
5861 classes
[0] = X86_64_NO_CLASS
;
5865 /* Classify each field of record and merge classes. */
5866 switch (TREE_CODE (type
))
5869 /* And now merge the fields of structure. */
5870 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5872 if (TREE_CODE (field
) == FIELD_DECL
)
5876 if (TREE_TYPE (field
) == error_mark_node
)
	      /* Bitfields are always classified as integer.  Handle them
		 early, since later code would consider them to be
		 misaligned integers.  */
5882 if (DECL_BIT_FIELD (field
))
5884 for (i
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
5885 i
< ((int_bit_position (field
) + (bit_offset
% 64))
5886 + tree_low_cst (DECL_SIZE (field
), 0)
5889 merge_classes (X86_64_INTEGER_CLASS
,
5896 type
= TREE_TYPE (field
);
5898 /* Flexible array member is ignored. */
5899 if (TYPE_MODE (type
) == BLKmode
5900 && TREE_CODE (type
) == ARRAY_TYPE
5901 && TYPE_SIZE (type
) == NULL_TREE
5902 && TYPE_DOMAIN (type
) != NULL_TREE
5903 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
5908 if (!warned
&& warn_psabi
)
5911 inform (input_location
,
5912 "the ABI of passing struct with"
5913 " a flexible array member has"
5914 " changed in GCC 4.4");
5918 num
= classify_argument (TYPE_MODE (type
), type
,
5920 (int_bit_position (field
)
5921 + bit_offset
) % 256);
5924 pos
= (int_bit_position (field
) + (bit_offset
% 64)) / 8 / 8;
5925 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
5927 merge_classes (subclasses
[i
], classes
[i
+ pos
]);
5934 /* Arrays are handled as small records. */
5937 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
5938 TREE_TYPE (type
), subclasses
, bit_offset
);
5942 /* The partial classes are now full classes. */
5943 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
5944 subclasses
[0] = X86_64_SSE_CLASS
;
5945 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
5946 && !((bit_offset
% 64) == 0 && bytes
== 4))
5947 subclasses
[0] = X86_64_INTEGER_CLASS
;
5949 for (i
= 0; i
< words
; i
++)
5950 classes
[i
] = subclasses
[i
% num
];
5955 case QUAL_UNION_TYPE
:
5956 /* Unions are similar to RECORD_TYPE but offset is always 0.
5958 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
5960 if (TREE_CODE (field
) == FIELD_DECL
)
5964 if (TREE_TYPE (field
) == error_mark_node
)
5967 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
5968 TREE_TYPE (field
), subclasses
,
5972 for (i
= 0; i
< num
; i
++)
5973 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
      /* When size > 16 bytes, if the first one isn't
	 X86_64_SSE_CLASS or any other ones aren't
	 X86_64_SSEUP_CLASS, everything should be passed in
	 memory.  */
5988 if (classes
[0] != X86_64_SSE_CLASS
)
5991 for (i
= 1; i
< words
; i
++)
5992 if (classes
[i
] != X86_64_SSEUP_CLASS
)
5996 /* Final merger cleanup. */
5997 for (i
= 0; i
< words
; i
++)
5999 /* If one class is MEMORY, everything should be passed in
6001 if (classes
[i
] == X86_64_MEMORY_CLASS
)
6004 /* The X86_64_SSEUP_CLASS should be always preceded by
6005 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6006 if (classes
[i
] == X86_64_SSEUP_CLASS
6007 && classes
[i
- 1] != X86_64_SSE_CLASS
6008 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
6010 /* The first one should never be X86_64_SSEUP_CLASS. */
6011 gcc_assert (i
!= 0);
6012 classes
[i
] = X86_64_SSE_CLASS
;
6015 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6016 everything should be passed in memory. */
6017 if (classes
[i
] == X86_64_X87UP_CLASS
6018 && (classes
[i
- 1] != X86_64_X87_CLASS
))
6022 /* The first one should never be X86_64_X87UP_CLASS. */
6023 gcc_assert (i
!= 0);
6024 if (!warned
&& warn_psabi
)
6027 inform (input_location
,
6028 "the ABI of passing union with long double"
6029 " has changed in GCC 4.4");
  /* Compute alignment needed.  We align all types to natural boundaries,
     with the exception of XFmode, which is aligned to 64 bits.  */
6039 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
6041 int mode_alignment
= GET_MODE_BITSIZE (mode
);
6044 mode_alignment
= 128;
6045 else if (mode
== XCmode
)
6046 mode_alignment
= 256;
6047 if (COMPLEX_MODE_P (mode
))
6048 mode_alignment
/= 2;
6049 /* Misaligned fields are always returned in memory. */
6050 if (bit_offset
% mode_alignment
)
6054 /* for V1xx modes, just use the base mode */
6055 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
6056 && GET_MODE_SIZE (GET_MODE_INNER (mode
)) == bytes
)
6057 mode
= GET_MODE_INNER (mode
);
6059 /* Classification of atomic types. */
6064 classes
[0] = X86_64_SSE_CLASS
;
6067 classes
[0] = X86_64_SSE_CLASS
;
6068 classes
[1] = X86_64_SSEUP_CLASS
;
6078 int size
= (bit_offset
% 64)+ (int) GET_MODE_BITSIZE (mode
);
6082 classes
[0] = X86_64_INTEGERSI_CLASS
;
6085 else if (size
<= 64)
6087 classes
[0] = X86_64_INTEGER_CLASS
;
6090 else if (size
<= 64+32)
6092 classes
[0] = X86_64_INTEGER_CLASS
;
6093 classes
[1] = X86_64_INTEGERSI_CLASS
;
6096 else if (size
<= 64+64)
6098 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6106 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
6110 /* OImode shouldn't be used directly. */
6115 if (!(bit_offset
% 64))
6116 classes
[0] = X86_64_SSESF_CLASS
;
6118 classes
[0] = X86_64_SSE_CLASS
;
6121 classes
[0] = X86_64_SSEDF_CLASS
;
6124 classes
[0] = X86_64_X87_CLASS
;
6125 classes
[1] = X86_64_X87UP_CLASS
;
6128 classes
[0] = X86_64_SSE_CLASS
;
6129 classes
[1] = X86_64_SSEUP_CLASS
;
6132 classes
[0] = X86_64_SSE_CLASS
;
6133 if (!(bit_offset
% 64))
6139 if (!warned
&& warn_psabi
)
6142 inform (input_location
,
6143 "the ABI of passing structure with complex float"
6144 " member has changed in GCC 4.4");
6146 classes
[1] = X86_64_SSESF_CLASS
;
6150 classes
[0] = X86_64_SSEDF_CLASS
;
6151 classes
[1] = X86_64_SSEDF_CLASS
;
6154 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
      /* This mode is larger than 16 bytes.  */
6165 classes
[0] = X86_64_SSE_CLASS
;
6166 classes
[1] = X86_64_SSEUP_CLASS
;
6167 classes
[2] = X86_64_SSEUP_CLASS
;
6168 classes
[3] = X86_64_SSEUP_CLASS
;
6176 classes
[0] = X86_64_SSE_CLASS
;
6177 classes
[1] = X86_64_SSEUP_CLASS
;
6185 classes
[0] = X86_64_SSE_CLASS
;
6191 gcc_assert (VECTOR_MODE_P (mode
));
6196 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
6198 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
6199 classes
[0] = X86_64_INTEGERSI_CLASS
;
6201 classes
[0] = X86_64_INTEGER_CLASS
;
6202 classes
[1] = X86_64_INTEGER_CLASS
;
6203 return 1 + (bytes
> 8);
/* Examine the argument and return the number of registers required in each
   class.  Return 0 iff parameter should be passed in memory.  */

static int
examine_argument (enum machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;
  if (!n)
    return 0;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
	if (!in_return)
	  return 0;
	break;
      case X86_64_COMPLEX_X87_CLASS:
	return in_return ? 2 : 0;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }
  return 1;
}
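/* Example (illustrative): for __int128 (TImode), classify_argument
   produces two X86_64_INTEGER_CLASS words, so this function sets
   *int_nregs = 2, *sse_nregs = 0 and returns nonzero: the value is
   passed in a pair of general-purpose registers.  */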
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */
static rtx
construct_container (enum machine_mode mode, enum machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  enum machine_mode tmpmode;
  int bytes =
    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (!examine_argument (mode, type, in_return, &needed_intregs,
			 &needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;
  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
6283 if (needed_sseregs
&& !TARGET_SSE
)
6287 if (!issued_sse_ret_error
)
6289 error ("SSE register return with SSE disabled");
6290 issued_sse_ret_error
= true;
6293 else if (!issued_sse_arg_error
)
6295 error ("SSE register argument with SSE disabled");
6296 issued_sse_arg_error
= true;
  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
6303 if (!TARGET_80387
&& in_return
)
6304 for (i
= 0; i
< n
; i
++)
6305 if (regclass
[i
] == X86_64_X87_CLASS
6306 || regclass
[i
] == X86_64_X87UP_CLASS
6307 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
6309 if (!issued_x87_ret_error
)
6311 error ("x87 register return with x87 disabled");
6312 issued_x87_ret_error
= true;
  /* First construct simple cases.  Avoid SCmode, since we want to use
     a single register to pass this type.  */
6319 if (n
== 1 && mode
!= SCmode
)
6320 switch (regclass
[0])
6322 case X86_64_INTEGER_CLASS
:
6323 case X86_64_INTEGERSI_CLASS
:
6324 return gen_rtx_REG (mode
, intreg
[0]);
6325 case X86_64_SSE_CLASS
:
6326 case X86_64_SSESF_CLASS
:
6327 case X86_64_SSEDF_CLASS
:
6328 if (mode
!= BLKmode
)
6329 return gen_reg_or_parallel (mode
, orig_mode
,
6330 SSE_REGNO (sse_regno
));
6332 case X86_64_X87_CLASS
:
6333 case X86_64_COMPLEX_X87_CLASS
:
6334 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
6335 case X86_64_NO_CLASS
:
6336 /* Zero sized array, struct or class. */
6341 if (n
== 2 && regclass
[0] == X86_64_SSE_CLASS
6342 && regclass
[1] == X86_64_SSEUP_CLASS
&& mode
!= BLKmode
)
6343 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
6345 && regclass
[0] == X86_64_SSE_CLASS
6346 && regclass
[1] == X86_64_SSEUP_CLASS
6347 && regclass
[2] == X86_64_SSEUP_CLASS
6348 && regclass
[3] == X86_64_SSEUP_CLASS
6350 return gen_rtx_REG (mode
, SSE_REGNO (sse_regno
));
6353 && regclass
[0] == X86_64_X87_CLASS
&& regclass
[1] == X86_64_X87UP_CLASS
)
6354 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
6355 if (n
== 2 && regclass
[0] == X86_64_INTEGER_CLASS
6356 && regclass
[1] == X86_64_INTEGER_CLASS
6357 && (mode
== CDImode
|| mode
== TImode
|| mode
== TFmode
)
6358 && intreg
[0] + 1 == intreg
[1])
6359 return gen_rtx_REG (mode
, intreg
[0]);
6361 /* Otherwise figure out the entries of the PARALLEL. */
6362 for (i
= 0; i
< n
; i
++)
6366 switch (regclass
[i
])
6368 case X86_64_NO_CLASS
:
6370 case X86_64_INTEGER_CLASS
:
6371 case X86_64_INTEGERSI_CLASS
:
6372 /* Merge TImodes on aligned occasions here too. */
6373 if (i
* 8 + 8 > bytes
)
6374 tmpmode
= mode_for_size ((bytes
- i
* 8) * BITS_PER_UNIT
, MODE_INT
, 0);
6375 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
6379 /* We've requested 24 bytes we don't have mode for. Use DImode. */
6380 if (tmpmode
== BLKmode
)
6382 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6383 gen_rtx_REG (tmpmode
, *intreg
),
6387 case X86_64_SSESF_CLASS
:
6388 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6389 gen_rtx_REG (SFmode
,
6390 SSE_REGNO (sse_regno
)),
6394 case X86_64_SSEDF_CLASS
:
6395 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6396 gen_rtx_REG (DFmode
,
6397 SSE_REGNO (sse_regno
)),
6401 case X86_64_SSE_CLASS
:
6409 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
6419 && regclass
[1] == X86_64_SSEUP_CLASS
6420 && regclass
[2] == X86_64_SSEUP_CLASS
6421 && regclass
[3] == X86_64_SSEUP_CLASS
);
6428 exp
[nexps
++] = gen_rtx_EXPR_LIST (VOIDmode
,
6429 gen_rtx_REG (tmpmode
,
6430 SSE_REGNO (sse_regno
)),
6439 /* Empty aligned struct, union or class. */
6443 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
6444 for (i
= 0; i
< nexps
; i
++)
6445 XVECEXP (ret
, 0, i
) = exp
[i
];
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */
6454 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6455 const_tree type
, HOST_WIDE_INT bytes
,
6456 HOST_WIDE_INT words
)
6472 cum
->words
+= words
;
6473 cum
->nregs
-= words
;
6474 cum
->regno
+= words
;
6476 if (cum
->nregs
<= 0)
6484 /* OImode shouldn't be used directly. */
6488 if (cum
->float_in_sse
< 2)
6491 if (cum
->float_in_sse
< 1)
6508 if (!type
|| !AGGREGATE_TYPE_P (type
))
6510 cum
->sse_words
+= words
;
6511 cum
->sse_nregs
-= 1;
6512 cum
->sse_regno
+= 1;
6513 if (cum
->sse_nregs
<= 0)
6527 if (!type
|| !AGGREGATE_TYPE_P (type
))
6529 cum
->mmx_words
+= words
;
6530 cum
->mmx_nregs
-= 1;
6531 cum
->mmx_regno
+= 1;
6532 if (cum
->mmx_nregs
<= 0)
6543 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
6544 const_tree type
, HOST_WIDE_INT words
, bool named
)
6546 int int_nregs
, sse_nregs
;
6548 /* Unnamed 256bit vector mode parameters are passed on stack. */
6549 if (!named
&& VALID_AVX256_REG_MODE (mode
))
6552 if (examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
6553 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
6555 cum
->nregs
-= int_nregs
;
6556 cum
->sse_nregs
-= sse_nregs
;
6557 cum
->regno
+= int_nregs
;
6558 cum
->sse_regno
+= sse_nregs
;
6562 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
6563 cum
->words
= (cum
->words
+ align
- 1) & ~(align
- 1);
6564 cum
->words
+= words
;
6569 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
6570 HOST_WIDE_INT words
)
6572 /* Otherwise, this should be passed indirect. */
6573 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
6575 cum
->words
+= words
;
/* Update the data in CUM to advance over an argument of mode MODE and
   data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
			   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  HOST_WIDE_INT bytes, words;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (type)
    mode = type_natural_mode (type, NULL);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    function_arg_advance_ms_64 (cum, bytes, words);
  else if (TARGET_64BIT)
    function_arg_advance_64 (cum, mode, type, words, named);
  else
    function_arg_advance_32 (cum, mode, type, bytes, words);
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  static bool warnedsse, warnedmmx;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;
6648 if (words
<= cum
->nregs
)
6650 int regno
= cum
->regno
;
6652 /* Fastcall allocates the first two DWORD (SImode) or
6653 smaller arguments to ECX and EDX if it isn't an
6659 || (type
&& AGGREGATE_TYPE_P (type
)))
6662 /* ECX not EAX is the first allocated register. */
6663 if (regno
== AX_REG
)
6666 return gen_rtx_REG (mode
, regno
);
6671 if (cum
->float_in_sse
< 2)
6674 if (cum
->float_in_sse
< 1)
6678 /* In 32bit, we pass TImode in xmm registers. */
6685 if (!type
|| !AGGREGATE_TYPE_P (type
))
6687 if (!TARGET_SSE
&& !warnedsse
&& cum
->warn_sse
)
6690 warning (0, "SSE vector argument without SSE enabled "
6694 return gen_reg_or_parallel (mode
, orig_mode
,
6695 cum
->sse_regno
+ FIRST_SSE_REG
);
6700 /* OImode shouldn't be used directly. */
6709 if (!type
|| !AGGREGATE_TYPE_P (type
))
6712 return gen_reg_or_parallel (mode
, orig_mode
,
6713 cum
->sse_regno
+ FIRST_SSE_REG
);
6723 if (!type
|| !AGGREGATE_TYPE_P (type
))
6725 if (!TARGET_MMX
&& !warnedmmx
&& cum
->warn_mmx
)
6728 warning (0, "MMX vector argument without MMX enabled "
6732 return gen_reg_or_parallel (mode
, orig_mode
,
6733 cum
->mmx_regno
+ FIRST_MMX_REG
);
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		 enum machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
6747 if (mode
== VOIDmode
)
6748 return GEN_INT (cum
->maybe_vaarg
6749 ? (cum
->sse_nregs
< 0
6750 ? X86_64_SSE_REGPARM_MAX
6765 /* Unnamed 256bit vector mode parameters are passed on stack. */
6771 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
6773 &x86_64_int_parameter_registers
[cum
->regno
],
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
		    enum machine_mode orig_mode, bool named,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
6786 if (mode
== VOIDmode
)
6787 return GEN_INT (-2);
6789 /* If we've run out of registers, it goes on the stack. */
6790 if (cum
->nregs
== 0)
6793 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
6795 /* Only floating point modes are passed in anything but integer regs. */
6796 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
6799 regno
= cum
->regno
+ FIRST_SSE_REG
;
6804 /* Unnamed floating parameters are passed in both the
6805 SSE and integer registers. */
6806 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
6807 t2
= gen_rtx_REG (mode
, regno
);
6808 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
6809 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
6810 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
6813 /* Handle aggregated types passed in register. */
6814 if (orig_mode
== BLKmode
)
6816 if (bytes
> 0 && bytes
<= 8)
6817 mode
= (bytes
> 4 ? DImode
: SImode
);
6818 if (mode
== BLKmode
)
6822 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   MODE is the argument's machine mode.  TYPE is the data type of the
   argument.  It is null for libcalls where that information may not be
   available.  CUM gives information about the preceding args and about
   the function being called.  NAMED is nonzero if this argument is a
   named parameter (otherwise it is an extra parameter matching an
   ellipsis).  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
		   const_tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  enum machine_mode mode = omode;
  HOST_WIDE_INT bytes, words;
  rtx arg;

  if (mode == BLKmode)
    bytes = int_size_in_bytes (type);
  else
    bytes = GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (type && TREE_CODE (type) == VECTOR_TYPE)
    mode = type_natural_mode (type, cum);

  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
  else if (TARGET_64BIT)
    arg = function_arg_64 (cum, mode, omode, type, named);
  else
    arg = function_arg_32 (cum, mode, omode, type, bytes, words);

  if (TARGET_VZEROUPPER && function_pass_avx256_p (arg))
    {
      /* This argument uses 256bit AVX modes.  */
      if (cum->caller)
	cfun->machine->callee_pass_avx256_p = true;
      else
	cfun->machine->caller_pass_avx256_p = true;
    }

  return arg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */
static bool
ix86_pass_by_reference (cumulative_args_t cum_v ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED,
			const_tree type, bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* See Windows x64 Software Convention.  */
  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
    {
      int msize = (int) GET_MODE_SIZE (mode);
      if (type)
	{
	  /* Arrays are passed by reference.  */
	  if (TREE_CODE (type) == ARRAY_TYPE)
	    return true;

	  if (AGGREGATE_TYPE_P (type))
	    {
	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		 are passed by reference.  */
	      msize = int_size_in_bytes (type);
	    }
	}

      /* __m128 is passed by reference.  */
      switch (msize)
	{
	case 1: case 2: case 4: case 8:
	  break;
	default:
	  return true;
	}
    }
  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
    return true;

  return false;
}
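/* Illustrative example (not part of GCC): under the Windows x64
   convention, struct S { char c[3]; } has size 3, which is none of
   1, 2, 4 or 8, so an argument of type S is copied to memory and a
   pointer to the copy is passed in the argument register instead.  */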
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */
6924 ix86_compat_aligned_value_p (const_tree type
)
6926 enum machine_mode mode
= TYPE_MODE (type
);
6927 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
6931 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
6933 if (TYPE_ALIGN (type
) < 128)
6936 if (AGGREGATE_TYPE_P (type
))
6938 /* Walk the aggregates recursively. */
6939 switch (TREE_CODE (type
))
6943 case QUAL_UNION_TYPE
:
6947 /* Walk all the structure fields. */
6948 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
6950 if (TREE_CODE (field
) == FIELD_DECL
6951 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
	  /* Just for use if some languages pass arrays by value.  */
6959 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */
static unsigned int
ix86_compat_function_arg_boundary (enum machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
7011 enum machine_mode mode
= TYPE_MODE (type
);
7013 if (mode
== XFmode
|| mode
== XCmode
)
7016 if (TYPE_ALIGN (type
) < 128)
7019 if (AGGREGATE_TYPE_P (type
))
7021 /* Walk the aggregates recursively. */
7022 switch (TREE_CODE (type
))
7026 case QUAL_UNION_TYPE
:
7030 /* Walk all the structure fields. */
7031 for (field
= TYPE_FIELDS (type
);
7033 field
= DECL_CHAIN (field
))
7035 if (TREE_CODE (field
) == FIELD_DECL
7036 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
	  /* Just for use if some languages pass arrays by value.  */
7044 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
7053 return TYPE_ALIGN (type
) >= 128;
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */
7062 ix86_function_arg_boundary (enum machine_mode mode
, const_tree type
)
7067 /* Since the main variant type is used for call, we convert it to
7068 the main variant type. */
7069 type
= TYPE_MAIN_VARIANT (type
);
7070 align
= TYPE_ALIGN (type
);
7073 align
= GET_MODE_ALIGNMENT (mode
);
7074 if (align
< PARM_BOUNDARY
)
7075 align
= PARM_BOUNDARY
;
7079 unsigned int saved_align
= align
;
7083 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7086 if (mode
== XFmode
|| mode
== XCmode
)
7087 align
= PARM_BOUNDARY
;
7089 else if (!ix86_contains_aligned_value_p (type
))
7090 align
= PARM_BOUNDARY
;
7093 align
= PARM_BOUNDARY
;
7098 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
7102 inform (input_location
,
7103 "The ABI for passing parameters with %d-byte"
7104 " alignment has changed in GCC 4.6",
7105 align
/ BITS_PER_UNIT
);
7112 /* Return true if N is a possible register number of function value. */
7115 ix86_function_value_regno_p (const unsigned int regno
)
7122 case FIRST_FLOAT_REG
:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
7126 if (TARGET_64BIT
&& ix86_abi
== MS_ABI
)
7128 return TARGET_FLOAT_RETURNS_IN_80387
;
7134 if (TARGET_MACHO
|| TARGET_64BIT
)
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;
  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
7156 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7157 regno
= FIRST_MMX_REG
;
  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
7162 else if (mode
== TImode
7163 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7164 regno
= FIRST_SSE_REG
;
7166 /* 32-byte vector modes in %ymm0. */
7167 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
7168 regno
= FIRST_SSE_REG
;
7170 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7171 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
7172 regno
= FIRST_FLOAT_REG
;
7174 /* Most things go in %eax. */
  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
7179 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
7181 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
7182 if ((sse_level
>= 1 && mode
== SFmode
)
7183 || (sse_level
== 2 && mode
== DFmode
))
7184 regno
= FIRST_SSE_REG
;
7187 /* OImode shouldn't be used directly. */
7188 gcc_assert (mode
!= OImode
);
7190 return gen_rtx_REG (orig_mode
, regno
);
7194 function_value_64 (enum machine_mode orig_mode
, enum machine_mode mode
,
7199 /* Handle libcalls, which don't provide a type node. */
7200 if (valtype
== NULL
)
7214 regno
= FIRST_SSE_REG
;
7218 regno
= FIRST_FLOAT_REG
;
7226 return gen_rtx_REG (mode
, regno
);
7228 else if (POINTER_TYPE_P (valtype
))
7230 /* Pointers are always returned in Pmode. */
7234 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
7235 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
7236 x86_64_int_return_registers
, 0);
  /* For zero sized structures, construct_container returns NULL, but we
     need to keep the rest of the compiler happy by returning a meaningful
     value.  */
7241 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
7247 function_value_ms_64 (enum machine_mode orig_mode
, enum machine_mode mode
)
7249 unsigned int regno
= AX_REG
;
7253 switch (GET_MODE_SIZE (mode
))
7256 if((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7257 && !COMPLEX_MODE_P (mode
))
7258 regno
= FIRST_SSE_REG
;
7262 if (mode
== SFmode
|| mode
== DFmode
)
7263 regno
= FIRST_SSE_REG
;
7269 return gen_rtx_REG (orig_mode
, regno
);
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       enum machine_mode orig_mode, enum machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
    return function_value_ms_64 (orig_mode, mode);
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to Pmode.  */

static enum machine_mode
ix86_promote_function_mode (const_tree type, enum machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (type != NULL_TREE && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return Pmode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}

static rtx
ix86_libcall_value (enum machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
7324 /* Return true iff type is returned in memory. */
7326 static bool ATTRIBUTE_UNUSED
7327 return_in_memory_32 (const_tree type
, enum machine_mode mode
)
7331 if (mode
== BLKmode
)
7334 size
= int_size_in_bytes (type
);
7336 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
7339 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
7341 /* User-created vectors small enough to fit in EAX. */
  /* MMX/3dNow values are returned in MM0,
     except when it doesn't exist or the ABI prescribes otherwise.  */
7348 return !TARGET_MMX
|| TARGET_VECT8_RETURNS
;
7350 /* SSE values are returned in XMM0, except when it doesn't exist. */
7354 /* AVX values are returned in YMM0, except when it doesn't exist. */
7365 /* OImode shouldn't be used directly. */
7366 gcc_assert (mode
!= OImode
);
7371 static bool ATTRIBUTE_UNUSED
7372 return_in_memory_64 (const_tree type
, enum machine_mode mode
)
7374 int needed_intregs
, needed_sseregs
;
7375 return !examine_argument (mode
, type
, 1, &needed_intregs
, &needed_sseregs
);
7378 static bool ATTRIBUTE_UNUSED
7379 return_in_memory_ms_64 (const_tree type
, enum machine_mode mode
)
7381 HOST_WIDE_INT size
= int_size_in_bytes (type
);
7383 /* __m128 is returned in xmm0. */
7384 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
7385 && !COMPLEX_MODE_P (mode
) && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
7388 /* Otherwise, the size must be exactly in [1248]. */
7389 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
7393 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
7395 #ifdef SUBTARGET_RETURN_IN_MEMORY
7396 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
7398 const enum machine_mode mode
= type_natural_mode (type
, NULL
);
7402 if (ix86_function_type_abi (fntype
) == MS_ABI
)
7403 return return_in_memory_ms_64 (type
, mode
);
7405 return return_in_memory_64 (type
, mode
);
7408 return return_in_memory_32 (type
, mode
);
/* When returning SSE vector types, we have a choice of either
     (1) being abi incompatible with a -march switch, or
     (2) generating an error.
   Given no good solution, I think the safest thing is one warning.
   The user won't be able to use -Werror, but....

   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
   called in response to actually generating a caller or callee that
   uses such a type.  As opposed to TARGET_RETURN_IN_MEMORY, which is called
   via aggregate_value_p for general type probing from tree-ssa.  */
7424 ix86_struct_value_rtx (tree type
, int incoming ATTRIBUTE_UNUSED
)
7426 static bool warnedsse
, warnedmmx
;
7428 if (!TARGET_64BIT
&& type
)
7430 /* Look at the return type of the function, not the function type. */
7431 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (type
));
7433 if (!TARGET_SSE
&& !warnedsse
)
7436 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
7439 warning (0, "SSE vector return without SSE enabled "
7444 if (!TARGET_MMX
&& !warnedmmx
)
7446 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
7449 warning (0, "MMX vector return without MMX enabled "
/* Create the va_list data type.  */

/* Returns the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
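/* For reference, the record built below matches the x86-64 psABI
   declaration (illustrative C, not emitted by GCC):

       typedef struct __va_list_tag {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } va_list[1];
*/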
7465 ix86_build_builtin_va_list_abi (enum calling_abi abi
)
7467 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
7469 /* For i386 we use plain pointer to argument area. */
7470 if (!TARGET_64BIT
|| abi
== MS_ABI
)
7471 return build_pointer_type (char_type_node
);
7473 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
7474 type_decl
= build_decl (BUILTINS_LOCATION
,
7475 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
7477 f_gpr
= build_decl (BUILTINS_LOCATION
,
7478 FIELD_DECL
, get_identifier ("gp_offset"),
7479 unsigned_type_node
);
7480 f_fpr
= build_decl (BUILTINS_LOCATION
,
7481 FIELD_DECL
, get_identifier ("fp_offset"),
7482 unsigned_type_node
);
7483 f_ovf
= build_decl (BUILTINS_LOCATION
,
7484 FIELD_DECL
, get_identifier ("overflow_arg_area"),
7486 f_sav
= build_decl (BUILTINS_LOCATION
,
7487 FIELD_DECL
, get_identifier ("reg_save_area"),
7490 va_list_gpr_counter_field
= f_gpr
;
7491 va_list_fpr_counter_field
= f_fpr
;
7493 DECL_FIELD_CONTEXT (f_gpr
) = record
;
7494 DECL_FIELD_CONTEXT (f_fpr
) = record
;
7495 DECL_FIELD_CONTEXT (f_ovf
) = record
;
7496 DECL_FIELD_CONTEXT (f_sav
) = record
;
7498 TYPE_STUB_DECL (record
) = type_decl
;
7499 TYPE_NAME (record
) = type_decl
;
7500 TYPE_FIELDS (record
) = f_gpr
;
7501 DECL_CHAIN (f_gpr
) = f_fpr
;
7502 DECL_CHAIN (f_fpr
) = f_ovf
;
7503 DECL_CHAIN (f_ovf
) = f_sav
;
7505 layout_type (record
);
7507 /* The correct type is an array type of one element. */
7508 return build_array_type (record
, build_index_type (size_zero_node
));
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */
7515 ix86_build_builtin_va_list (void)
7517 tree ret
= ix86_build_builtin_va_list_abi (ix86_abi
);
7519 /* Initialize abi specific va_list builtin types. */
7523 if (ix86_abi
== MS_ABI
)
7525 t
= ix86_build_builtin_va_list_abi (SYSV_ABI
);
7526 if (TREE_CODE (t
) != RECORD_TYPE
)
7527 t
= build_variant_type_copy (t
);
7528 sysv_va_list_type_node
= t
;
7533 if (TREE_CODE (t
) != RECORD_TYPE
)
7534 t
= build_variant_type_copy (t
);
7535 sysv_va_list_type_node
= t
;
7537 if (ix86_abi
!= MS_ABI
)
7539 t
= ix86_build_builtin_va_list_abi (MS_ABI
);
7540 if (TREE_CODE (t
) != RECORD_TYPE
)
7541 t
= build_variant_type_copy (t
);
7542 ms_va_list_type_node
= t
;
7547 if (TREE_CODE (t
) != RECORD_TYPE
)
7548 t
= build_variant_type_copy (t
);
7549 ms_va_list_type_node
= t
;
7556 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
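/* Layout of the register save area this sets up (x86-64 SysV, for
   reference): up to X86_64_REGPARM_MAX (6) GPRs in 8-byte slots at
   offsets 0..47, followed by up to X86_64_SSE_REGPARM_MAX (8) SSE
   registers in 16-byte slots; gp_offset and fp_offset in the va_list
   index into this area.  */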
7559 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
7565 /* GPR size of varargs save area. */
7566 if (cfun
->va_list_gpr_size
)
7567 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
7569 ix86_varargs_gpr_size
= 0;
  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
7573 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7574 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
7576 ix86_varargs_fpr_size
= 0;
7578 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
7581 save_area
= frame_pointer_rtx
;
7582 set
= get_varargs_alias_set ();
7584 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
7585 if (max
> X86_64_REGPARM_MAX
)
7586 max
= X86_64_REGPARM_MAX
;
7588 for (i
= cum
->regno
; i
< max
; i
++)
7590 mem
= gen_rtx_MEM (Pmode
,
7591 plus_constant (save_area
, i
* UNITS_PER_WORD
));
7592 MEM_NOTRAP_P (mem
) = 1;
7593 set_mem_alias_set (mem
, set
);
7594 emit_move_insn (mem
, gen_rtx_REG (Pmode
,
7595 x86_64_int_parameter_registers
[i
]));
7598 if (ix86_varargs_fpr_size
)
7600 enum machine_mode smode
;
      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */
7607 label
= gen_label_rtx ();
7608 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
7609 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
7617 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
7618 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
7620 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
7621 if (max
> X86_64_SSE_REGPARM_MAX
)
7622 max
= X86_64_SSE_REGPARM_MAX
;
7624 for (i
= cum
->sse_regno
; i
< max
; ++i
)
7626 mem
= plus_constant (save_area
, i
* 16 + ix86_varargs_gpr_size
);
7627 mem
= gen_rtx_MEM (smode
, mem
);
7628 MEM_NOTRAP_P (mem
) = 1;
7629 set_mem_alias_set (mem
, set
);
7630 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
7632 emit_move_insn (mem
, gen_rtx_REG (smode
, SSE_REGNO (i
)));
7640 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
7642 alias_set_type set
= get_varargs_alias_set ();
  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
7647 ix86_varargs_gpr_size
= 0;
7648 ix86_varargs_fpr_size
= 0;
7650 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
7654 mem
= gen_rtx_MEM (Pmode
,
7655 plus_constant (virtual_incoming_args_rtx
,
7656 i
* UNITS_PER_WORD
));
7657 MEM_NOTRAP_P (mem
) = 1;
7658 set_mem_alias_set (mem
, set
);
7660 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
7661 emit_move_insn (mem
, reg
);
7666 ix86_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7667 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7670 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7671 CUMULATIVE_ARGS next_cum
;
  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
7676 gcc_assert (!no_rtl
);
7681 fntype
= TREE_TYPE (current_function_decl
);
  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
7686 if (stdarg_p (fntype
))
7687 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), mode
, type
,
7690 if (cum
->call_abi
== MS_ABI
)
7691 setup_incoming_varargs_ms_64 (&next_cum
);
7693 setup_incoming_varargs_64 (&next_cum
);
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
7711 /* Implement va_start. */
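/* Example (illustrative): for int f (int n, ...) on x86-64 SysV, one
   integer argument is named, so va_start stores gp_offset = 1 * 8 and
   fp_offset = 0 * 16 + 8 * X86_64_REGPARM_MAX (i.e. 48), points
   overflow_arg_area just past any named stack words, and sets
   reg_save_area to the block saved by the prologue.  */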
7714 ix86_va_start (tree valist
, rtx nextarg
)
7716 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
7717 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7718 tree gpr
, fpr
, ovf
, sav
, t
;
7722 if (flag_split_stack
7723 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7725 unsigned int scratch_regno
;
      /* When we are splitting the stack, we can't refer to the stack
	 arguments using internal_arg_pointer, because they may be on
	 the old stack.  The split stack prologue will arrange to
	 leave a pointer to the old stack arguments in a scratch
	 register, which we here copy to a pseudo-register.  The split
	 stack prologue can't set the pseudo-register directly because
	 it (the prologue) runs before any registers have been saved.  */
7735 scratch_regno
= split_stack_prologue_scratch_regno ();
7736 if (scratch_regno
!= INVALID_REGNUM
)
7740 reg
= gen_reg_rtx (Pmode
);
7741 cfun
->machine
->split_stack_varargs_pointer
= reg
;
7744 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
7748 push_topmost_sequence ();
7749 emit_insn_after (seq
, entry_of_function ());
7750 pop_topmost_sequence ();
7754 /* Only 64bit target needs something special. */
7755 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7757 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7758 std_expand_builtin_va_start (valist
, nextarg
);
7763 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
7764 next
= expand_binop (ptr_mode
, add_optab
,
7765 cfun
->machine
->split_stack_varargs_pointer
,
7766 crtl
->args
.arg_offset_rtx
,
7767 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
7768 convert_move (va_r
, next
, 0);
7773 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7774 f_fpr
= DECL_CHAIN (f_gpr
);
7775 f_ovf
= DECL_CHAIN (f_fpr
);
7776 f_sav
= DECL_CHAIN (f_ovf
);
7778 valist
= build_simple_mem_ref (valist
);
7779 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
7780 /* The following should be folded into the MEM_REF offset. */
7781 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
7783 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
7785 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
7787 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
7790 /* Count number of gp and fp argument registers used. */
7791 words
= crtl
->args
.info
.words
;
7792 n_gpr
= crtl
->args
.info
.regno
;
7793 n_fpr
= crtl
->args
.info
.sse_regno
;
7795 if (cfun
->va_list_gpr_size
)
7797 type
= TREE_TYPE (gpr
);
7798 t
= build2 (MODIFY_EXPR
, type
,
7799 gpr
, build_int_cst (type
, n_gpr
* 8));
7800 TREE_SIDE_EFFECTS (t
) = 1;
7801 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7804 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
7806 type
= TREE_TYPE (fpr
);
7807 t
= build2 (MODIFY_EXPR
, type
, fpr
,
7808 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
7809 TREE_SIDE_EFFECTS (t
) = 1;
7810 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7813 /* Find the overflow area. */
7814 type
= TREE_TYPE (ovf
);
7815 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
7816 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
7818 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
7819 t
= make_tree (type
, ovf_rtx
);
7821 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
7822 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
7823 TREE_SIDE_EFFECTS (t
) = 1;
7824 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7826 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
      /* Find the register save area.
	 The prologue of the function saves it right above the stack frame.  */
7830 type
= TREE_TYPE (sav
);
7831 t
= make_tree (type
, frame_pointer_rtx
);
7832 if (!ix86_varargs_gpr_size
)
7833 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
7834 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
7835 TREE_SIDE_EFFECTS (t
) = 1;
7836 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7840 /* Implement va_arg. */
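/* Example (illustrative): va_arg (ap, double) compares fp_offset
   against the end of the FP part of the save area (48 + 8 * 16 = 176
   bytes); if the value still fits, it is loaded from
   reg_save_area + fp_offset and fp_offset is bumped by 16, otherwise
   it is fetched from overflow_arg_area.  */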
7843 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
7846 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
7847 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
7848 tree gpr
, fpr
, ovf
, sav
, t
;
7850 tree lab_false
, lab_over
= NULL_TREE
;
7855 enum machine_mode nat_mode
;
7856 unsigned int arg_boundary
;
7858 /* Only 64bit target needs something special. */
7859 if (!TARGET_64BIT
|| is_va_list_char_pointer (TREE_TYPE (valist
)))
7860 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
7862 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
7863 f_fpr
= DECL_CHAIN (f_gpr
);
7864 f_ovf
= DECL_CHAIN (f_fpr
);
7865 f_sav
= DECL_CHAIN (f_ovf
);
7867 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
7868 build_va_arg_indirect_ref (valist
), f_gpr
, NULL_TREE
);
7869 valist
= build_va_arg_indirect_ref (valist
);
7870 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
7871 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
7872 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
7874 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7876 type
= build_pointer_type (type
);
7877 size
= int_size_in_bytes (type
);
7878 rsize
= (size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
7880 nat_mode
= type_natural_mode (type
, NULL
);
7889 /* Unnamed 256bit vector mode parameters are passed on stack. */
7890 if (!TARGET_64BIT_MS_ABI
)
7897 container
= construct_container (nat_mode
, TYPE_MODE (type
),
7898 type
, 0, X86_64_REGPARM_MAX
,
7899 X86_64_SSE_REGPARM_MAX
, intreg
,
7904 /* Pull the value out of the saved registers. */
7906 addr
= create_tmp_var (ptr_type_node
, "addr");
7910 int needed_intregs
, needed_sseregs
;
7912 tree int_addr
, sse_addr
;
7914 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
7915 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
7917 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
7919 need_temp
= (!REG_P (container
)
7920 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
7921 || TYPE_ALIGN (type
) > 128));
7923 /* In case we are passing structure, verify that it is consecutive block
7924 on the register save area. If not we need to do moves. */
7925 if (!need_temp
&& !REG_P (container
))
7927 /* Verify that all registers are strictly consecutive */
7928 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
7932 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
7934 rtx slot
= XVECEXP (container
, 0, i
);
7935 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
7936 || INTVAL (XEXP (slot
, 1)) != i
* 16)
7944 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
7946 rtx slot
= XVECEXP (container
, 0, i
);
7947 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
7948 || INTVAL (XEXP (slot
, 1)) != i
* 8)
7960 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
7961 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
7964 /* First ensure that we fit completely in registers. */
7967 t
= build_int_cst (TREE_TYPE (gpr
),
7968 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
7969 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
7970 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
7971 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
7972 gimplify_and_add (t
, pre_p
);
7976 t
= build_int_cst (TREE_TYPE (fpr
),
7977 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
7978 + X86_64_REGPARM_MAX
* 8);
7979 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
7980 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
7981 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
7982 gimplify_and_add (t
, pre_p
);
7985 /* Compute index to start of area used for integer regs. */
7988 /* int_addr = gpr + sav; */
7989 t
= fold_build_pointer_plus (sav
, gpr
);
7990 gimplify_assign (int_addr
, t
, pre_p
);
7994 /* sse_addr = fpr + sav; */
7995 t
= fold_build_pointer_plus (sav
, fpr
);
7996 gimplify_assign (sse_addr
, t
, pre_p
);
8000 int i
, prev_size
= 0;
8001 tree temp
= create_tmp_var (type
, "va_arg_tmp");
8004 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
8005 gimplify_assign (addr
, t
, pre_p
);
8007 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
8009 rtx slot
= XVECEXP (container
, 0, i
);
8010 rtx reg
= XEXP (slot
, 0);
8011 enum machine_mode mode
= GET_MODE (reg
);
8017 tree dest_addr
, dest
;
8018 int cur_size
= GET_MODE_SIZE (mode
);
8020 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
8021 prev_size
= INTVAL (XEXP (slot
, 1));
8022 if (prev_size
+ cur_size
> size
)
8024 cur_size
= size
- prev_size
;
8025 mode
= mode_for_size (cur_size
* BITS_PER_UNIT
, MODE_INT
, 1);
8026 if (mode
== BLKmode
)
8029 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
8030 if (mode
== GET_MODE (reg
))
8031 addr_type
= build_pointer_type (piece_type
);
8033 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8035 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
8038 if (SSE_REGNO_P (REGNO (reg
)))
8040 src_addr
= sse_addr
;
8041 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
8045 src_addr
= int_addr
;
8046 src_offset
= REGNO (reg
) * 8;
8048 src_addr
= fold_convert (addr_type
, src_addr
);
8049 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
8051 dest_addr
= fold_convert (daddr_type
, addr
);
8052 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
8053 if (cur_size
== GET_MODE_SIZE (mode
))
8055 src
= build_va_arg_indirect_ref (src_addr
);
8056 dest
= build_va_arg_indirect_ref (dest_addr
);
8058 gimplify_assign (dest
, src
, pre_p
);
8063 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
8064 3, dest_addr
, src_addr
,
8065 size_int (cur_size
));
8066 gimplify_and_add (copy
, pre_p
);
8068 prev_size
+= cur_size
;
8074 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
8075 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
8076 gimplify_assign (gpr
, t
, pre_p
);
8081 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
8082 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
8083 gimplify_assign (fpr
, t
, pre_p
);
8086 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
8088 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
8091 /* ... otherwise out of the overflow area. */
8093 /* When we align parameter on stack for caller, if the parameter
8094 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8095 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8096 here with caller. */
8097 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
8098 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
8099 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
8101 /* Care for on-stack alignment if needed. */
8102 if (arg_boundary
<= 64 || size
== 0)
8106 HOST_WIDE_INT align
= arg_boundary
/ 8;
8107 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
8108 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8109 build_int_cst (TREE_TYPE (t
), -align
));
8112 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
8113 gimplify_assign (addr
, t
, pre_p
);
8115 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
8116 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
8119 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
8121 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
8122 addr
= fold_convert (ptrtype
, addr
);
8125 addr
= build_va_arg_indirect_ref (addr
);
8126 return build_va_arg_indirect_ref (addr
);
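
/* A worked example of the on-stack alignment above: for a type with
   128-bit alignment, ARG_BOUNDARY is 128 so ALIGN is 16 and

	t = (ovf + 15) & -16

   rounds an overflow pointer of, say, 0x7fffffffe028 up to the next
   16-byte boundary 0x7fffffffe030, while a pointer that is already
   16-byte aligned is left unchanged.  */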
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

int
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (GET_CODE (mem) == SUBREG)
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char * cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
                    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);
  REAL_VALUE_TYPE r;

  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
        if (real_identical (&r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (&r))
    return 8;
  if (real_identical (&r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
                                       XFmode);
}
/* Return 1 if X is all 0s and 2 if X is all 1s
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x)
{
  enum machine_mode mode = GET_MODE (x);

  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
    return 1;
  if (vector_all_ones_operand (x, mode))
    switch (mode)
      {
      case V16QImode:
      case V8HImode:
      case V4SImode:
      case V2DImode:
        if (TARGET_SSE2)
          return 2;
      case V32QImode:
      case V16HImode:
      case V8SImode:
      case V4DImode:
        if (TARGET_AVX2)
          return 2;
      default:
        break;
      }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_sse_constant_opcode (rtx insn, rtx x)
{
  switch (standard_sse_constant_p (x))
    {
    case 1:
      switch (get_attr_mode (insn))
        {
        case MODE_TI:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vpxor\t%0, %d0";
        case MODE_V2DF:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "%vxorpd\t%0, %d0";
        case MODE_V4SF:
          return "%vxorps\t%0, %d0";

        case MODE_OI:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "vpxor\t%x0, %x0, %x0";
        case MODE_V4DF:
          if (!TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
            return "vxorpd\t%x0, %x0, %x0";
        case MODE_V8SF:
          return "vxorps\t%x0, %x0, %x0";

        default:
          break;
        }

    case 2:
      if (TARGET_AVX)
        return "vpcmpeqd\t%0, %0, %0";
      else
        return "pcmpeqd\t%0, %0";

    default:
      break;
    }
  gcc_unreachable ();
}
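
/* The templates returned above are the standard dependency-breaking
   idioms: a register is zeroed by xoring it with itself and set to
   all-ones by comparing it for equality with itself, e.g.

	xorps	%xmm0, %xmm0		# xmm0 = all zeros
	pcmpeqd	%xmm1, %xmm1		# xmm1 = all ones

   so neither constant ever needs a load from the constant pool.  */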
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (op, i) - 1; j >= 0; j--)
            if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
              return true;
        }

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
        return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  struct ix86_frame frame;

  if (! reload_completed || frame_pointer_needed)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  ix86_compute_frame_layout (&frame);
  return (frame.stack_pointer_offset == UNITS_PER_WORD
          && (frame.nregs + frame.nsseregs) == 0);
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!current_function_is_leaf
          || ix86_current_function_calls_tls_descriptor))
    return true;

  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

static int pic_labels_used;
/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
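
/* E.g. for REGNO == BX_REG this produces "__x86.get_pc_thunk.bx" in
   the hidden-linkonce case, and an internal label of the form
   "LPR<regno>" (the exact spelling is target-dependent) otherwise.  */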
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  for (regno = AX_REG; regno <= SP_REG; regno++)
    {
      char name[32];
      tree decl;

      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          switch_to_section (darwin_sections[text_coal_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);

          targetm.asm_out.unique_section (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          targetm.asm_out.globalize_label (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      init_function_start (decl);
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (!flag_pic)
    {
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());

      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
#endif

      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }
  else
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" label name ("Lxx$pb") here too.  This
         is what will be referenced by the Mach-O PIC subsystem.  */
      if (!label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
      else
        targetm.asm_out.internal_label (asm_out_file, "L",
                                        CODE_LABEL_NUMBER (label));
#endif
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
/* Generate a "push" pattern for input ARG.  */

static rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  return gen_rtx_SET (VOIDmode,
                      gen_rtx_MEM (Pmode,
                                   gen_rtx_PRE_DEC (Pmode,
                                                    stack_pointer_rtx)),
                      arg);
}
/* Generate a "pop" pattern for input ARG.  */

static rtx
gen_pop (rtx arg)
{
  return gen_rtx_SET (VOIDmode,
                      arg,
                      gen_rtx_MEM (Pmode,
                                   gen_rtx_POST_INC (Pmode,
                                                     stack_pointer_rtx)));
}
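
/* For reference, the two helpers above build the canonical push/pop
   RTL; with 32-bit Pmode the patterns are roughly

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ...))	;; push
	(set (reg:SI ...) (mem:SI (post_inc:SI (reg:SI sp))))	;; pop

   with gen_push additionally keeping the CFA/SP offset tracking in
   cfun->machine->fs up to date.  */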
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static int
ix86_select_alt_pic_regnum (void)
{
  if (current_function_is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
/* Return TRUE if we need to save REGNO.  */

static bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return)
{
  if (pic_offset_table_rtx
      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
          || crtl->profile
          || crtl->calls_eh_return
          || crtl->uses_const_pool))
    return ix86_select_alt_pic_regnum () == INVALID_REGNUM;

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
    return true;

  return (df_regs_ever_live_p (regno)
          && !call_used_regs[regno]
          && !fixed_regs[regno]
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return the number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Return the number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      nregs ++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
             && to == HARD_FRAME_POINTER_REGNUM)
            || (from == FRAME_POINTER_REGNUM
                && to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
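
/* Put differently, when the frame is realigned off the frame pointer
   only these two eliminations remain valid:

	ARG_POINTER_REGNUM   -> HARD_FRAME_POINTER_REGNUM
	FRAME_POINTER_REGNUM -> STACK_POINTER_REGNUM

   because incoming arguments (above the realignment gap) are only
   reachable from the hard frame pointer, while locals (below it) are
   only reachable from the realigned stack pointer.  */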
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame frame;
  ix86_compute_frame_layout (&frame);

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
           && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
        return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
/* In a dynamically-aligned function, we can't know the offset from
   stack pointer to frame pointer, so we must ensure that setjmp
   eliminates fp against the hard fp (%ebp) rather than trying to
   index from %esp up to the top of the frame across a gap that is
   of unknown (at compile-time) size.  */
static rtx
ix86_builtin_setjmp_frame_value (void)
{
  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
}
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (struct ix86_frame *frame)
{
  unsigned int stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned int preferred_alignment;
  HOST_WIDE_INT size = get_frame_size ();
  HOST_WIDE_INT to_allocate;

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues and leaf functions.  */
  if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16)
      && (!current_function_is_leaf || cfun->calls_alloca != 0
          || ix86_current_function_calls_tls_descriptor))
    {
      preferred_alignment = 16;
      stack_alignment_needed = 16;
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    cfun->machine->use_fast_prologue_epilogue = false;

  /* During reload iteration the amount of registers saved can change.
     Recompute the value as needed.  Do not recompute when amount of registers
     didn't change as reload does multiple calls to the function and does not
     expect the decision to change within single iteration.  */
  else if (!optimize_function_for_size_p (cfun)
           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_get_node (current_function_decl);

      cfun->machine->use_fast_prologue_epilogue_nregs = count;

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        cfun->machine->use_fast_prologue_epilogue = false;
      else
        cfun->machine->use_fast_prologue_epilogue
          = !expensive_function_p (count);
    }

  frame->save_regs_using_mov
    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
       /* If static stack checking is enabled and done with probes,
          the registers need to be saved before allocating the frame.  */
       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);

  /* Skip return address.  */
  offset = UNITS_PER_WORD;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Align and set SSE register save area.  */
  if (frame->nsseregs)
    {
      /* The only ABI that has saved SSE registers (Win64) also has a
         16-byte aligned default stack, and thus we don't need to be
         within the re-aligned local stack frame to save them.  */
      gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
      offset = (offset + 16 - 1) & -16;
      offset += frame->nsseregs * 16;
    }
  frame->sse_reg_save_offset = offset;

  /* The re-aligned stack starts here.  Values before this point are not
     directly comparable with values below this point.  In order to make
     sure that no value happens to be the same before and after, force
     the alignment computation below to add a non-zero value.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed) & -stack_alignment_needed;

  /* Va-arg area */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
  offset += frame->va_arg_size;

  /* Align start of frame for local function.  */
  if (stack_realign_fp
      || offset != frame->sse_reg_save_offset
      || size != 0
      || !current_function_is_leaf
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!current_function_is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!current_function_is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = (offset + preferred_alignment - 1) & -preferred_alignment;

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && current_function_sp_is_unchanging
      && current_function_is_leaf
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff > 240 || (diff & 15) != 0)
        {
          /* Ideally we'd determine what portion of the local stack frame
             (within the constraint of the lowest 240) is most heavily used.
             But without that complication, simply bias the frame pointer
             by 128 bytes so as to maximize the amount of the local stack
             frame that is addressable with 8-bit offsets.  */
          frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
        }
    }
}
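
/* The rounding idiom used repeatedly above,

	offset = (offset + align - 1) & -align;

   rounds OFFSET up to the next multiple of the power-of-two ALIGN;
   e.g. with ALIGN == 16, 40 becomes 48 and 48 stays 48.  The
   stack_realign_fp case deliberately drops the "- 1" so the result
   always moves by at least one byte, keeping offsets on the two sides
   of the realignment boundary distinct.  */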
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
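
/* Some examples of the returned length (which counts only the
   displacement/SIB bytes, not the opcode or ModRM byte): 0(%ebp)
   needs a disp8, len 1; 8(%eax) needs a disp8, len 1; 16(%esp) needs
   a SIB byte plus a disp8, len 2; 1024(%eax) needs a disp32, len 4.  */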
/* Return an RTX that points to CFA_OFFSET within the stack frame.
   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset)
{
  const struct machine_function *m = cfun->machine;
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (m->fs.fp_valid)
        {
          base_reg = hard_frame_pointer_rtx;
          base_offset = m->fs.fp_offset - cfa_offset;
        }
      else if (m->fs.drap_valid)
        {
          base_reg = crtl->drap_reg;
          base_offset = 0 - cfa_offset;
        }
      else if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
        }
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  */
      if (m->fs.sp_valid)
        {
          base_reg = stack_pointer_rtx;
          base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
        }
      if (m->fs.drap_valid)
        {
          toffset = 0 - cfa_offset;
          tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
          if (tlen <= len)
            {
              base_reg = crtl->drap_reg;
              base_offset = toffset;
              len = tlen;
            }
        }
      if (m->fs.fp_valid)
        {
          toffset = m->fs.fp_offset - cfa_offset;
          tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
          if (tlen <= len)
            {
              base_reg = hard_frame_pointer_rtx;
              base_offset = toffset;
              len = tlen;
            }
        }
    }
  gcc_assert (base_reg != NULL);

  return plus_constant (base_reg, base_offset);
}
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
                              HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;

  addr = choose_baseaddr (cfa_offset);
  mem = gen_frame_mem (mode, addr);

  /* For SSE saves, we need to indicate the 128-bit alignment.  */
  set_mem_align (mem, GET_MODE_ALIGNMENT (mode));

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
        {
          /* A bit of a hack.  We force the DRAP register to be saved in
             the re-aligned stack frame, which provides us with a copy
             of the CFA that will last past the prologue.  Install it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_DEF_CFA, mem);
        }
      else
        {
          /* The frame pointer is a stable reference within the
             aligned frame.  Use it.  */
          gcc_checking_assert (cfun->machine->fs.fp_valid);
          addr = plus_constant (hard_frame_pointer_rtx,
                                cfun->machine->fs.fp_offset - cfa_offset);
          mem = gen_rtx_MEM (mode, addr);
          add_reg_note (insn, REG_CFA_EXPRESSION,
                        gen_rtx_SET (VOIDmode, mem, reg));
        }
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
        cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */
static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
      {
        ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
        cfa_offset -= 16;
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}
/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static void
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
                           int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx insn;
  bool add_frame_related_expr = false;

  if (! TARGET_64BIT)
    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
  else if (x86_64_immediate_operand (offset, DImode))
    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
  else
    {
      rtx tmp;
      /* r11 is used by indirect sibcall return as well, set before the
         epilogue and used after the epilogue.  */
      if (style)
        tmp = gen_rtx_REG (DImode, R11_REG);
      else
        {
          gcc_assert (src != hard_frame_pointer_rtx
                      && dest != hard_frame_pointer_rtx);
          tmp = hard_frame_pointer_rtx;
        }
      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
      if (style < 0)
        add_frame_related_expr = true;

      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
    }

  insn = emit_insn (insn);
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (VOIDmode, dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
        {
          rtx r = gen_rtx_PLUS (Pmode, src, offset);
          r = gen_rtx_SET (VOIDmode, dest, r);
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
        }
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;

      if (src == hard_frame_pointer_rtx)
        {
          valid = m->fs.fp_valid;
          ooffset = m->fs.fp_offset;
        }
      else if (src == crtl->drap_reg)
        {
          valid = m->fs.drap_valid;
          ooffset = 0;
        }
      else
        {
          /* Else there are two possibilities: SP itself, which we set
             up as the default above.  Or EH_RETURN_STACKADJ_RTX, which
             is taken care of by hand along the eh_return path.  */
          gcc_checking_assert (src == stack_pointer_rtx
                               || offset == const0_rtx);
        }

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
    }
}
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has
   longer encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  if (TARGET_64BIT)
    {
      /* Use R13 for a nested function or a function that needs a
         static chain.  Since a function with a tail call may use any
         caller-saved registers in the epilogue, DRAP must not use a
         caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for a nested function or a function that needs a
         static chain.  Since a function with a tail call may use any
         caller-saved registers in the epilogue, DRAP must not use a
         caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
        return DI_REG;

      /* Reuse static chain register if it isn't used for parameter
         passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
        {
          unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
          if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
            return CX_REG;
        }
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Prefer the one specified at command line.  */
  if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
           && !TARGET_64BIT
           && ix86_force_align_arg_pointer
           && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute (ix86_force_align_arg_pointer_string,
                           TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save
     area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and returns vDRAP */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx seq, insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
        {
          add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      return drap_vreg;
    }
  else
    return NULL;
}
/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
        = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
        = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
          for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
          && drap_regno != AX_REG)
        regno = AX_REG;
      else if (regparm < 2 && drap_regno != DX_REG)
        regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !static_chain_p
               && drap_regno != CX_REG)
        regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true))
        regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
               && ix86_save_reg (SI_REG, true))
        regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true))
        regno = DI_REG;
      else
        {
          regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
          sr->saved = true;
        }
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}

/* Release a scratch register obtained from the preceding function.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr)
{
  if (sr->saved)
    {
      rtx x, insn = emit_insn (gen_pop (sr->reg));

      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    }
}
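
/* Typical usage of the pair above, as in the stack probing routines
   that follow:

	struct scratch_reg sr;

	get_scratch_register_on_entry (&sr);
	... emit insns that use sr.reg ...
	release_scratch_register_on_entry (&sr);

   When no suitable register is free, the pair transparently
   push/pops the chosen register around the use.  */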
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */

static void
ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
{
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
  const int dope = 4 * UNITS_PER_WORD;
  rtx size_rtx = GEN_INT (size), last;

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 11 insns in the
     generic case while the compile-time loop is made up of 3+2*(n-1) insns
     for n # of intervals.  */
  if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i, adjust;
      bool first_probe = true;

      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it exceeds SIZE.  If only one probe is
         needed, this will not generate any code.  Then adjust and probe
         to PROBE_INTERVAL + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          if (first_probe)
            {
              adjust = 2 * PROBE_INTERVAL + dope;
              first_probe = false;
            }
          else
            adjust = PROBE_INTERVAL;

          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (stack_pointer_rtx, -adjust)));
          emit_stack_probe (stack_pointer_rtx);
        }

      if (first_probe)
        adjust = size + PROBE_INTERVAL + dope;
      else
        adjust = size + PROBE_INTERVAL - i;

      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx, -adjust)));
      emit_stack_probe (stack_pointer_rtx);

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* SP = SP_0 + PROBE_INTERVAL.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              plus_constant (stack_pointer_rtx,
                                             - (PROBE_INTERVAL + dope))));

      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
                              gen_rtx_PLUS (Pmode, sr.reg,
                                            stack_pointer_rtx)));


      /* Step 3: the loop

	 while (SP != LAST_ADDR)
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }

         adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
         values of N from 1 until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));


      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
         assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                  plus_constant (stack_pointer_rtx,
                                                 rounded_size - size)));
          emit_stack_probe (stack_pointer_rtx);
        }

      /* Adjust back to account for the additional first interval.  */
      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                     plus_constant (stack_pointer_rtx,
                                                    PROBE_INTERVAL + dope)));

      release_scratch_register_on_entry (&sr);
    }

  gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);

  /* Even if the stack pointer isn't the CFA register, we need to correctly
     describe the adjustments made to it, in particular differentiate the
     frame-related ones from the frame-unrelated ones.  */
  if (size > 0)
    {
      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
      XVECEXP (expr, 0, 0)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (stack_pointer_rtx, -size));
      XVECEXP (expr, 0, 1)
        = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                       plus_constant (stack_pointer_rtx,
                                      PROBE_INTERVAL + dope + size));
      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
      RTX_FRAME_RELATED_P (last) = 1;
    }

  cfun->machine->fs.sp_offset += size;

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
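
/* As an illustration of the small-size path above, SIZE equal to
   2 * PROBE_INTERVAL expands (in 32-bit AT&T syntax, with DOPE being
   the four-word slack) to roughly

	subl	$(2*PROBE_INTERVAL+DOPE), %esp
	orl	$0, (%esp)			# probe
	subl	$PROBE_INTERVAL, %esp
	orl	$0, (%esp)			# probe
	addl	$(PROBE_INTERVAL+DOPE), %esp

   which nets out to the requested SIZE adjustment while touching
   every interval.  */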
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* SP = SP + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 7 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));

      emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       sr.reg),
                                         rounded_size - size));

      release_scratch_register_on_entry (&sr);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
  fputs ("\tje\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (PROBE_INTERVAL);
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  fprintf (asm_out_file, "\tjmp\t");
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}
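
/* The loop emitted above has this shape (register names are only
   illustrative; the scratch register is whatever was chosen on
   entry):

	.LPSRL0:
		cmpl	%eax, %ecx	# TEST_ADDR == LAST_ADDR ?
		je	.LPSRE0
		subl	$PROBE_INTERVAL, %ecx
		orl	$0, (%esp,%ecx)	# probe
		jmp	.LPSRL0
	.LPSRE0:
*/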
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue
   to be generated in correct form.  */
static void
ix86_finalize_stack_realign_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_realign = (incoming_stack_boundary
                                < (current_function_is_leaf
                                   ? crtl->max_used_stack_slot_alignment
                                   : crtl->stack_alignment_needed));

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
         update it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed, but in the end nothing that
     needed the stack alignment had been spilled, clear frame_pointer_needed
     and say we don't need stack realignment.  */
  if (stack_realign
      && frame_pointer_needed
      && current_function_is_leaf
      && flag_omit_frame_pointer
      && current_function_sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      && !(flag_stack_check && STACK_CHECK_MOVING_SP)
      && !ix86_frame_pointer_required ()
      && get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      HARD_REG_SET set_up_by_prologue, prologue_used;
      basic_block bb;

      CLEAR_HARD_REG_SET (prologue_used);
      CLEAR_HARD_REG_SET (set_up_by_prologue);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
                           HARD_FRAME_POINTER_REGNUM);
      FOR_EACH_BB (bb)
        {
          rtx insn;
          FOR_BB_INSNS (bb, insn)
            if (NONDEBUG_INSN_P (insn)
                && requires_stack_frame_p (insn, prologue_used,
                                           set_up_by_prologue))
              {
                crtl->stack_realign_needed = stack_realign;
                crtl->stack_realign_finalized = true;
                return;
              }
        }

      frame_pointer_needed = false;
      stack_realign = false;
      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
      crtl->stack_alignment_needed = incoming_stack_boundary;
      crtl->stack_alignment_estimated = incoming_stack_boundary;
      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
        crtl->preferred_stack_boundary = incoming_stack_boundary;
      df_finish_pass (true);
      df_scan_alloc (NULL);
      df_scan_blocks ();
      df_compute_regs_ever_live (true);
      df_analyze ();
    }

  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
}
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  bool pic_reg_used;
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;

  ix86_finalize_stack_realign_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;

  ix86_compute_frame_layout (&frame);

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use the
	 profiling-before-prologue variant.  If so, sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("ms_hook_prologue attribute isn%'t compatible "
	       "with -mfentry for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }
  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	{
	  /* Push arg pointer reg.  */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (Pmode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;
    }
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb doesn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  int_registers_saved = (frame.nregs == 0);

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location.  */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      if (m->fs.sp_offset != frame.sse_reg_save_offset)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (m->fs.sp_offset
					    - frame.sse_reg_save_offset),
				   -1, false);

      /* Align the stack.  */
      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
					stack_pointer_rtx,
					GEN_INT (-align_bytes)));

      /* For the purposes of register save area addressing, the stack
	 pointer is no longer valid.  As for the value of sp_offset,
	 see ix86_compute_frame_layout, which we need to match in order
	 to pass verification of stack_pointer_offset at the end.  */
      m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
      m->fs.sp_valid = false;
    }
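
  /* Worked example of the bookkeeping above (illustrative only): with
     align_bytes == 32 and sp_offset == 40, (40 + 32) & -32 == 64, i.e.
     sp_offset is rounded up to the next multiple of 32, while the "and"
     insn just emitted rounds the runtime stack pointer down to a
     32-byte boundary.  */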
  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_regs[REGNO (crtl->drap_reg)])
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      /* We expect the registers to be saved when probes are used.  */
      gcc_assert (int_registers_saved);

      if (STACK_CHECK_MOVING_SP)
	{
	  ix86_adjust_stack_and_probe (allocate);
	  allocate = 0;
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;

	  if (TARGET_STACK_PROBE)
	    ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
	  else
	    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
	}
    }
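
  /* The two probe strategies above, roughly (illustrative only): with
     STACK_CHECK_MOVING_SP the stack pointer itself is walked down the
     allocation in page-sized steps, touching each new page, so nothing
     remains to allocate (allocate was zeroed); otherwise the stack
     pointer stays put and ix86_emit_probe_stack_range touches pages in
     a window below it, after which the allocation proper is still done
     by the code that follows.  */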
  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      rtx r10 = NULL;
      rtx (*adjust_stack_insn)(rtx, rtx, rtx);

      bool eax_live = false;
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
      if (!TARGET_64BIT_MS_ABI)
	eax_live = ix86_eax_live_at_start_p ();

      if (eax_live)
	{
	  emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	}
      if (r10_live)
	{
	  r10 = gen_rtx_REG (Pmode, R10_REG);
	  emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      adjust_stack_insn = (TARGET_64BIT
			   ? gen_pro_epilogue_adjust_stack_di_sub
			   : gen_pro_epilogue_adjust_stack_si_sub);

      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
					   stack_pointer_rtx, eax));

      /* Note that SEH directives need to continue tracking the stack
	 pointer even after the frame pointer has been set up.  */
      if (m->fs.cfa_reg == stack_pointer_rtx || TARGET_SEH)
	{
	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_offset += allocate;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      if (r10_live && eax_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn (r10, gen_frame_mem (Pmode, t));
	  t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
	  emit_move_insn (eax, gen_frame_mem (Pmode, t));
	}
      else if (eax_live || r10_live)
	{
	  t = choose_baseaddr (m->fs.sp_offset - allocate);
	  emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
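
  /* For the probed-allocation branch above, the emitted sequence is
     roughly the following (32-bit sketch with %eax live; the actual
     worker symbol is target-dependent and this listing is illustrative
     only):

	push  %eax                    ; preserve caller's %eax
	mov   $allocate, %eax
	call  allocate_stack_worker   ; probe the pages being committed
	sub   %eax, %esp              ; the real allocation
	mov   saved-slot(%esp), %eax  ; reload the preserved value  */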
  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (frame.nsseregs)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);

  pic_reg_used = false;
  if (pic_offset_table_rtx
      && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	  || crtl->profile))
    {
      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();

      if (alt_pic_reg_used != INVALID_REGNUM)
	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);

      pic_reg_used = true;
    }

  if (pic_reg_used)
    {
      if (TARGET_64BIT)
	{
	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
	      rtx label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
						   label));
	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
	      insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
					    pic_offset_table_rtx, tmp_reg));
	    }
	  else
	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
	}
      else
	{
	  insn = emit_insn (gen_set_got (pic_offset_table_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
	}
    }

  /* In the pic_reg_used case, make sure that the got load isn't deleted
     when mcount needs it.  Blockage to avoid call movement across mcount
     call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
  if (crtl->profile && !flag_fentry && pic_reg_used)
    emit_insn (gen_prologue_use (pic_offset_table_rtx));

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out that stack
	 realignment isn't necessary; emit prologue code to set up DRAP
	 without the stack realignment adjustment.  */
      t = choose_baseaddr (0);
      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* Emit cld instruction if stringops are used in the function.  */
  if (TARGET_CLD && ix86_current_function_needs_cld)
    emit_insn (gen_cld ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				      GEN_INT (m->fs.cfa_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
}
/* Emit code and notes for the LEAVE instruction.  */

static void
ix86_emit_leave (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn = emit_insn (ix86_gen_leave ());

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (stack_pointer_rtx, m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
      ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
				 m->fs.fp_offset);
    }
}
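
/* The state updates in ix86_emit_leave mirror what "leave" does at run
   time (illustrative only):

     mov  %ebp, %esp	; sp_offset becomes fp_offset
     pop  %ebp		; then moves up by one word

   hence sp_offset = fp_offset - UNITS_PER_WORD, the stack pointer is
   valid for addressing again, and the frame pointer no longer is.  */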
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (Pmode, regno);
	rtx insn, mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_frame_mem (Pmode, mem);
	insn = emit_move_insn (reg, mem);

	if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
	  {
	    /* Previously we'd represented the CFA as an expression
	       like *(%ebp - 8).  We've just popped that value from
	       the stack, which means we need to reset the CFA to
	       the drap register.  This will remain until we restore
	       the stack pointer.  */
	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
	    RTX_FRAME_RELATED_P (insn) = 1;

	    /* This means that the DRAP register is valid for addressing.  */
	    m->fs.drap_valid = true;
	  }
	else
	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;

	mem = choose_baseaddr (cfa_offset);
	mem = gen_rtx_MEM (V4SFmode, mem);
	set_mem_align (mem, 128);
	emit_move_insn (reg, mem);

	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);

	cfa_offset -= 16;
      }
}
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  struct ix86_frame frame;
  bool restore_regs_via_mov;
  bool using_drap;

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);

  m->fs.sp_valid = (!frame_pointer_needed
		    || (current_function_sp_is_unchanging
			&& !stack_realign_fp));
  gcc_assert (!m->fs.sp_valid
	      || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;

      /* When the register save area is in the aligned portion of
	 the stack, determine the maximum runtime displacement that
	 matches up with the aligned frame.  */
      if (stack_realign_drap)
	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
				  + UNITS_PER_WORD);
    }
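
  /* Example: on x86_64, RED_ZONE_SIZE is 128 and UNITS_PER_WORD is 8,
     so without DRAP realignment red_zone_offset is 136: the 128-byte
     red zone extends downward from just below the return-address word,
     i.e. down to 136 bytes below the CFA.  */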
  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    frame.reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp is not valid, then
     using a move instruction to restore the register is less work
     than reloading sp and popping the register.  */
  else if (!m->fs.sp_valid && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && (frame.nregs > 1
	       || m->fs.sp_offset != frame.reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && !frame.nregs
	   && m->fs.sp_offset != frame.reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
	   && TARGET_USE_LEAVE
	   && cfun->machine->use_fast_prologue_epilogue
	   && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
	 the stack pointer, if we will restore via sp.  */
      if (TARGET_64BIT
	  && m->fs.sp_offset > 0x7fffffff
	  && !(m->fs.fp_valid || m->fs.drap_valid)
	  && (frame.nsseregs + frame.nregs) != 0)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.sse_reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
					  style == 2);

  if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
	{
	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;

	  /* Stack align doesn't work with eh_return.  */
	  gcc_assert (!stack_realign_drap);
	  /* Neither do regparm nested functions.  */
	  gcc_assert (!ix86_static_chain_on_stack);

	  if (frame_pointer_needed)
	    {
	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));

	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
	      insn = emit_move_insn (hard_frame_pointer_rtx, t);

	      /* Note that we use SA as a temporary CFA, as the return
		 address is at the proper place relative to it.  We
		 pretend this happens at the FP restore insn because
		 prior to this insn the FP would be stored at the wrong
		 offset relative to SA, and after this insn we have no
		 other reasonable register to use for the CFA.  We don't
		 bother resetting the CFA to the SP for the duration of
		 the return insn.  */
	      add_reg_note (insn, REG_CFA_DEF_CFA,
			    plus_constant (sa, UNITS_PER_WORD));
	      ix86_add_queued_cfa_restore_notes (insn);
	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	      RTX_FRAME_RELATED_P (insn) = 1;

	      m->fs.cfa_reg = sa;
	      m->fs.cfa_offset = UNITS_PER_WORD;
	      m->fs.fp_valid = false;

	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
					 const0_rtx, style, false);
	    }
	  else
	    {
	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
	      t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
	      ix86_add_queued_cfa_restore_notes (insn);

	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
	      if (m->fs.cfa_offset != UNITS_PER_WORD)
		{
		  m->fs.cfa_offset = UNITS_PER_WORD;
		  add_reg_note (insn, REG_CFA_DEF_CFA,
				plus_constant (stack_pointer_rtx,
					       UNITS_PER_WORD));
		  RTX_FRAME_RELATED_P (insn) = 1;
		}
	    }
	  m->fs.sp_offset = UNITS_PER_WORD;
	  m->fs.sp_valid = true;
	}
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
	 if necessary, (2) a sequence of pops, and (3) a return or
	 jump instruction.  Prevent insns from the function body from
	 being scheduled into this sequence.  */
      if (TARGET_SEH)
	{
	  /* Prevent a catch region from being adjacent to the standard
	     epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda
	     nor several other flags that would be interesting to test
	     are set up yet.  */
	  if (flag_non_call_exceptions)
	    emit_insn (gen_nops (const1_rtx));
	  else
	    emit_insn (gen_blockage ());
	}

      /* First step is to deallocate the stack frame so that we can
	 pop the registers.  */
      if (!m->fs.sp_valid)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
				     GEN_INT (m->fs.fp_offset
					      - frame.reg_save_offset),
				     style, false);
	}
      else if (m->fs.sp_offset != frame.reg_save_offset)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (m->fs.sp_offset
					      - frame.reg_save_offset),
				     style,
				     m->fs.cfa_reg == stack_pointer_rtx);
	}

      ix86_emit_restore_regs_using_pop ();
    }
  /* If we used a frame pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
	 pointer store address, then we only need a pop.  */
      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
	 able to grok it fast.  */
      else if (TARGET_USE_LEAVE
	       || optimize_function_for_size_p (cfun)
	       || !cfun->machine->use_fast_prologue_epilogue)
	ix86_emit_leave ();
      else
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx,
				     hard_frame_pointer_rtx,
				     const0_rtx, style, !using_drap);
	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
	}
    }
  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
	param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_regs[REGNO (crtl->drap_reg)])
	param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
			(VOIDmode, stack_pointer_rtx,
			 gen_rtx_PLUS (Pmode,
				       crtl->drap_reg,
				       GEN_INT (-param_ptr_offset))));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
				  GEN_INT (param_ptr_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_regs[REGNO (crtl->drap_reg)])
	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }
  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
				 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  /* Emit vzeroupper if needed.  */
  if (TARGET_VZEROUPPER
      && !TREE_THIS_VOLATILE (cfun->decl)
      && !cfun->machine->caller_return_avx256_p)
    emit_insn (gen_avx_vzeroupper (GEN_INT (call_no_avx256)));
  if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
	 address, do explicit add, and jump indirectly to the caller.  */

      if (crtl->args.pops_args >= 65536)
	{
	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
	  rtx insn;

	  /* There is no "pascal" calling convention in any 64bit ABI.  */
	  gcc_assert (!TARGET_64BIT);

	  insn = emit_insn (gen_pop (ecx));
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  m->fs.sp_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     popc, -1, true);
	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
	}
      else
	emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else
    emit_jump_insn (gen_simple_return_internal ());

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
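
/* For the >= 64K case above, the generated epilogue tail is roughly
   (illustrative only):

     pop   %ecx		; return address into %ecx
     add   $pops_args, %esp
     jmp   *%ecx	; return to the caller

   since "ret $N" can only encode a 16-bit pop count.  */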
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx)
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
#if TARGET_MACHO
  /* Mach-O doesn't support labels at the end of objects, so if
     it looks like we might want one, insert a NOP.  */
  {
    rtx insn = get_last_insn ();
    rtx deleted_debug_label = NULL_RTX;
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only; instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }
    if (insn
	&& (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
      fputs ("\tnop\n", file);
    else if (deleted_debug_label)
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
		     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
	{
	  if (DECL_STATIC_CHAIN (cfun->decl))
	    {
	      sorry ("-fsplit-stack does not support fastcall with "
		     "nested function");
	      return INVALID_REGNUM;
	    }
	  return AX_REG;
	}
      else if (regparm < 3)
	{
	  if (!DECL_STATIC_CHAIN (cfun->decl))
	    return CX_REG;
	  else
	    {
	      if (regparm >= 2)
		{
		  sorry ("-fsplit-stack does not support 2 register "
			 "parameters for a nested function");
		  return INVALID_REGNUM;
		}
	      return DX_REG;
	    }
	}
      else
	{
	  /* FIXME: We could make this work by pushing a register
	     around the addition and comparison.  */
	  sorry ("-fsplit-stack does not support 3 register parameters");
	  return INVALID_REGNUM;
	}
    }
}
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  struct ix86_frame frame;
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
  rtx scratch_reg = NULL_RTX;
  rtx varargs_label = NULL_RTX;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_realign_flags ();
  ix86_compute_frame_layout (&frame);
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
			  UNSPEC_STACK_CHECK);
  limit = gen_rtx_CONST (Pmode, limit);
  limit = gen_rtx_MEM (Pmode, limit);
  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
	 the required frame size.  Since this is the very start of the
	 function, the scratch register can be any caller-saved
	 register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
	return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
	{
	  /* We don't use ix86_gen_add3 in this case because it will
	     want to split to lea, but when not optimizing the insn
	     will not be split after this point.  */
	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
						offset)));
	}
      else
	{
	  emit_move_insn (scratch_reg, offset);
	  emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
				 stack_pointer_rtx));
	}
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_note (jump_insn, REG_BR_PROB,
		GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
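
  /* Conceptually the test generated above is (illustrative only):

       if (sp - frame_size >= tcb.stack_limit)	; likely branch
	 goto label;				; enough stack
       ... call __morestack ...			; else grow the stack

     where CURRENT is either sp itself (small frames fit within the
     SPLIT_STACK_AVAILABLE slack) or the scratch register holding
     sp - frame_size, and LIMIT is the segment-relative TCB slot built
     via UNSPEC_STACK_CHECK.  */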
  if (split_stack_fn == NULL_RTX)
    split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
  call_fusage = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
	 Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
	{
	  rtx rax;

	  rax = gen_rtx_REG (Pmode, AX_REG);
	  emit_move_insn (rax, reg10);
	  use_reg (&call_fusage, rax);
	}

      if (ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
	{
	  HOST_WIDE_INT argval;

	  /* When using the large model we need to load the address
	     into a register, and we've run out of registers.  So we
	     switch to a different calling convention, and we call a
	     different function: __morestack_large.  We pass the
	     argument size in the upper 32 bits of r10 and pass the
	     frame size in the lower 32 bits.  */
	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
	  gcc_assert ((args_size & 0xffffffff) == args_size);

	  if (split_stack_fn_large == NULL_RTX)
	    split_stack_fn_large =
	      gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");

	  if (ix86_cmodel == CM_LARGE_PIC)
	    {
	      rtx label, x;

	      label = gen_label_rtx ();
	      emit_label (label);
	      LABEL_PRESERVE_P (label) = 1;
	      emit_insn (gen_set_rip_rex64 (reg10, label));
	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
	      emit_insn (gen_adddi3 (reg10, reg10, reg11));
	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
				  UNSPEC_GOT);
	      x = gen_rtx_CONST (Pmode, x);
	      emit_move_insn (reg11, x);
	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
	      x = gen_const_mem (Pmode, x);
	      emit_move_insn (reg11, x);
	    }
	  else
	    emit_move_insn (reg11, split_stack_fn_large);

	  fn = reg11;

	  argval = ((args_size << 16) << 16) + allocate;
	  emit_move_insn (reg10, GEN_INT (argval));
	}
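      /* Packing sketch (illustrative only): the result is

	   r10 = (args_size << 32) | frame_size

	 done as two 16-bit shifts, presumably to stay well-defined
	 even when the shift count could otherwise equal the width of
	 the type; __morestack_large_model splits the halves apart
	 again.  */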
      else
	{
	  emit_move_insn (reg10, allocate_rtx);
	  emit_move_insn (reg11, GEN_INT (args_size));
	  use_reg (&call_fusage, reg11);
	}

      use_reg (&call_fusage, reg10);
    }
  else
    {
      emit_insn (gen_push (GEN_INT (args_size)));
      emit_insn (gen_push (allocate_rtx));
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
				GEN_INT (UNITS_PER_WORD), constm1_rtx,
				NULL_RTX, false);
  add_function_usage_to (call_insn, call_fusage);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
		    gen_rtx_REG (Pmode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
	 fp -> old fp value
	       return address within this function
	       return address of caller of this function
	       stack arguments
	 So we add three words to get to the stack arguments.

	 32-bit:
	 fp -> old fp value
	       return address within this function
	       first argument to __morestack
	       second argument to __morestack
	       return address of caller of this function
	       stack arguments
	 So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, frame_reg,
					    GEN_INT (words * UNITS_PER_WORD))));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (UNITS_PER_WORD))));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Determine if op is suitable SUBREG RTX for address.  */

static bool
ix86_address_subreg_operand (rtx op)
{
  enum machine_mode mode;

  if (!REG_P (op))
    return false;

  mode = GET_MODE (op);

  if (GET_MODE_CLASS (mode) != MODE_INT)
    return false;

  /* Don't allow SUBREGs that span more than a word.  It can lead to spill
     failures when the register is one word out of a two word structure.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    return false;

  /* Allow only SUBREGs of non-eliminable hard registers.  */
  return register_no_elim_operand (op, mode);
}
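
/* Example: on a 32-bit target a SUBREG of a DImode register is rejected
   (the inner register spans two words), and any SUBREG of an eliminable
   register such as the arg pointer is rejected as well; a SUBREG of a
   word-sized, non-eliminable hard register is accepted.  */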
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing length of lea instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  enum ix86_address_seg seg = SEG_DEFAULT;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
	  && GET_MODE (XEXP (addr, 0)) == SImode)
	addr = XEXP (addr, 0);
      else if (GET_CODE (addr) == AND
	       && const_32bit_mask (XEXP (addr, 1), DImode))
	{
	  addr = XEXP (addr, 0);

	  /* Strip subreg.  */
	  if (GET_CODE (addr) == SUBREG
	      && GET_MODE (SUBREG_REG (addr)) == SImode)
	    addr = SUBREG_REG (addr);
	}
    }

  if (REG_P (addr))
    base = addr;
  else if (GET_CODE (addr) == SUBREG)
    {
      if (ix86_address_subreg_operand (SUBREG_REG (addr)))
	base = addr;
      else
	return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
	{
	  if (n >= 4)
	    return 0;
	  addends[n++] = XEXP (op, 1);
	  op = XEXP (op, 0);
	}
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
	return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case MULT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      break;

	    case ASHIFT:
	      if (index)
		return 0;
	      index = XEXP (op, 0);
	      tmp = XEXP (op, 1);
	      if (!CONST_INT_P (tmp))
		return 0;
	      scale = INTVAL (tmp);
	      if ((unsigned HOST_WIDE_INT) scale > 3)
		return 0;
	      scale = 1 << scale;
	      break;

	    case UNSPEC:
	      if (XINT (op, 1) == UNSPEC_TP
		  && TARGET_TLS_DIRECT_SEG_REFS
		  && seg == SEG_DEFAULT)
		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
	      else
		return 0;
	      break;

	    case SUBREG:
	      if (!ix86_address_subreg_operand (SUBREG_REG (op)))
		return 0;
	      /* FALLTHRU */

	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return 0;
	      break;

	    case CONST:
	    case CONST_INT:
	    case SYMBOL_REF:
	    case LABEL_REF:
	      if (disp)
		return 0;
	      disp = op;
	      break;

	    default:
	      return 0;
	    }
	}
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
	return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
	return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
	;
      else if (GET_CODE (index) == SUBREG
	       && ix86_address_subreg_operand (SUBREG_REG (index)))
	;
      else
	return 0;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
	return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (index_reg == arg_pointer_rtx
	  || index_reg == frame_pointer_rtx
	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
    {
      tmp = base, base = index, index = tmp;
      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp
      && base_reg
      && (base_reg == hard_frame_pointer_rtx
	  || base_reg == frame_pointer_rtx
	  || base_reg == arg_pointer_rtx
	  || (REG_P (base_reg)
	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
		  || REGNO (base_reg) == R13_REG))))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
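
/* Example decomposition (illustrative only): for the address

     (plus:SI (plus:SI (reg:SI bx) (mult:SI (reg:SI cx) (const_int 4)))
	      (const_int 12))

   the routine fills OUT with base = bx, index = cx, scale = 4,
   disp = (const_int 12), seg = SEG_DEFAULT and returns 1, i.e. the
   operand written as "12(%ebx,%ecx,4)".  */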
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
static int
ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address.  */
  if ((parts.base
       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
      || (parts.index
	  && (!REG_P (parts.index)
	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
    cost++;

  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && parts.base != parts.index)
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
     to split such addresses or even refuse such addresses at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the zero
     in the memory address, but I don't have an AMD-K6 machine handy to check
     this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    x = XVECEXP (x, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
	return true;
      if (GET_CODE (x) != SYMBOL_REF)
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == TImode
	  && x != CONST0_RTX (TImode)
	  && !TARGET_64BIT)
	return false;
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x))
	return false;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  /* We can always put integral constants and vectors in memory.  */
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      return false;

    default:
      break;
    }
  return !ix86_legitimate_constant_p (mode, x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    x = XVECEXP (inner, 0, 0);
	    return (GET_CODE (x) == SYMBOL_REF
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
	{
	case LABEL_REF:
	  return true;

	case CONST:
	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
	    break;
	  op0 = XEXP (XEXP (disp, 0), 0);
	  op1 = XEXP (XEXP (disp, 0), 1);
	  if (!CONST_INT_P (op1)
	      || INTVAL (op1) >= 16*1024*1024
	      || INTVAL (op1) < -16*1024*1024)
	    break;
	  if (GET_CODE (op0) == LABEL_REF)
	    return true;
	  if (GET_CODE (op0) != SYMBOL_REF)
	    break;
	  /* FALLTHRU */

	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.  */
	  if (SYMBOL_REF_TLS_MODEL (op0))
	    return false;
	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
	      && ix86_cmodel != CM_LARGE_PIC)
	    return true;
	  break;

	default:
	  break;
	}
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here; they would permit
	 arbitrary distances into the GOT table.  We should not need
	 these anyway.  */
      if (GET_CODE (disp) != UNSPEC
	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
	      && XINT (disp, 1) != UNSPEC_GOTOFF
	      && XINT (disp, 1) != UNSPEC_PCREL
	      && XINT (disp, 1) != UNSPEC_PLTOFF))
	return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
	return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
	return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
	return false;
      /* We need to check for both symbols and labels because VxWorks loads
	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
	 details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
	  && !TARGET_64BIT)
	return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
	return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
11913 /* Recognizes RTL expressions that are valid memory addresses for an
11914 instruction. The MODE argument is the machine mode for the MEM
11915 expression that wants to use this address.
11917 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11918 convert common non-canonical forms to canonical form so that they will
11922 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11923 rtx addr, bool strict)
11925 struct ix86_address parts;
11926 rtx base, index, disp;
11927 HOST_WIDE_INT scale;
11929 if (ix86_decompose_address (addr, &parts) <= 0)
11930 /* Decomposition failed. */
11934 index = parts.index;
11936 scale = parts.scale;
11938 /* Validate base register. */
11945 else if (GET_CODE (base) == SUBREG && REG_P (SUBREG_REG (base)))
11946 reg = SUBREG_REG (base);
11948 /* Base is not a register. */
11951 if (GET_MODE (base) != SImode && GET_MODE (base) != DImode)
11954 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11955 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11956 /* Base is not valid. */
11960 /* Validate index register. */
11967 else if (GET_CODE (index) == SUBREG && REG_P (SUBREG_REG (index)))
11968 reg = SUBREG_REG (index);
11970 /* Index is not a register. */
11973 if (GET_MODE (index) != SImode && GET_MODE (index) != DImode)
11976 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11977 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11978 /* Index is not valid. */
11982 /* Index and base should have the same mode. */
11984 && GET_MODE (base) != GET_MODE (index))
11987 /* Validate scale factor. */
11991 /* Scale without index. */
11994 if (scale != 2 && scale != 4 && scale != 8)
11995 /* Scale is not a valid multiplier. */
11999 /* Validate displacement. */
12002 if (GET_CODE (disp) == CONST
12003 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12004 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12005 switch (XINT (XEXP (disp, 0), 1))
12007 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12008 used. While the ABI also specifies 32bit relocations, we don't produce
12009 them at all and use IP relative addressing instead. */
12011 case UNSPEC_GOTOFF:
12012 gcc_assert (flag_pic);
12014 goto is_legitimate_pic;
12016 /* 64bit address unspec. */
12019 case UNSPEC_GOTPCREL:
12021 gcc_assert (flag_pic);
12022 goto is_legitimate_pic;
12024 case UNSPEC_GOTTPOFF:
12025 case UNSPEC_GOTNTPOFF:
12026 case UNSPEC_INDNTPOFF:
12027 case UNSPEC_NTPOFF:
12028 case UNSPEC_DTPOFF:
12031 case UNSPEC_STACK_CHECK:
12032 gcc_assert (flag_split_stack);
12036 /* Invalid address unspec. */
12040 else if (SYMBOLIC_CONST (disp)
12044 && MACHOPIC_INDIRECT
12045 && !machopic_operand_p (disp)
12051 if (TARGET_64BIT && (index || base))
12053 /* foo@dtpoff(%rX) is ok. */
12054 if (GET_CODE (disp) != CONST
12055 || GET_CODE (XEXP (disp, 0)) != PLUS
12056 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12057 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12058 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12059 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12060 /* Non-constant pic memory reference. */
12063 else if ((!TARGET_MACHO || flag_pic)
12064 && ! legitimate_pic_address_disp_p (disp))
12065 /* Displacement is an invalid pic construct. */
12068 else if (MACHO_DYNAMIC_NO_PIC_P
12069 && !ix86_legitimate_constant_p (Pmode, disp))
12070 /* Displacement must be referenced via non_lazy_pointer. */
12074 /* This code used to verify that a symbolic pic displacement
12075 includes the pic_offset_table_rtx register.
12077 While this is a good idea, unfortunately these constructs may
12078 be created by "adds using lea" optimization for incorrect
12087 This code is nonsensical, but results in addressing the
12088 GOT table with a pic_offset_table_rtx base. We can't
12089 just refuse it easily, since it gets matched by the
12090 "addsi3" pattern, which later gets split to lea in the
12091 case the output register differs from the input. While this
12092 can be handled by a separate addsi pattern for this case
12093 that never results in lea, disabling this test seems to be
12094 the easier and correct fix for the crash. */
12096 else if (GET_CODE (disp) != LABEL_REF
12097 && !CONST_INT_P (disp)
12098 && (GET_CODE (disp) != CONST
12099 || !ix86_legitimate_constant_p (Pmode, disp))
12100 && (GET_CODE (disp) != SYMBOL_REF
12101 || !ix86_legitimate_constant_p (Pmode, disp)))
12102 /* Displacement is not constant. */
12104 else if (TARGET_64BIT
12105 && !x86_64_immediate_operand (disp, VOIDmode))
12106 /* Displacement is out of range. */
12110 /* Everything looks valid. */
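/* Illustrative note (added; not in the original source): the routine
   above accepts the general i386 effective-address form
       disp(base, index, scale)   e.g.  8(%ebx,%ecx,4)
   with scale in {1, 2, 4, 8} and the displacement restricted as
   checked case by case above.  */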
12114 /* Determine if a given RTX is a valid constant address. */
12117 constant_address_p (rtx x)
12119 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
12122 /* Return a unique alias set for the GOT. */
12124 static alias_set_type
12125 ix86_GOT_alias_set (void)
12127 static alias_set_type set = -1;
12129 set = new_alias_set ();
12133 /* Return a legitimate reference for ORIG (an address) using the
12134 register REG. If REG is 0, a new pseudo is generated.
12136 There are two types of references that must be handled:
12138 1. Global data references must load the address from the GOT, via
12139 the PIC reg. An insn is emitted to do this load, and the reg is
12142 2. Static data references, constant pool addresses, and code labels
12143 compute the address as an offset from the GOT, whose base is in
12144 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
12145 differentiate them from global data objects. The returned
12146 address is the PIC reg + an unspec constant.
12148 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
12149 reg also appears in the address. */
12152 legitimize_pic_address (rtx orig, rtx reg)
12155 rtx new_rtx = orig;
12159 if (TARGET_MACHO && !TARGET_64BIT)
12162 reg = gen_reg_rtx (Pmode);
12163 /* Use the generic Mach-O PIC machinery. */
12164 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
12168 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
12170 else if (TARGET_64BIT
12171 && ix86_cmodel != CM_SMALL_PIC
12172 && gotoff_operand (addr, Pmode))
12175 /* This symbol may be referenced via a displacement from the PIC
12176 base address (@GOTOFF). */
12178 if (reload_in_progress)
12179 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12180 if (GET_CODE (addr) == CONST)
12181 addr = XEXP (addr, 0);
12182 if (GET_CODE (addr) == PLUS)
12184 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12186 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12189 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12190 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12192 tmpreg = gen_reg_rtx (Pmode);
12195 emit_move_insn (tmpreg, new_rtx);
12199 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
12200 tmpreg, 1, OPTAB_DIRECT);
12203 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
12205 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
12207 /* This symbol may be referenced via a displacement from the PIC
12208 base address (@GOTOFF). */
12210 if (reload_in_progress)
12211 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12212 if (GET_CODE (addr) == CONST)
12213 addr = XEXP (addr, 0);
12214 if (GET_CODE (addr) == PLUS)
12216 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
12218 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
12221 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
12222 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12223 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12227 emit_move_insn (reg, new_rtx);
12231 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
12232 /* We can't use @GOTOFF for text labels on VxWorks;
12233 see gotoff_operand. */
12234 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
12236 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12238 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12239 return legitimize_dllimport_symbol (addr, true);
12240 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
12241 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12242 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12244 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
12245 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12249 /* For x64 PE-COFF there is no GOT table. So we use address
12251 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12253 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
12254 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12257 reg = gen_reg_rtx (Pmode);
12258 emit_move_insn (reg, new_rtx);
12261 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
12263 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
12264 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12265 new_rtx = gen_const_mem (Pmode, new_rtx);
12266 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12269 reg = gen_reg_rtx (Pmode);
12270 /* Use gen_movsi directly; otherwise the address is loaded
12271 into a register for CSE. We don't want to CSE this address;
12272 instead we CSE addresses from the GOT table, so skip this. */
12273 emit_insn (gen_movsi (reg, new_rtx));
12278 /* This symbol must be referenced via a load from the
12279 Global Offset Table (@GOT). */
12281 if (reload_in_progress)
12282 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12283 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
12284 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12286 new_rtx = force_reg (Pmode, new_rtx);
12287 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12288 new_rtx = gen_const_mem (Pmode, new_rtx);
12289 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
12292 reg = gen_reg_rtx (Pmode);
12293 emit_move_insn (reg, new_rtx);
12299 if (CONST_INT_P (addr)
12300 && !x86_64_immediate_operand (addr, VOIDmode))
12304 emit_move_insn (reg, addr);
12308 new_rtx = force_reg (Pmode, addr);
12310 else if (GET_CODE (addr) == CONST)
12312 addr = XEXP (addr, 0);
12314 /* We must match stuff we generate before. Assume the only
12315 unspecs that can get here are ours. Not that we could do
12316 anything with them anyway.... */
12317 if (GET_CODE (addr) == UNSPEC
12318 || (GET_CODE (addr) == PLUS
12319 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
12321 gcc_assert (GET_CODE (addr) == PLUS);
12323 if (GET_CODE (addr) == PLUS)
12325 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
12327 /* Check first to see if this is a constant offset from a @GOTOFF
12328 symbol reference. */
12329 if (gotoff_operand (op0, Pmode)
12330 && CONST_INT_P (op1))
12334 if (reload_in_progress)
12335 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12336 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
12338 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
12339 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
12340 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
12344 emit_move_insn (reg, new_rtx);
12350 if (INTVAL (op1) < -16*1024*1024
12351 || INTVAL (op1) >= 16*1024*1024)
12353 if (!x86_64_immediate_operand (op1, Pmode))
12354 op1 = force_reg (Pmode, op1);
12355 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
12361 base = legitimize_pic_address (XEXP (addr, 0), reg);
12362 new_rtx = legitimize_pic_address (XEXP (addr, 1),
12363 base == reg ? NULL_RTX : reg);
12365 if (CONST_INT_P (new_rtx))
12366 new_rtx = plus_constant (base, INTVAL (new_rtx));
12369 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
12371 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
12372 new_rtx = XEXP (new_rtx, 1);
12374 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
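/* Illustrative note (added; not in the original source): on ia32 with
   -fpic the two reference kinds documented above typically assemble to
     movl foo@GOT(%ebx), %eax      (global data: load address from GOT)
     leal bar@GOTOFF(%ebx), %eax   (local data: PIC reg + offset)
   with %ebx holding the GOT base.  */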
12382 /* Load the thread pointer. If TO_REG is true, force it into a register. */
12385 get_thread_pointer (bool to_reg)
12387 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
12389 if (GET_MODE (tp) != Pmode)
12390 tp = convert_to_mode (Pmode, tp, 1);
12393 tp = copy_addr_to_reg (tp);
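/* Illustrative note (added; not in the original source): UNSPEC_TP
   stands for the TLS segment base, which on GNU/Linux targets is
   %gs:0 in 32-bit mode and %fs:0 in 64-bit mode.  */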
12398 /* Construct the SYMBOL_REF for the tls_get_addr function. */
12400 static GTY(()) rtx ix86_tls_symbol;
12403 ix86_tls_get_addr (void)
12405 if (!ix86_tls_symbol)
12408 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
12409 ? "___tls_get_addr" : "__tls_get_addr");
12411 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
12414 return ix86_tls_symbol;
12417 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
12419 static GTY(()) rtx ix86_tls_module_base_symbol;
12422 ix86_tls_module_base (void)
12424 if (!ix86_tls_module_base_symbol)
12426 ix86_tls_module_base_symbol
12427 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
12429 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
12430 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
12433 return ix86_tls_module_base_symbol;
12436 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
12437 false if we expect this to be used for a memory address and true if
12438 we expect to load the address into a register. */
12441 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
12443 rtx dest, base, off;
12444 rtx pic = NULL_RTX, tp = NULL_RTX;
12449 case TLS_MODEL_GLOBAL_DYNAMIC:
12450 dest = gen_reg_rtx (Pmode);
12455 pic = pic_offset_table_rtx;
12458 pic = gen_reg_rtx (Pmode);
12459 emit_insn (gen_set_got (pic));
12463 if (TARGET_GNU2_TLS)
12466 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
12468 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
12470 tp = get_thread_pointer (true);
12471 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
12473 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12477 rtx caddr = ix86_tls_get_addr ();
12481 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
12484 emit_call_insn (gen_tls_global_dynamic_64 (rax, x, caddr));
12485 insns = get_insns ();
12488 RTL_CONST_CALL_P (insns) = 1;
12489 emit_libcall_block (insns, dest, rax, x);
12492 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
12496 case TLS_MODEL_LOCAL_DYNAMIC:
12497 base = gen_reg_rtx (Pmode);
12502 pic = pic_offset_table_rtx;
12505 pic = gen_reg_rtx (Pmode);
12506 emit_insn (gen_set_got (pic));
12510 if (TARGET_GNU2_TLS)
12512 rtx tmp = ix86_tls_module_base ();
12515 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
12517 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
12519 tp = get_thread_pointer (true);
12520 set_unique_reg_note (get_last_insn (), REG_EQUAL,
12521 gen_rtx_MINUS (Pmode, tmp, tp));
12525 rtx caddr = ix86_tls_get_addr ();
12529 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, eqv;
12532 emit_call_insn (gen_tls_local_dynamic_base_64 (rax, caddr));
12533 insns = get_insns ();
12536 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
12537 share the LD_BASE result with other LD model accesses. */
12538 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12539 UNSPEC_TLS_LD_BASE);
12541 RTL_CONST_CALL_P (insns) = 1;
12542 emit_libcall_block (insns, base, rax, eqv);
12545 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
12548 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
12549 off = gen_rtx_CONST (Pmode, off);
12551 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
12553 if (TARGET_GNU2_TLS)
12555 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
12557 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
12561 case TLS_MODEL_INITIAL_EXEC:
12564 if (TARGET_SUN_TLS)
12566 /* The Sun linker took the AMD64 TLS spec literally
12567 and can only handle %rax as the destination of the
12568 initial executable code sequence. */
12570 dest = gen_reg_rtx (Pmode);
12571 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
12576 type = UNSPEC_GOTNTPOFF;
12580 if (reload_in_progress)
12581 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
12582 pic = pic_offset_table_rtx;
12583 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
12585 else if (!TARGET_ANY_GNU_TLS)
12587 pic = gen_reg_rtx (Pmode);
12588 emit_insn (gen_set_got (pic));
12589 type = UNSPEC_GOTTPOFF;
12594 type = UNSPEC_INDNTPOFF;
12597 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
12598 off = gen_rtx_CONST (Pmode, off);
12600 off = gen_rtx_PLUS (Pmode, pic, off);
12601 off = gen_const_mem (Pmode, off);
12602 set_mem_alias_set (off, ix86_GOT_alias_set ());
12604 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12606 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12607 off = force_reg (Pmode, off);
12608 return gen_rtx_PLUS (Pmode, base, off);
12612 base = get_thread_pointer (true);
12613 dest = gen_reg_rtx (Pmode);
12614 emit_insn (gen_subsi3 (dest, base, off));
12618 case TLS_MODEL_LOCAL_EXEC:
12619 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
12620 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12621 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
12622 off = gen_rtx_CONST (Pmode, off);
12624 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
12626 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
12627 return gen_rtx_PLUS (Pmode, base, off);
12631 base = get_thread_pointer (true);
12632 dest = gen_reg_rtx (Pmode);
12633 emit_insn (gen_subsi3 (dest, base, off));
12638 gcc_unreachable ();
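/* Illustrative note (added; not in the original source): the four
   models handled above roughly correspond to
     global-dynamic: call __tls_get_addr for any module's symbol
     local-dynamic:  one __tls_get_addr call for the module base,
                     then constant @dtpoff offsets per symbol
     initial-exec:   load the tp-relative offset from the GOT
     local-exec:     link-time constant offset from the thread pointer
   (see the ELF TLS ABI documents for the exact sequences).  */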
12644 /* Create or return the unique __imp_DECL dllimport symbol corresponding
12647 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
12648 htab_t dllimport_map;
12651 get_dllimport_decl (tree decl)
12653 struct tree_map *h, in;
12656 const char *prefix;
12657 size_t namelen, prefixlen;
12662 if (!dllimport_map)
12663 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
12665 in.hash = htab_hash_pointer (decl);
12666 in.base.from = decl;
12667 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
12668 h = (struct tree_map *) *loc;
12672 *loc = h = ggc_alloc_tree_map ();
12674 h->base.from = decl;
12675 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
12676 VAR_DECL, NULL, ptr_type_node);
12677 DECL_ARTIFICIAL (to) = 1;
12678 DECL_IGNORED_P (to) = 1;
12679 DECL_EXTERNAL (to) = 1;
12680 TREE_READONLY (to) = 1;
12682 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
12683 name = targetm.strip_name_encoding (name);
12684 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
12685 ? "*__imp_" : "*__imp__";
12686 namelen = strlen (name);
12687 prefixlen = strlen (prefix);
12688 imp_name = (char *) alloca (namelen + prefixlen + 1);
12689 memcpy (imp_name, prefix, prefixlen);
12690 memcpy (imp_name + prefixlen, name, namelen + 1);
12692 name = ggc_alloc_string (imp_name, namelen + prefixlen);
12693 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
12694 SET_SYMBOL_REF_DECL (rtl, to);
12695 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
12697 rtl = gen_const_mem (Pmode, rtl);
12698 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
12700 SET_DECL_RTL (to, rtl);
12701 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
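/* Illustrative note (added; not in the original source): for a 32-bit
   decl named "func" the reference built above goes through the import
   pointer slot, e.g.
     movl __imp__func, %eax
     call *%eax
   where __imp__func is filled in by the linker from the import table.  */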
12706 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
12707 true if we require the result be a register. */
12710 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
12715 gcc_assert (SYMBOL_REF_DECL (symbol));
12716 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
12718 x = DECL_RTL (imp_decl);
12720 x = force_reg (Pmode, x);
12724 /* Try machine-dependent ways of modifying an illegitimate address
12725 to be legitimate. If we find one, return the new, valid address.
12726 This macro is used in only one place: `memory_address' in explow.c.
12728 OLDX is the address as it was before break_out_memory_refs was called.
12729 In some cases it is useful to look at this to decide what needs to be done.
12731 It is always safe for this macro to do nothing. It exists to recognize
12732 opportunities to optimize the output.
12734 For the 80386, we handle X+REG by loading X into a register R and
12735 using R+REG. R will go in a general reg and indexing will be used.
12736 However, if REG is a broken-out memory address or multiplication,
12737 nothing needs to be done because REG can certainly go in a general reg.
12739 When -fpic is used, special handling is needed for symbolic references.
12740 See comments by legitimize_pic_address in i386.c for details. */
12743 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
12744 enum machine_mode mode)
12749 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12751 return legitimize_tls_address (x, (enum tls_model) log, false);
12752 if (GET_CODE (x) == CONST
12753 && GET_CODE (XEXP (x, 0)) == PLUS
12754 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12755 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12757 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12758 (enum tls_model) log, false);
12759 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12762 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12764 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
12765 return legitimize_dllimport_symbol (x, true);
12766 if (GET_CODE (x) == CONST
12767 && GET_CODE (XEXP (x, 0)) == PLUS
12768 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12769 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
12771 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
12772 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12776 if (flag_pic && SYMBOLIC_CONST (x))
12777 return legitimize_pic_address (x, 0);
12780 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12781 return machopic_indirect_data_reference (x, 0);
12784 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
12785 if (GET_CODE (x) == ASHIFT
12786 && CONST_INT_P (XEXP (x, 1))
12787 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12790 log = INTVAL (XEXP (x, 1));
12791 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12792 GEN_INT (1 << log));
12795 if (GET_CODE (x) == PLUS)
12797 /* Canonicalize shifts by 0, 1, 2, 3 into a multiply. */
12799 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12800 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12801 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12804 log = INTVAL (XEXP (XEXP (x, 0), 1));
12805 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12806 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12807 GEN_INT (1 << log));
12810 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12811 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12812 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12815 log = INTVAL (XEXP (XEXP (x, 1), 1));
12816 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12817 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12818 GEN_INT (1 << log));
12821 /* Put multiply first if it isn't already. */
12822 if (GET_CODE (XEXP (x, 1)) == MULT)
12824 rtx tmp = XEXP (x, 0);
12825 XEXP (x, 0) = XEXP (x, 1);
12830 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12831 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12832 created by virtual register instantiation, register elimination, and
12833 similar optimizations. */
12834 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12837 x = gen_rtx_PLUS (Pmode,
12838 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12839 XEXP (XEXP (x, 1), 0)),
12840 XEXP (XEXP (x, 1), 1));
12844 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12845 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12846 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12847 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12848 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12849 && CONSTANT_P (XEXP (x, 1)))
12852 rtx other = NULL_RTX;
12854 if (CONST_INT_P (XEXP (x, 1)))
12856 constant = XEXP (x, 1);
12857 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12859 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12861 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12862 other = XEXP (x, 1);
12870 x = gen_rtx_PLUS (Pmode,
12871 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12872 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12873 plus_constant (other, INTVAL (constant)));
12877 if (changed && ix86_legitimate_address_p (mode, x, false))
12880 if (GET_CODE (XEXP (x, 0)) == MULT)
12883 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12886 if (GET_CODE (XEXP (x, 1)) == MULT)
12889 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12893 && REG_P (XEXP (x, 1))
12894 && REG_P (XEXP (x, 0)))
12897 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12900 x = legitimize_pic_address (x, 0);
12903 if (changed && ix86_legitimate_address_p (mode, x, false))
12906 if (REG_P (XEXP (x, 0)))
12908 rtx temp = gen_reg_rtx (Pmode);
12909 rtx val = force_operand (XEXP (x, 1), temp);
12912 if (GET_MODE (val) != Pmode)
12913 val = convert_to_mode (Pmode, val, 1);
12914 emit_move_insn (temp, val);
12917 XEXP (x, 1) = temp;
12921 else if (REG_P (XEXP (x, 1)))
12923 rtx temp = gen_reg_rtx (Pmode);
12924 rtx val = force_operand (XEXP (x, 0), temp);
12927 if (GET_MODE (val) != Pmode)
12928 val = convert_to_mode (Pmode, val, 1);
12929 emit_move_insn (temp, val);
12932 XEXP (x, 0) = temp;
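/* Illustrative note (added; not in the original source): the shift
   canonicalization above rewrites, e.g.,
     (plus (ashift (reg) (const_int 2)) (reg))
   as
     (plus (mult (reg) (const_int 4)) (reg))
   which matches the index*scale part of an i386 address.  */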
12940 /* Print an integer constant expression in assembler syntax. Addition
12941 and subtraction are the only arithmetic that may appear in these
12942 expressions. FILE is the stdio stream to write to, X is the rtx, and
12943 CODE is the operand print code from the output string. */
12946 output_pic_addr_const (FILE *file, rtx x, int code)
12950 switch (GET_CODE (x))
12953 gcc_assert (flag_pic);
12958 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12959 output_addr_const (file, x);
12962 const char *name = XSTR (x, 0);
12964 /* Mark the decl as referenced so that cgraph will
12965 output the function. */
12966 if (SYMBOL_REF_DECL (x))
12967 mark_decl_referenced (SYMBOL_REF_DECL (x));
12970 if (MACHOPIC_INDIRECT
12971 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12972 name = machopic_indirection_name (x, /*stub_p=*/true);
12974 assemble_name (file, name);
12976 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12977 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12978 fputs ("@PLT", file);
12985 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12986 assemble_name (asm_out_file, buf);
12990 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12994 /* This used to output parentheses around the expression,
12995 but that does not work on the 386 (either ATT or BSD assembler). */
12996 output_pic_addr_const (file, XEXP (x, 0), code);
13000 if (GET_MODE (x) == VOIDmode)
13002 /* We can use %d if the number is <32 bits and positive. */
13003 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
13004 fprintf (file, "0x%lx%08lx",
13005 (unsigned long) CONST_DOUBLE_HIGH (x),
13006 (unsigned long) CONST_DOUBLE_LOW (x));
13008 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
13011 /* We can't handle floating point constants;
13012 TARGET_PRINT_OPERAND must handle them. */
13013 output_operand_lossage ("floating constant misused");
13017 /* Some assemblers need integer constants to appear first. */
13018 if (CONST_INT_P (XEXP (x, 0)))
13020 output_pic_addr_const (file, XEXP (x, 0), code);
13022 output_pic_addr_const (file, XEXP (x, 1), code);
13026 gcc_assert (CONST_INT_P (XEXP (x, 1)));
13027 output_pic_addr_const (file, XEXP (x, 1), code);
13029 output_pic_addr_const (file, XEXP (x, 0), code);
13035 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
13036 output_pic_addr_const (file, XEXP (x, 0), code);
13038 output_pic_addr_const (file, XEXP (x, 1), code);
13040 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
13044 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
13046 bool f = i386_asm_output_addr_const_extra (file, x);
13051 gcc_assert (XVECLEN (x, 0) == 1);
13052 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
13053 switch (XINT (x, 1))
13056 fputs ("@GOT", file);
13058 case UNSPEC_GOTOFF:
13059 fputs ("@GOTOFF", file);
13061 case UNSPEC_PLTOFF:
13062 fputs ("@PLTOFF", file);
13065 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13066 "(%rip)" : "[rip]", file);
13068 case UNSPEC_GOTPCREL:
13069 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13070 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
13072 case UNSPEC_GOTTPOFF:
13073 /* FIXME: This might be @TPOFF in Sun ld too. */
13074 fputs ("@gottpoff", file);
13077 fputs ("@tpoff", file);
13079 case UNSPEC_NTPOFF:
13081 fputs ("@tpoff", file);
13083 fputs ("@ntpoff", file);
13085 case UNSPEC_DTPOFF:
13086 fputs ("@dtpoff", file);
13088 case UNSPEC_GOTNTPOFF:
13090 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13091 "@gottpoff(%rip)": "@gottpoff[rip]", file);
13093 fputs ("@gotntpoff", file);
13095 case UNSPEC_INDNTPOFF:
13096 fputs ("@indntpoff", file);
13099 case UNSPEC_MACHOPIC_OFFSET:
13101 machopic_output_function_base_name (file);
13105 output_operand_lossage ("invalid UNSPEC as operand");
13111 output_operand_lossage ("invalid expression as operand");
13115 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
13116 We need to emit DTP-relative relocations. */
13118 static void ATTRIBUTE_UNUSED
13119 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
13121 fputs (ASM_LONG, file);
13122 output_addr_const (file, x);
13123 fputs ("@dtpoff", file);
13129 fputs (", 0", file);
13132 gcc_unreachable ();
13136 /* Return true if X is a representation of the PIC register. This copes
13137 with calls from ix86_find_base_term, where the register might have
13138 been replaced by a cselib value. */
13141 ix86_pic_register_p (rtx x)
13143 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
13144 return (pic_offset_table_rtx
13145 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
13147 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
13150 /* Helper function for ix86_delegitimize_address.
13151 Attempt to delegitimize TLS local-exec accesses. */
13154 ix86_delegitimize_tls_address (rtx orig_x)
13156 rtx x = orig_x, unspec;
13157 struct ix86_address addr;
13159 if (!TARGET_TLS_DIRECT_SEG_REFS)
13163 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
13165 if (ix86_decompose_address (x, &addr) == 0
13166 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
13167 || addr.disp == NULL_RTX
13168 || GET_CODE (addr.disp) != CONST)
13170 unspec = XEXP (addr.disp, 0);
13171 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
13172 unspec = XEXP (unspec, 0);
13173 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
13175 x = XVECEXP (unspec, 0, 0);
13176 gcc_assert (GET_CODE (x) == SYMBOL_REF);
13177 if (unspec != XEXP (addr.disp, 0))
13178 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
13181 rtx idx = addr.index;
13182 if (addr.scale != 1)
13183 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
13184 x = gen_rtx_PLUS (Pmode, idx, x);
13187 x = gen_rtx_PLUS (Pmode, addr.base, x);
13188 if (MEM_P (orig_x))
13189 x = replace_equiv_address_nv (orig_x, x);
13193 /* In the name of slightly smaller debug output, and to cater to
13194 general assembler lossage, recognize PIC+GOTOFF and turn it back
13195 into a direct symbol reference.
13197 On Darwin, this is necessary to avoid a crash, because Darwin
13198 has a different PIC label for each routine but the DWARF debugging
13199 information is not associated with any particular routine, so it's
13200 necessary to remove references to the PIC label from RTL stored by
13201 the DWARF output code. */
13204 ix86_delegitimize_address (rtx x)
13206 rtx orig_x = delegitimize_mem_from_attrs (x);
13207 /* addend is NULL or some rtx if x is something+GOTOFF where
13208 something doesn't include the PIC register. */
13209 rtx addend = NULL_RTX;
13210 /* reg_addend is NULL or a multiple of some register. */
13211 rtx reg_addend = NULL_RTX;
13212 /* const_addend is NULL or a const_int. */
13213 rtx const_addend = NULL_RTX;
13214 /* This is the result, or NULL. */
13215 rtx result = NULL_RTX;
13224 if (GET_CODE (x) != CONST
13225 || GET_CODE (XEXP (x, 0)) != UNSPEC
13226 || (XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
13227 && XINT (XEXP (x, 0), 1) != UNSPEC_PCREL)
13228 || !MEM_P (orig_x))
13229 return ix86_delegitimize_tls_address (orig_x);
13230 x = XVECEXP (XEXP (x, 0), 0, 0);
13231 if (GET_MODE (orig_x) != GET_MODE (x))
13233 x = simplify_gen_subreg (GET_MODE (orig_x), x,
13241 if (GET_CODE (x) != PLUS
13242 || GET_CODE (XEXP (x, 1)) != CONST)
13243 return ix86_delegitimize_tls_address (orig_x);
13245 if (ix86_pic_register_p (XEXP (x, 0)))
13246 /* %ebx + GOT/GOTOFF */
13248 else if (GET_CODE (XEXP (x, 0)) == PLUS)
13250 /* %ebx + %reg * scale + GOT/GOTOFF */
13251 reg_addend = XEXP (x, 0);
13252 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
13253 reg_addend = XEXP (reg_addend, 1);
13254 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
13255 reg_addend = XEXP (reg_addend, 0);
13258 reg_addend = NULL_RTX;
13259 addend = XEXP (x, 0);
13263 addend = XEXP (x, 0);
13265 x = XEXP (XEXP (x, 1), 0);
13266 if (GET_CODE (x) == PLUS
13267 && CONST_INT_P (XEXP (x, 1)))
13269 const_addend = XEXP (x, 1);
13273 if (GET_CODE (x) == UNSPEC
13274 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
13275 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
13276 result = XVECEXP (x, 0, 0);
13278 if (TARGET_MACHO && darwin_local_data_pic (x)
13279 && !MEM_P (orig_x))
13280 result = XVECEXP (x, 0, 0);
13283 return ix86_delegitimize_tls_address (orig_x);
13286 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
13288 result = gen_rtx_PLUS (Pmode, reg_addend, result);
13291 /* If the rest of original X doesn't involve the PIC register, add
13292 addend and subtract pic_offset_table_rtx. This can happen e.g.
13294 leal (%ebx, %ecx, 4), %ecx
13296 movl foo@GOTOFF(%ecx), %edx
13297 in which case we return (%ecx - %ebx) + foo. */
13298 if (pic_offset_table_rtx)
13299 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
13300 pic_offset_table_rtx),
13305 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
13307 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
13308 if (result == NULL_RTX)
13314 /* If X is a machine specific address (i.e. a symbol or label being
13315 referenced as a displacement from the GOT implemented using an
13316 UNSPEC), then return the base term. Otherwise return X. */
13319 ix86_find_base_term (rtx x)
13325 if (GET_CODE (x) != CONST)
13327 term = XEXP (x, 0);
13328 if (GET_CODE (term) == PLUS
13329 && (CONST_INT_P (XEXP (term, 1))
13330 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
13331 term = XEXP (term, 0);
13332 if (GET_CODE (term) != UNSPEC
13333 || (XINT (term, 1) != UNSPEC_GOTPCREL
13334 && XINT (term, 1) != UNSPEC_PCREL))
13337 return XVECEXP (term, 0, 0);
13340 return ix86_delegitimize_address (x);
13344 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
13345 int fp, FILE *file)
13347 const char *suffix;
13349 if (mode == CCFPmode || mode == CCFPUmode)
13351 code = ix86_fp_compare_code_to_integer (code);
13355 code = reverse_condition (code);
13406 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
13410 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
13411 Those same assemblers have the same but opposite lossage on cmov. */
13412 if (mode == CCmode)
13413 suffix = fp ? "nbe" : "a";
13414 else if (mode == CCCmode)
13417 gcc_unreachable ();
13433 gcc_unreachable ();
13437 gcc_assert (mode == CCmode || mode == CCCmode);
13454 gcc_unreachable ();
13458 /* ??? As above. */
13459 gcc_assert (mode == CCmode || mode == CCCmode);
13460 suffix = fp ? "nb" : "ae";
13463 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
13467 /* ??? As above. */
13468 if (mode == CCmode)
13470 else if (mode == CCCmode)
13471 suffix = fp ? "nb" : "ae";
13473 gcc_unreachable ();
13476 suffix = fp ? "u" : "p";
13479 suffix = fp ? "nu" : "np";
13482 gcc_unreachable ();
13484 fputs (suffix, file);
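/* Illustrative note (added; not in the original source): the suffixes
   chosen above are the standard SETcc/Jcc/CMOVcc spellings, e.g. "e"
   for EQ, "b" for LTU, "ae" for GEU; when FP is set the fcmov-style
   spellings such as "nbe" are used instead, as the comments explain.  */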
13487 /* Print the name of register X to FILE based on its machine mode and number.
13488 If CODE is 'w', pretend the mode is HImode.
13489 If CODE is 'b', pretend the mode is QImode.
13490 If CODE is 'k', pretend the mode is SImode.
13491 If CODE is 'q', pretend the mode is DImode.
13492 If CODE is 'x', pretend the mode is V4SFmode.
13493 If CODE is 't', pretend the mode is V8SFmode.
13494 If CODE is 'h', pretend the reg is the 'high' byte register.
13495 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
13496 If CODE is 'd', duplicate the operand for AVX instruction.
13500 print_reg (rtx x, int code, FILE *file)
13503 bool duplicated = code == 'd' && TARGET_AVX;
13505 gcc_assert (x == pc_rtx
13506 || (REGNO (x) != ARG_POINTER_REGNUM
13507 && REGNO (x) != FRAME_POINTER_REGNUM
13508 && REGNO (x) != FLAGS_REG
13509 && REGNO (x) != FPSR_REG
13510 && REGNO (x) != FPCR_REG));
13512 if (ASSEMBLER_DIALECT == ASM_ATT)
13517 gcc_assert (TARGET_64BIT);
13518 fputs ("rip", file);
13522 if (code == 'w' || MMX_REG_P (x))
13524 else if (code == 'b')
13526 else if (code == 'k')
13528 else if (code == 'q')
13530 else if (code == 'y')
13532 else if (code == 'h')
13534 else if (code == 'x')
13536 else if (code == 't')
13539 code = GET_MODE_SIZE (GET_MODE (x));
13541 /* Irritatingly, AMD extended registers use a different naming convention
13542 from the normal registers: "r%d[bwd]" */
13543 if (REX_INT_REG_P (x))
13545 gcc_assert (TARGET_64BIT);
13547 fprint_ul (file, REGNO (x) - FIRST_REX_INT_REG + 8);
13551 error ("extended registers have no high halves");
13566 error ("unsupported operand size for extended register");
13576 if (STACK_TOP_P (x))
13585 if (! ANY_FP_REG_P (x))
13586 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
13591 reg = hi_reg_name[REGNO (x)];
13594 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
13596 reg = qi_reg_name[REGNO (x)];
13599 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
13601 reg = qi_high_reg_name[REGNO (x)];
13606 gcc_assert (!duplicated);
13608 fputs (hi_reg_name[REGNO (x)] + 1, file);
13613 gcc_unreachable ();
13619 if (ASSEMBLER_DIALECT == ASM_ATT)
13620 fprintf (file, ", %%%s", reg);
13622 fprintf (file, ", %s", reg);
13626 /* Locate some local-dynamic symbol still in use by this function
13627 so that we can print its name in some tls_local_dynamic_base
13631 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
13635 if (GET_CODE (x) == SYMBOL_REF
13636 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
13638 cfun->machine->some_ld_name = XSTR (x, 0);
13645 static const char *
13646 get_some_local_dynamic_name (void)
13650 if (cfun->machine->some_ld_name)
13651 return cfun->machine->some_ld_name;
13653 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13654 if (NONDEBUG_INSN_P (insn)
13655 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
13656 return cfun->machine->some_ld_name;
13661 /* Meaning of CODE:
13662 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13663 C -- print opcode suffix for set/cmov insn.
13664 c -- like C, but print reversed condition
13665 F,f -- likewise, but for floating-point.
13666 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13668 R -- print the prefix for register names.
13669 z -- print the opcode suffix for the size of the current operand.
13670 Z -- likewise, with special suffixes for x87 instructions.
13671 * -- print a star (in certain assembler syntax)
13672 A -- print an absolute memory reference.
13673 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13674 s -- print a shift double count, followed by the assembler's argument
13676 b -- print the QImode name of the register for the indicated operand.
13677 %b0 would print %al if operands[0] is reg 0.
13678 w -- likewise, print the HImode name of the register.
13679 k -- likewise, print the SImode name of the register.
13680 q -- likewise, print the DImode name of the register.
13681 x -- likewise, print the V4SFmode name of the register.
13682 t -- likewise, print the V8SFmode name of the register.
13683 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13684 y -- print "st(0)" instead of "st" as a register.
13685 d -- print duplicated register operand for AVX instruction.
13686 D -- print condition for SSE cmp instruction.
13687 P -- if PIC, print an @PLT suffix.
13688 p -- print raw symbol name.
13689 X -- don't print any sort of PIC '@' suffix for a symbol.
13690 & -- print some in-use local-dynamic symbol name.
13691 H -- print a memory address offset by 8; used for sse high-parts
13692 Y -- print condition for XOP pcom* instruction.
13693 + -- print a branch hint as 'cs' or 'ds' prefix
13694 ; -- print a semicolon (after prefixes due to bug in older gas).
13695 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13696 @ -- print a segment register of thread base pointer load
13700 ix86_print_operand (FILE *file, rtx x, int code)
13707 if (ASSEMBLER_DIALECT == ASM_ATT)
13713 const char *name = get_some_local_dynamic_name ();
13715 output_operand_lossage ("'%%&' used without any "
13716 "local dynamic TLS references");
13718 assemble_name (file, name);
13723 switch (ASSEMBLER_DIALECT)
13730 /* Intel syntax. For absolute addresses, registers should not
13731 be surrounded by braces. */
13735 ix86_print_operand (file, x, 0);
13742 gcc_unreachable ();
13745 ix86_print_operand (file, x, 0);
13750 if (ASSEMBLER_DIALECT == ASM_ATT)
13755 if (ASSEMBLER_DIALECT == ASM_ATT)
13760 if (ASSEMBLER_DIALECT == ASM_ATT)
13765 if (ASSEMBLER_DIALECT == ASM_ATT)
13770 if (ASSEMBLER_DIALECT == ASM_ATT)
13775 if (ASSEMBLER_DIALECT == ASM_ATT)
13780 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13782 /* Opcodes don't get size suffixes if using Intel opcodes. */
13783 if (ASSEMBLER_DIALECT == ASM_INTEL)
13786 switch (GET_MODE_SIZE (GET_MODE (x)))
13805 output_operand_lossage
13806 ("invalid operand size for operand code '%c'", code);
13811 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13813 (0, "non-integer operand used with operand code '%c'", code);
13817 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
13818 if (ASSEMBLER_DIALECT == ASM_INTEL)
13821 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13823 switch (GET_MODE_SIZE (GET_MODE (x)))
13826 #ifdef HAVE_AS_IX86_FILDS
13836 #ifdef HAVE_AS_IX86_FILDQ
13839 fputs ("ll", file);
13847 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13849 /* 387 opcodes don't get size suffixes
13850 if the operands are registers. */
13851 if (STACK_REG_P (x))
13854 switch (GET_MODE_SIZE (GET_MODE (x)))
13875 output_operand_lossage
13876 ("invalid operand type used with operand code '%c'", code);
13880 output_operand_lossage
13881 ("invalid operand size for operand code '%c'", code);
13899 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13901 ix86_print_operand (file, x, 0);
13902 fputs (", ", file);
13907 /* Little bit of braindamage here. The SSE compare instructions
13908 use completely different names for the comparisons than the
13909 fp conditional moves do. */
13912 switch (GET_CODE (x))
13915 fputs ("eq", file);
13918 fputs ("eq_us", file);
13921 fputs ("lt", file);
13924 fputs ("nge", file);
13927 fputs ("le", file);
13930 fputs ("ngt", file);
13933 fputs ("unord", file);
13936 fputs ("neq", file);
13939 fputs ("neq_oq", file);
13942 fputs ("ge", file);
13945 fputs ("nlt", file);
13948 fputs ("gt", file);
13951 fputs ("nle", file);
13954 fputs ("ord", file);
13957 output_operand_lossage ("operand is not a condition code, "
13958 "invalid operand code 'D'");
13964 switch (GET_CODE (x))
13968 fputs ("eq", file);
13972 fputs ("lt", file);
13976 fputs ("le", file);
13979 fputs ("unord", file);
13983 fputs ("neq", file);
13987 fputs ("nlt", file);
13991 fputs ("nle", file);
13994 fputs ("ord", file);
13997 output_operand_lossage ("operand is not a condition code, "
13998 "invalid operand code 'D'");
14004 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14005 if (ASSEMBLER_DIALECT == ASM_ATT)
14007 switch (GET_MODE (x))
14009 case HImode: putc ('w', file); break;
14011 case SFmode: putc ('l', file); break;
14013 case DFmode: putc ('q', file); break;
14014 default: gcc_unreachable ();
14021 if (!COMPARISON_P (x))
14023 output_operand_lossage ("operand is neither a constant nor a "
14024 "condition code, invalid operand code "
14028 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
14031 if (!COMPARISON_P (x))
14033 output_operand_lossage ("operand is neither a constant nor a "
14034 "condition code, invalid operand code "
14038 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14039 if (ASSEMBLER_DIALECT == ASM_ATT)
14042 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
14045 /* Like above, but reverse condition */
14047 /* Check to see if the argument to %c is really a constant
14048 and not a condition code which needs to be reversed. */
14049 if (!COMPARISON_P (x))
14051 output_operand_lossage ("operand is neither a constant nor a "
14052 "condition code, invalid operand "
14056 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
14059 if (!COMPARISON_P (x))
14061 output_operand_lossage ("operand is neither a constant nor a "
14062 "condition code, invalid operand "
14066 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14067 if (ASSEMBLER_DIALECT == ASM_ATT)
14070 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
14074 /* It doesn't actually matter what mode we use here, as we're
14075 only going to use this for printing. */
14076 x = adjust_address_nv (x, DImode, 8);
14084 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
14087 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
14090 int pred_val = INTVAL (XEXP (x, 0));
14092 if (pred_val < REG_BR_PROB_BASE * 45 / 100
14093 || pred_val > REG_BR_PROB_BASE * 55 / 100)
14095 int taken = pred_val > REG_BR_PROB_BASE / 2;
14096 int cputaken = final_forward_branch_p (current_output_insn) == 0;
14098 /* Emit hints only in the case the default branch prediction
14099 heuristics would fail. */
14100 if (taken != cputaken)
14102 /* We use 3e (DS) prefix for taken branches and
14103 2e (CS) prefix for not taken branches. */
14105 fputs ("ds ; ", file);
14107 fputs ("cs ; ", file);
14115 switch (GET_CODE (x))
14118 fputs ("neq", file);
14121 fputs ("eq", file);
14125 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
14129 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
14133 fputs ("le", file);
14137 fputs ("lt", file);
14140 fputs ("unord", file);
14143 fputs ("ord", file);
14146 fputs ("ueq", file);
14149 fputs ("nlt", file);
14152 fputs ("nle", file);
14155 fputs ("ule", file);
14158 fputs ("ult", file);
14161 fputs ("une", file);
14164 output_operand_lossage ("operand is not a condition code, "
14165 "invalid operand code 'Y'");
14171 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
14177 if (ASSEMBLER_DIALECT == ASM_ATT)
14180 /* The kernel uses a different segment register for performance
14181 reasons; a system call would not have to trash the userspace
14182 segment register, which would be expensive. */
14183 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
14184 fputs ("fs", file);
14186 fputs ("gs", file);
14190 putc (TARGET_AVX2 ? 'i' : 'f', file);
14194 output_operand_lossage ("invalid operand code '%c'", code);
14199 print_reg (x, code, file);
14201 else if (MEM_P (x))
14203 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
14204 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
14205 && GET_MODE (x) != BLKmode)
14208 switch (GET_MODE_SIZE (GET_MODE (x)))
14210 case 1: size = "BYTE"; break;
14211 case 2: size = "WORD"; break;
14212 case 4: size = "DWORD"; break;
14213 case 8: size = "QWORD"; break;
14214 case 12: size = "TBYTE"; break;
14216 if (GET_MODE (x) == XFmode)
14221 case 32: size = "YMMWORD"; break;
14223 gcc_unreachable ();
14226 /* Check for explicit size override (codes 'b', 'w', 'k',
14230 else if (code == 'w')
14232 else if (code == 'k')
14234 else if (code == 'q')
14236 else if (code == 'x')
14239 fputs (size, file);
14240 fputs (" PTR ", file);
14244 /* Avoid (%rip) for call operands. */
14245 if (CONSTANT_ADDRESS_P (x) && code == 'P'
14246 && !CONST_INT_P (x))
14247 output_addr_const (file, x);
14248 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
14249 output_operand_lossage ("invalid constraints for operand");
14251 output_address (x);
14254 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
14259 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14260 REAL_VALUE_TO_TARGET_SINGLE (r, l);
14262 if (ASSEMBLER_DIALECT == ASM_ATT)
14264 /* Sign extend 32bit SFmode immediate to 8 bytes. */
14266 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
14268 fprintf (file, "0x%08x", (unsigned int) l);
14271 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
14276 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14277 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14279 if (ASSEMBLER_DIALECT == ASM_ATT)
14281 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
14284 /* These float cases don't actually occur as immediate operands. */
14285 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
14289 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
14290 fputs (dstr, file);
14295 /* We have patterns that allow zero sets of memory, for instance.
14296 In 64-bit mode, we should probably support all 8-byte vectors,
14297 since we can in fact encode that into an immediate. */
14298 if (GET_CODE (x) == CONST_VECTOR)
14300 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
14304 if (code != 'P' && code != 'p')
14306 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
14308 if (ASSEMBLER_DIALECT == ASM_ATT)
14311 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
14312 || GET_CODE (x) == LABEL_REF)
14314 if (ASSEMBLER_DIALECT == ASM_ATT)
14317 fputs ("OFFSET FLAT:", file);
14320 if (CONST_INT_P (x))
14321 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14322 else if (flag_pic || MACHOPIC_INDIRECT)
14323 output_pic_addr_const (file, x, code);
14325 output_addr_const (file, x);
14330 ix86_print_operand_punct_valid_p (unsigned char code)
14332 return (code == '@' || code == '*' || code == '+'
14333 || code == '&' || code == ';' || code == '~');
14336 /* Print a memory operand whose address is ADDR. */
14339 ix86_print_operand_address (FILE *file, rtx addr)
14341 struct ix86_address parts;
14342 rtx base, index, disp;
14347 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
14349 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
14350 gcc_assert (parts.index == NULL_RTX);
14351 parts.index = XVECEXP (addr, 0, 1);
14352 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
14353 addr = XVECEXP (addr, 0, 0);
14357 ok = ix86_decompose_address (addr, &parts);
14361 if (parts.base && GET_CODE (parts.base) == SUBREG)
14363 rtx tmp = SUBREG_REG (parts.base);
14364 parts.base = simplify_subreg (GET_MODE (parts.base),
14365 tmp, GET_MODE (tmp), 0);
14368 if (parts.index && GET_CODE (parts.index) == SUBREG)
14370 rtx tmp = SUBREG_REG (parts.index);
14371 parts.index = simplify_subreg (GET_MODE (parts.index),
14372 tmp, GET_MODE (tmp), 0);
14376 index = parts.index;
14378 scale = parts.scale;
14386 if (ASSEMBLER_DIALECT == ASM_ATT)
14388 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
14391 gcc_unreachable ();
14394 /* Use the one-byte-shorter RIP relative addressing for 64bit mode. */
14395 if (TARGET_64BIT && !base && !index)
14399 if (GET_CODE (disp) == CONST
14400 && GET_CODE (XEXP (disp, 0)) == PLUS
14401 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14402 symbol = XEXP (XEXP (disp, 0), 0);
14404 if (GET_CODE (symbol) == LABEL_REF
14405 || (GET_CODE (symbol) == SYMBOL_REF
14406 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
14409 if (!base && !index)
14411 /* Displacement only requires special attention. */
14413 if (CONST_INT_P (disp))
14415 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
14416 fputs ("ds:", file);
14417 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
14420 output_pic_addr_const (file, disp, 0);
14422 output_addr_const (file, disp);
14428 /* Print SImode registers for zero-extended addresses to force
14429 addr32 prefix. Otherwise print DImode registers to avoid it. */
14431 code = ((GET_CODE (addr) == ZERO_EXTEND
14432 || GET_CODE (addr) == AND)
14436 if (ASSEMBLER_DIALECT == ASM_ATT)
14441 output_pic_addr_const (file, disp, 0);
14442 else if (GET_CODE (disp) == LABEL_REF)
14443 output_asm_label (disp);
14445 output_addr_const (file, disp);
14450 print_reg (base, code, file);
14454 print_reg (index, vsib ? 0 : code, file);
14455 if (scale != 1 || vsib)
14456 fprintf (file, ",%d", scale);
14462 rtx offset = NULL_RTX;
14466 /* Pull out the offset of a symbol; print any symbol itself. */
14467 if (GET_CODE (disp) == CONST
14468 && GET_CODE (XEXP (disp, 0)) == PLUS
14469 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14471 offset = XEXP (XEXP (disp, 0), 1);
14472 disp = gen_rtx_CONST (VOIDmode,
14473 XEXP (XEXP (disp, 0), 0));
14477 output_pic_addr_const (file, disp, 0);
14478 else if (GET_CODE (disp) == LABEL_REF)
14479 output_asm_label (disp);
14480 else if (CONST_INT_P (disp))
14483 output_addr_const (file, disp);
14489 print_reg (base, code, file);
14492 if (INTVAL (offset) >= 0)
14494 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14498 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14505 print_reg (index, vsib ? 0 : code, file);
14506 if (scale != 1 || vsib)
14507 fprintf (file, "*%d", scale);
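/* Illustrative note (added; not in the original source): the same
   operand prints as  -4(%ebp,%ecx,4)  in AT&T syntax and as
   DWORD PTR [ebp+ecx*4-4]  in Intel syntax; the size prefix comes
   from ix86_print_operand above, the rest from this routine.  */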
14514 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14517 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14521 if (GET_CODE (x) != UNSPEC)
14524 op = XVECEXP (x, 0, 0);
14525 switch (XINT (x, 1))
14527 case UNSPEC_GOTTPOFF:
14528 output_addr_const (file, op);
14529 /* FIXME: This might be @TPOFF in Sun ld. */
14530 fputs ("@gottpoff", file);
14533 output_addr_const (file, op);
14534 fputs ("@tpoff", file);
14536 case UNSPEC_NTPOFF:
14537 output_addr_const (file, op);
14539 fputs ("@tpoff", file);
14541 fputs ("@ntpoff", file);
14543 case UNSPEC_DTPOFF:
14544 output_addr_const (file, op);
14545 fputs ("@dtpoff", file);
14547 case UNSPEC_GOTNTPOFF:
14548 output_addr_const (file, op);
14550 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14551 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14553 fputs ("@gotntpoff", file);
14555 case UNSPEC_INDNTPOFF:
14556 output_addr_const (file, op);
14557 fputs ("@indntpoff", file);
14560 case UNSPEC_MACHOPIC_OFFSET:
14561 output_addr_const (file, op);
14563 machopic_output_function_base_name (file);
14567 case UNSPEC_STACK_CHECK:
14571 gcc_assert (flag_split_stack);
14573 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
14574 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
14576 gcc_unreachable ();
14579 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
14590 /* Split one or more double-mode RTL references into pairs of half-mode
14591 references. The RTL can be REG, offsettable MEM, integer constant, or
14592 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
14593 split and "num" is its length. lo_half and hi_half are output arrays
14594 that parallel "operands". */
14597 split_double_mode (enum machine_mode mode, rtx operands[],
14598 int num, rtx lo_half[], rtx hi_half[])
14600 enum machine_mode half_mode;
14606 half_mode = DImode;
14609 half_mode = SImode;
14612 gcc_unreachable ();
14615 byte = GET_MODE_SIZE (half_mode);
14619 rtx op = operands[num];
14621 /* simplify_subreg refuses to split volatile memory addresses,
14622 but we still have to handle them. */
14625 lo_half[num] = adjust_address (op, half_mode, 0);
14626 hi_half[num] = adjust_address (op, half_mode, byte);
14630 lo_half[num] = simplify_gen_subreg (half_mode, op,
14631 GET_MODE (op) == VOIDmode
14632 ? mode : GET_MODE (op), 0);
14633 hi_half[num] = simplify_gen_subreg (half_mode, op,
14634 GET_MODE (op) == VOIDmode
14635 ? mode : GET_MODE (op), byte);
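/* Illustrative note (added; not in the original source): for a DImode
   register pair on ia32 this yields SImode halves at byte offsets 0
   and 4; for TImode on x86-64 it yields DImode halves at offsets 0
   and 8.  */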
14640 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14641 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14642 is the expression of the binary operation. The output may either be
14643 emitted here, or returned to the caller, like all output_* functions.
14645 There is no guarantee that the operands are the same mode, as they
14646 might be within FLOAT or FLOAT_EXTEND expressions. */
14648 #ifndef SYSV386_COMPAT
14649 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14650 wants to fix the assemblers because that causes incompatibility
14651 with gcc. No-one wants to fix gcc because that causes
14652 incompatibility with assemblers... You can use the option of
14653 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14654 #define SYSV386_COMPAT 1
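/* Illustrative note (added; not in the original source): the
   incompatibility concerns fsub{r}/fdiv{r} when the destination is
   not st(0); AT&T-derived assemblers swap the r/non-r mnemonics
   there, so with SYSV386_COMPAT=1 gcc emits the swapped spelling to
   match them, as described in the comment inside the function below.  */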
14658 output_387_binary_op (rtx insn, rtx *operands)
14660 static char buf[40];
14663 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
14665 #ifdef ENABLE_CHECKING
14666 /* Even if we do not want to check the inputs, this documents the input
14667 constraints, which helps in understanding the following code. */
14668 if (STACK_REG_P (operands[0])
14669 && ((REG_P (operands[1])
14670 && REGNO (operands[0]) == REGNO (operands[1])
14671 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14672 || (REG_P (operands[2])
14673 && REGNO (operands[0]) == REGNO (operands[2])
14674 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14675 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14678 gcc_assert (is_sse);
14681 switch (GET_CODE (operands[3]))
14684 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14685 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14693 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14694 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14702 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14703 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14711 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14712 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14720 gcc_unreachable ();
14727 strcpy (buf, ssep);
14728 if (GET_MODE (operands[0]) == SFmode)
14729 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
14731 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
14735 strcpy (buf, ssep + 1);
14736 if (GET_MODE (operands[0]) == SFmode)
14737 strcat (buf, "ss\t{%2, %0|%0, %2}");
14739 strcat (buf, "sd\t{%2, %0|%0, %2}");
14745 switch (GET_CODE (operands[3]))
14749 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14751 rtx temp = operands[2];
14752 operands[2] = operands[1];
14753 operands[1] = temp;
14756 /* know operands[0] == operands[1]. */
14758 if (MEM_P (operands[2]))
14764 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14766 if (STACK_TOP_P (operands[0]))
14767 /* How is it that we are storing to a dead operand[2]?
14768 Well, presumably operands[1] is dead too. We can't
14769 store the result to st(0) as st(0) gets popped on this
14770 instruction. Instead store to operands[2] (which I
14771 think has to be st(1)). st(1) will be popped later.
14772 gcc <= 2.8.1 didn't have this check and generated
14773 assembly code that the Unixware assembler rejected. */
14774 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14776 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14780 if (STACK_TOP_P (operands[0]))
14781 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14783 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14788 if (MEM_P (operands[1]))
14794 if (MEM_P (operands[2]))
14800 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14803 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14804 derived assemblers, confusingly reverse the direction of
14805 the operation for fsub{r} and fdiv{r} when the
14806 destination register is not st(0). The Intel assembler
14807 doesn't have this brain damage. Read !SYSV386_COMPAT to
14808 figure out what the hardware really does. */
14809 if (STACK_TOP_P (operands[0]))
14810 p = "{p\t%0, %2|rp\t%2, %0}";
14812 p = "{rp\t%2, %0|p\t%0, %2}";
14814 if (STACK_TOP_P (operands[0]))
14815 /* As above for fmul/fadd, we can't store to st(0). */
14816 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14818 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14823 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14826 if (STACK_TOP_P (operands[0]))
14827 p = "{rp\t%0, %1|p\t%1, %0}";
14829 p = "{p\t%1, %0|rp\t%0, %1}";
14831 if (STACK_TOP_P (operands[0]))
14832 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14834 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14839 if (STACK_TOP_P (operands[0]))
14841 if (STACK_TOP_P (operands[1]))
14842 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14844 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14847 else if (STACK_TOP_P (operands[1]))
14850 p = "{\t%1, %0|r\t%0, %1}";
14852 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14858 p = "{r\t%2, %0|\t%0, %2}";
14860 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14866 gcc_unreachable ();
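/* A short note on the "{att|intel}" braces in the templates above: they are
   GCC's assembler-dialect selector, not literal output.  The text before
   the '|' is emitted when ASSEMBLER_DIALECT == ASM_ATT and the text after
   it for the Intel dialect, which is why each template lists the operands
   twice in opposite orders.  */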
/* Return needed mode for entity in optimize_mode_switching pass.  */

int
ix86_mode_needed (int entity, rtx insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
          && (asm_noperands (PATTERN (insn)) >= 0
              || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
        return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
        return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
        return mode;
      break;

    case I387_MASK_PM:
      if (mode == I387_CW_MASK_PM)
        return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
      || optimize_function_for_size_p (cfun))
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    {
      switch (mode)
        {
        case I387_CW_TRUNC:
          /* round toward zero (truncate) */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
          slot = SLOT_CW_TRUNC;
          break;

        case I387_CW_FLOOR:
          /* round down toward -oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
          slot = SLOT_CW_FLOOR;
          break;

        case I387_CW_CEIL:
          /* round up toward +oo */
          emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
          slot = SLOT_CW_CEIL;
          break;

        case I387_CW_MASK_PM:
          /* mask precision exception for nearbyint() */
          emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
          slot = SLOT_CW_MASK_PM;
          break;

        default:
          gcc_unreachable ();
        }
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
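/* For reference, the constants above come from the x87 control word
   layout: bits 10-11 are the rounding control (00 = to nearest,
   01 = down, 10 = up, 11 = toward zero), so the code ORs in 0x0400,
   0x0800 or 0x0c00 respectively; bit 5 (0x0020) masks the precision
   exception for nearbyint.  */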
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
{
  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
  int dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
      output_asm_insn ("fisttp%Z0\t%0", operands);
  else
    {
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%3", operands);
      if (stack_top_dies || dimode_p)
        output_asm_insn ("fistp%Z0\t%0", operands);
      else
        output_asm_insn ("fist%Z0\t%0", operands);
      if (round_mode != I387_CW_ANY)
        output_asm_insn ("fldcw\t%2", operands);
    }

  return "";
}
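/* An illustrative (not exhaustive) emitted sequence for a DImode
   truncation without fisttp, following the logic above:

       fld     %st(0)          load a copy; the popping store consumes it
       fldcw   %3              switch to the truncating control word
       fistp   ...             popping store to the integer destination
       fldcw   %2              restore the previous control word  */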
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (FP_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
{
  int stack_top_dies;
  rtx cmp_op0, cmp_op1;
  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);

  if (eflags_p)
    {
      cmp_op0 = operands[0];
      cmp_op1 = operands[1];
    }
  else
    {
      cmp_op0 = operands[1];
      cmp_op1 = operands[2];
    }

  if (is_sse)
    {
      if (GET_MODE (operands[0]) == SFmode)
        if (unordered_p)
          return "%vucomiss\t{%1, %0|%0, %1}";
        else
          return "%vcomiss\t{%1, %0|%0, %1}";
      else
        if (unordered_p)
          return "%vucomisd\t{%1, %0|%0, %1}";
        else
          return "%vcomisd\t{%1, %0|%0, %1}";
    }

  gcc_assert (STACK_TOP_P (cmp_op0));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;

  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
    {
      if (stack_top_dies)
        {
          output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
          return output_387_ffreep (operands, 1);
        }
      else
        return "ftst\n\tfnstsw\t%0";
    }

  if (STACK_REG_P (cmp_op1)
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
      && REGNO (cmp_op1) != FIRST_STACK_REG)
    {
      /* If both the top of the 387 stack dies, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */

      if (eflags_p)
        {
          /* There is no double popping fcomi variant.  Fortunately,
             eflags is immune from the fstp's cc clobbering.  */
          if (unordered_p)
            output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
          else
            output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
          return output_387_ffreep (operands, 0);
        }
      else
        {
          if (unordered_p)
            return "fucompp\n\tfnstsw\t%0";
          else
            return "fcompp\n\tfnstsw\t%0";
        }
    }
  else
    {
      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */

      static const char * const alt[16] =
      {
        "fcom%Z2\t%y2\n\tfnstsw\t%0",
        "fcomp%Z2\t%y2\n\tfnstsw\t%0",
        "fucom%Z2\t%y2\n\tfnstsw\t%0",
        "fucomp%Z2\t%y2\n\tfnstsw\t%0",

        "ficom%Z2\t%y2\n\tfnstsw\t%0",
        "ficomp%Z2\t%y2\n\tfnstsw\t%0",
        NULL,
        NULL,

        "fcomi\t{%y1, %0|%0, %y1}",
        "fcomip\t{%y1, %0|%0, %y1}",
        "fucomi\t{%y1, %0|%0, %y1}",
        "fucomip\t{%y1, %0|%0, %y1}",

        NULL,
        NULL,
        NULL,
        NULL
      };

      int mask;
      const char *ret;

      mask  = eflags_p << 3;
      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
      mask |= unordered_p << 1;
      mask |= stack_top_dies;

      gcc_assert (mask < 16);
      ret = alt[mask];
      gcc_assert (ret);

      return ret;
    }
}
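/* Worked example of the mask encoding above: eflags_p = 1, an FP (not
   integer) cmp_op1, unordered_p = 0 and stack_top_dies = 1 gives
   mask = (1 << 3) | 0 | 0 | 1 = 9, which selects the popping
   flags-setting compare "fcomip\t{%y1, %0|%0, %y1}".  */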
void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
             directive, LPREFIX, value, LPREFIX, rel);
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
                 GOT_SYMBOL_NAME, LPREFIX, value);
}
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */

void
ix86_expand_clear (rtx dest)
{
  rtx tmp;

  /* We play register width games, which are only valid after reload.  */
  gcc_assert (reload_completed);

  /* Avoid HImode and its attendant prefix byte.  */
  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
    dest = gen_rtx_REG (SImode, REGNO (dest));
  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);

  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
  if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
    {
      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
    }

  emit_insn (tmp);
}
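/* Note on the choice above: "xor %eax, %eax" is a 2-byte encoding
   whereas "mov $0, %eax" needs 5 bytes, and the xor idiom also breaks
   false dependencies on the old register value.  The plain mov form is
   kept only when TARGET_USE_MOV0 asks for it and the insn is not being
   optimized for speed, because the xor clobbers the flags register --
   hence the explicit CLOBBER in the parallel.  */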
/* X is an unchanging MEM.  If it is a constant pool reference, return
   the constant pool rtx, else NULL.  */

rtx
maybe_get_pool_constant (rtx x)
{
  x = ix86_delegitimize_address (XEXP (x, 0));

  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
    return get_pool_constant (x);

  return NULL_RTX;
}
void
ix86_expand_move (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1;
  enum tls_model model;

  op0 = operands[0];
  op1 = operands[1];

  if (GET_CODE (op1) == SYMBOL_REF)
    {
      model = SYMBOL_REF_TLS_MODEL (op1);
      if (model)
        {
          op1 = legitimize_tls_address (op1, model, true);
          op1 = force_operand (op1, op0);
          if (op1 == op0)
            return;
          if (GET_MODE (op1) != mode)
            op1 = convert_to_mode (mode, op1, 1);
        }
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
               && SYMBOL_REF_DLLIMPORT_P (op1))
        op1 = legitimize_dllimport_symbol (op1, false);
    }
  else if (GET_CODE (op1) == CONST
           && GET_CODE (XEXP (op1, 0)) == PLUS
           && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
    {
      rtx addend = XEXP (XEXP (op1, 0), 1);
      rtx symbol = XEXP (XEXP (op1, 0), 0);
      rtx tmp = NULL;

      model = SYMBOL_REF_TLS_MODEL (symbol);
      if (model)
        tmp = legitimize_tls_address (symbol, model, true);
      else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
               && SYMBOL_REF_DLLIMPORT_P (symbol))
        tmp = legitimize_dllimport_symbol (symbol, true);

      if (tmp)
        {
          tmp = force_operand (tmp, NULL);
          tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
                                     op0, 1, OPTAB_DIRECT);
          if (tmp == op0)
            return;
          if (GET_MODE (tmp) != mode)
            op1 = convert_to_mode (mode, tmp, 1);
        }
    }

  if ((flag_pic || MACHOPIC_INDIRECT)
      && symbolic_operand (op1, mode))
    {
      if (TARGET_MACHO && !TARGET_64BIT)
        {
#if TARGET_MACHO
          /* dynamic-no-pic */
          if (MACHOPIC_INDIRECT)
            {
              rtx temp = ((reload_in_progress
                           || ((op0 && REG_P (op0))
                               && mode == Pmode))
                          ? op0 : gen_reg_rtx (Pmode));
              op1 = machopic_indirect_data_reference (op1, temp);
              if (MACHOPIC_PURE)
                op1 = machopic_legitimize_pic_address (op1, mode,
                                                       temp == op1 ? 0 : temp);
            }
          if (op0 != op1 && GET_CODE (op0) != MEM)
            {
              rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
              emit_insn (insn);
              return;
            }
          if (GET_CODE (op0) == MEM)
            op1 = force_reg (Pmode, op1);
          else
            {
              rtx temp = op0;
              if (GET_CODE (temp) != REG)
                temp = gen_reg_rtx (Pmode);
              temp = legitimize_pic_address (op1, temp);
              if (temp == op0)
                return;
              op1 = temp;
            }
          /* dynamic-no-pic */
#endif
        }
      else
        {
          if (MEM_P (op0))
            op1 = force_reg (mode, op1);
          else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
            {
              rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
              op1 = legitimize_pic_address (op1, reg);
              if (op0 == op1)
                return;
              if (GET_MODE (op1) != mode)
                op1 = convert_to_mode (mode, op1, 1);
            }
        }
    }
  else
    {
      if (MEM_P (op0)
          && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
              || !push_operand (op0, mode))
          && MEM_P (op1))
        op1 = force_reg (mode, op1);

      if (push_operand (op0, mode)
          && ! general_no_elim_operand (op1, mode))
        op1 = copy_to_mode_reg (mode, op1);

      /* Force large constants in 64bit compilation into register
         to get them CSEed.  */
      if (can_create_pseudo_p ()
          && (mode == DImode) && TARGET_64BIT
          && immediate_operand (op1, mode)
          && !x86_64_zext_immediate_operand (op1, VOIDmode)
          && !register_operand (op0, mode)
          && optimize)
        op1 = copy_to_mode_reg (mode, op1);

      if (can_create_pseudo_p ()
          && FLOAT_MODE_P (mode)
          && GET_CODE (op1) == CONST_DOUBLE)
        {
          /* If we are loading a floating point constant to a register,
             force the value to memory now, since we'll get better code
             out the back end.  */

          op1 = validize_mem (force_const_mem (mode, op1));
          if (!register_operand (op0, mode))
            {
              rtx temp = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
              emit_move_insn (op0, temp);
              return;
            }
        }
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
void
ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0], op1 = operands[1];
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register, once we have that information we may be able
     to handle some of them more efficiently.  */
  if (can_create_pseudo_p ()
      && register_operand (op0, mode)
      && (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
      && !standard_sse_constant_p (op1))
    op1 = validize_mem (force_const_mem (mode, op1));

  /* We need to check memory alignment for SSE mode since attribute
     can make operands unaligned.  */
  if (can_create_pseudo_p ()
      && SSE_REG_MODE_P (mode)
      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
          || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
    {
      rtx tmp[2];

      /* ix86_expand_vector_move_misalign() does not like constants ... */
      if (CONSTANT_P (op1)
          || (GET_CODE (op1) == SUBREG
              && CONSTANT_P (SUBREG_REG (op1))))
        op1 = validize_mem (force_const_mem (mode, op1));

      /* ... nor both arguments in memory.  */
      if (!register_operand (op0, mode)
          && !register_operand (op1, mode))
        op1 = force_reg (mode, op1);

      tmp[0] = op0; tmp[1] = op1;
      ix86_expand_vector_move_misalign (mode, tmp);
      return;
    }

  /* Make operand1 a register if it isn't already.  */
  if (can_create_pseudo_p ()
      && !register_operand (op0, mode)
      && !register_operand (op1, mode))
    {
      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
      return;
    }

  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
}
/* Split 32-byte AVX unaligned load and store if needed.  */

static void
ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
{
  rtx m;
  rtx (*extract) (rtx, rtx, rtx);
  rtx (*move_unaligned) (rtx, rtx);
  enum machine_mode mode;

  switch (GET_MODE (op0))
    {
    default:
      gcc_unreachable ();
    case V32QImode:
      extract = gen_avx_vextractf128v32qi;
      move_unaligned = gen_avx_movdqu256;
      mode = V16QImode;
      break;
    case V8SFmode:
      extract = gen_avx_vextractf128v8sf;
      move_unaligned = gen_avx_movups256;
      mode = V4SFmode;
      break;
    case V4DFmode:
      extract = gen_avx_vextractf128v4df;
      move_unaligned = gen_avx_movupd256;
      mode = V2DFmode;
      break;
    }

  if (MEM_P (op1) && TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
    {
      rtx r = gen_reg_rtx (mode);
      m = adjust_address (op1, mode, 0);
      emit_move_insn (r, m);
      m = adjust_address (op1, mode, 16);
      r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
      emit_move_insn (op0, r);
    }
  else if (MEM_P (op0) && TARGET_AVX256_SPLIT_UNALIGNED_STORE)
    {
      m = adjust_address (op0, mode, 0);
      emit_insn (extract (m, op1, const0_rtx));
      m = adjust_address (op0, mode, 16);
      emit_insn (extract (m, op1, const1_rtx));
    }
  else
    emit_insn (move_unaligned (op0, op1));
}
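/* Illustrative shape of the split load above (register choice arbitrary):
   the low 16 bytes are loaded with an ordinary unaligned 128-bit move and
   the VEC_CONCAT with the high memory half is matched by the vinsertf128
   pattern, e.g.

       vmovups      (%rax), %xmm0
       vinsertf128  $1, 16(%rax), %ymm0, %ymm0

   while the store path emits two vextractf128 of the low and high lanes.  */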
/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
   straight to ix86_expand_vector_move.  */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg      (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps  reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd  mem, reg
         movhpd mem+8, reg
       }  */

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
{
  rtx op0, op1, m;

  op0 = operands[0];
  op1 = operands[1];

  if (TARGET_AVX)
    {
      switch (GET_MODE_CLASS (mode))
        {
        case MODE_VECTOR_INT:
        case MODE_INT:
          switch (GET_MODE_SIZE (mode))
            {
            case 16:
              /* If we're optimizing for size, movups is the smallest.  */
              if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
                {
                  op0 = gen_lowpart (V4SFmode, op0);
                  op1 = gen_lowpart (V4SFmode, op1);
                  emit_insn (gen_sse_movups (op0, op1));
                  return;
                }
              op0 = gen_lowpart (V16QImode, op0);
              op1 = gen_lowpart (V16QImode, op1);
              emit_insn (gen_sse2_movdqu (op0, op1));
              break;
            case 32:
              op0 = gen_lowpart (V32QImode, op0);
              op1 = gen_lowpart (V32QImode, op1);
              ix86_avx256_split_vector_move_misalign (op0, op1);
              break;
            default:
              gcc_unreachable ();
            }
          break;
        case MODE_VECTOR_FLOAT:
          op0 = gen_lowpart (mode, op0);
          op1 = gen_lowpart (mode, op1);

          switch (mode)
            {
            case V4SFmode:
              emit_insn (gen_sse_movups (op0, op1));
              break;
            case V8SFmode:
              ix86_avx256_split_vector_move_misalign (op0, op1);
              break;
            case V2DFmode:
              if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
                {
                  op0 = gen_lowpart (V4SFmode, op0);
                  op1 = gen_lowpart (V4SFmode, op1);
                  emit_insn (gen_sse_movups (op0, op1));
                  return;
                }
              emit_insn (gen_sse2_movupd (op0, op1));
              break;
            case V4DFmode:
              ix86_avx256_split_vector_move_misalign (op0, op1);
              break;
            default:
              gcc_unreachable ();
            }
          break;

        default:
          gcc_unreachable ();
        }

      return;
    }

  if (MEM_P (op1))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
          || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? If we have typed data, then it would appear that using
         movdqu is the only way to get unaligned data loaded with
         integer type.  */
      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          rtx zero;

          if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
            {
              op0 = gen_lowpart (V2DFmode, op0);
              op1 = gen_lowpart (V2DFmode, op1);
              emit_insn (gen_sse2_movupd (op0, op1));
              return;
            }

          /* When SSE registers are split into halves, we can avoid
             writing to the top half twice.  */
          if (TARGET_SSE_SPLIT_REGS)
            {
              emit_clobber (op0);
              zero = op0;
            }
          else
            {
              /* ??? Not sure about the best option for the Intel chips.
                 The following would seem to satisfy; the register is
                 entirely cleared, breaking the dependency chain.  We
                 then store to the upper half, with a dependency depth
                 of one.  A rumor has it that Intel recommends two movsd
                 followed by an unpacklpd, but this is unconfirmed.  And
                 given that the dependency depth of the unpacklpd would
                 still be one, I'm not sure why this would be better.  */
              zero = CONST0_RTX (V2DFmode);
            }

          m = adjust_address (op1, DFmode, 0);
          emit_insn (gen_sse2_loadlpd (op0, zero, m));
          m = adjust_address (op1, DFmode, 8);
          emit_insn (gen_sse2_loadhpd (op0, op0, m));
        }
      else
        {
          if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
            {
              op0 = gen_lowpart (V4SFmode, op0);
              op1 = gen_lowpart (V4SFmode, op1);
              emit_insn (gen_sse_movups (op0, op1));
              return;
            }

          if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
            emit_move_insn (op0, CONST0_RTX (mode));
          else
            emit_clobber (op0);

          if (mode != V4SFmode)
            op0 = gen_lowpart (V4SFmode, op0);
          m = adjust_address (op1, V2SFmode, 0);
          emit_insn (gen_sse_loadlps (op0, op0, m));
          m = adjust_address (op1, V2SFmode, 8);
          emit_insn (gen_sse_loadhps (op0, op0, m));
        }
    }
  else if (MEM_P (op0))
    {
      /* If we're optimizing for size, movups is the smallest.  */
      if (optimize_insn_for_size_p ()
          || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
        {
          op0 = gen_lowpart (V4SFmode, op0);
          op1 = gen_lowpart (V4SFmode, op1);
          emit_insn (gen_sse_movups (op0, op1));
          return;
        }

      /* ??? Similar to above, only less clear because of quote
         typeless stores unquote.  */
      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
          && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          op0 = gen_lowpart (V16QImode, op0);
          op1 = gen_lowpart (V16QImode, op1);
          emit_insn (gen_sse2_movdqu (op0, op1));
          return;
        }

      if (TARGET_SSE2 && mode == V2DFmode)
        {
          if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
            {
              op0 = gen_lowpart (V2DFmode, op0);
              op1 = gen_lowpart (V2DFmode, op1);
              emit_insn (gen_sse2_movupd (op0, op1));
            }
          else
            {
              m = adjust_address (op0, DFmode, 0);
              emit_insn (gen_sse2_storelpd (m, op1));
              m = adjust_address (op0, DFmode, 8);
              emit_insn (gen_sse2_storehpd (m, op1));
            }
        }
      else
        {
          if (mode != V4SFmode)
            op1 = gen_lowpart (V4SFmode, op1);

          if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
            {
              op0 = gen_lowpart (V4SFmode, op0);
              emit_insn (gen_sse_movups (op0, op1));
            }
          else
            {
              m = adjust_address (op0, V2SFmode, 0);
              emit_insn (gen_sse_storelps (m, op1));
              m = adjust_address (op0, V2SFmode, 8);
              emit_insn (gen_sse_storehps (m, op1));
            }
        }
    }
  else
    gcc_unreachable ();
}
/* Expand a push in MODE.  This is some mode for which we do not support
   proper push instructions, at least from the registers that we expect
   the value to live in.  */

void
ix86_expand_push (enum machine_mode mode, rtx x)
{
  rtx tmp;

  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
                             GEN_INT (-GET_MODE_SIZE (mode)),
                             stack_pointer_rtx, 1, OPTAB_DIRECT);
  if (tmp != stack_pointer_rtx)
    emit_move_insn (stack_pointer_rtx, tmp);

  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);

  /* When we push an operand onto stack, it has to be aligned at least
     at the function argument boundary.  However since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
  emit_move_insn (tmp, x);
}
/* Helper function of ix86_fixup_binary_operands to canonicalize
   operand order.  Returns true if the operands should be swapped.  */

static bool
ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* If the operation is not commutative, we can't do anything.  */
  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
    return false;

  /* Highest priority is that src1 should match dst.  */
  if (rtx_equal_p (dst, src1))
    return false;
  if (rtx_equal_p (dst, src2))
    return true;

  /* Next highest priority is that immediate constants come second.  */
  if (immediate_operand (src2, mode))
    return false;
  if (immediate_operand (src1, mode))
    return true;

  /* Lowest priority is that memory references should come second.  */
  if (MEM_P (src2))
    return false;
  if (MEM_P (src1))
    return true;

  return false;
}
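/* Example of the canonicalization above: for dst = r1, src1 = r2,
   src2 = r1 with a commutative CODE, the second test fires and the
   operands are swapped, so the expander sees r1 = r1 op r2 and the
   two-address constraint is met without an extra move.  */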
/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
   destination to use for the operation.  If different from the true
   destination in operands[0], a copy operation will be required.  */

rtx
ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Canonicalize operand order.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp;

      /* It is invalid to swap operands of different modes.  */
      gcc_assert (GET_MODE (src1) == GET_MODE (src2));

      temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    {
      /* Optimization: Only read from memory once.  */
      if (rtx_equal_p (src1, src2))
        {
          src2 = force_reg (mode, src2);
          src1 = src2;
        }
      else
        src2 = force_reg (mode, src2);
    }

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    dst = gen_reg_rtx (mode);

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    src1 = force_reg (mode, src1);

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    src1 = force_reg (mode, src1);

  /* Improve address combine.  */
  if (code == PLUS
      && GET_MODE_CLASS (mode) == MODE_INT
      && MEM_P (src2))
    src2 = force_reg (mode, src2);

  operands[1] = src1;
  operands[2] = src2;
  return dst;
}
/* Similarly, but assume that the destination has already been
   set up properly.  */

void
ix86_fixup_binary_operands_no_copy (enum rtx_code code,
                                    enum machine_mode mode, rtx operands[])
{
  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
  gcc_assert (dst == operands[0]);
}
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 3 separate
   memory references (one output, two input) in a single insn.  */

void
ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
                             rtx operands[])
{
  rtx src1, src2, dst, op, clob;

  dst = ix86_fixup_binary_operands (code, mode, operands);
  src1 = operands[1];
  src2 = operands[2];

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
  if (reload_in_progress)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  We can only do this with PLUS.  */
      gcc_assert (code == PLUS);
      emit_insn (op);
    }
  else if (reload_completed
           && code == PLUS
           && !rtx_equal_p (dst, src1))
    {
      /* This is going to be an LEA; avoid splitting it later.  */
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Return TRUE or FALSE depending on whether the binary operator meets the
   appropriate constraints.  */

bool
ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
                         rtx operands[3])
{
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];

  /* Both source operands cannot be in memory.  */
  if (MEM_P (src1) && MEM_P (src2))
    return false;

  /* Canonicalize operand order for commutative operators.  */
  if (ix86_swap_binary_operands_p (code, mode, operands))
    {
      rtx temp = src1;
      src1 = src2;
      src2 = temp;
    }

  /* If the destination is memory, we must have a matching source operand.  */
  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
    return false;

  /* Source 1 cannot be a constant.  */
  if (CONSTANT_P (src1))
    return false;

  /* Source 1 cannot be a non-matching memory.  */
  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
    return (code == AND
            && (mode == HImode
                || mode == SImode
                || (TARGET_64BIT && mode == DImode))
            && satisfies_constraint_L (src2));

  return true;
}
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine, than just general_operand, which will allow 2 separate
   memory references (one output, one input) in a single insn.  */

void
ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
                            rtx operands[])
{
  int matching_memory;
  rtx src, dst, op, clob;

  dst = operands[0];
  src = operands[1];

  /* If the destination is memory, and we do not have matching source
     operands, do things in registers.  */
  matching_memory = 0;
  if (MEM_P (dst))
    {
      if (rtx_equal_p (dst, src))
        matching_memory = 1;
      else
        dst = gen_reg_rtx (mode);
    }

  /* When source operand is memory, destination must match.  */
  if (MEM_P (src) && !matching_memory)
    src = force_reg (mode, src);

  /* Emit the instruction.  */

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
  if (reload_in_progress || code == NOT)
    {
      /* Reload doesn't know about the flags register, and doesn't know that
         it doesn't want to clobber it.  */
      gcc_assert (code == NOT);
      emit_insn (op);
    }
  else
    {
      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
    }

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], dst);
}
/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
   divisor are within the range [0-255].  */

void
ix86_split_idivmod (enum machine_mode mode, rtx operands[],
                    bool signed_p)
{
  rtx end_label, qimode_label;
  rtx insn, div, mod;
  rtx scratch, tmp0, tmp1, tmp2;
  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
  rtx (*gen_zero_extend) (rtx, rtx);
  rtx (*gen_test_ccno_1) (rtx, rtx);

  switch (mode)
    {
    case SImode:
      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
      gen_test_ccno_1 = gen_testsi_ccno_1;
      gen_zero_extend = gen_zero_extendqisi2;
      break;
    case DImode:
      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
      gen_test_ccno_1 = gen_testdi_ccno_1;
      gen_zero_extend = gen_zero_extendqidi2;
      break;
    default:
      gcc_unreachable ();
    }

  end_label = gen_label_rtx ();
  qimode_label = gen_label_rtx ();

  scratch = gen_reg_rtx (mode);

  /* Use 8bit unsigned divmod if dividend and divisor are within
     the range [0-255].  */
  emit_move_insn (scratch, operands[2]);
  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
                                 scratch, 1, OPTAB_DIRECT);
  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
                               gen_rtx_LABEL_REF (VOIDmode, qimode_label),
                               pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = qimode_label;

  /* Generate original signed/unsigned divmod.  */
  div = gen_divmod4_1 (operands[0], operands[1],
                       operands[2], operands[3]);
  emit_insn (div);

  /* Branch to the end.  */
  emit_jump_insn (gen_jump (end_label));
  emit_barrier ();

  /* Generate 8bit unsigned divide.  */
  emit_label (qimode_label);
  /* Don't use operands[0] for result of 8bit divide since not all
     registers support QImode ZERO_EXTRACT.  */
  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));

  if (signed_p)
    {
      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
    }
  else
    {
      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
    }

  /* Extract remainder from AH.  */
  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
  if (REG_P (operands[1]))
    insn = emit_move_insn (operands[1], tmp1);
  else
    {
      /* Need a new scratch register since the old one has result
         of 8bit divide.  */
      scratch = gen_reg_rtx (mode);
      emit_move_insn (scratch, tmp1);
      insn = emit_move_insn (operands[1], scratch);
    }
  set_unique_reg_note (insn, REG_EQUAL, mod);

  /* Zero extend quotient from AL.  */
  tmp1 = gen_lowpart (QImode, tmp0);
  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
  set_unique_reg_note (insn, REG_EQUAL, div);

  emit_label (end_label);
}
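/* Shape of the code emitted above (an illustrative sketch of the
   unsigned SImode case; register names are arbitrary):

       mov    dividend, scratch
       or     divisor, scratch
       test   $-0x100, scratch
       je     .Lqimode              both values fit in 8 bits
       divl   ...                   full-width divide
       jmp    .Lend
   .Lqimode:
       divb   ...                   8-bit divide: AL = quotient, AH = remainder
   .Lend:  */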
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx prev, rtx next, unsigned int distance)
{
  df_ref *use_rec;
  df_ref *def_rec;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
    for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
      if (!DF_REF_IS_ARTIFICIAL (*def_rec)
          && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
        return distance + (distance & 1) + 2;

  return distance + 1;
}
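/* The bookkeeping above counts half-cycles: an independent instruction
   pair advances the count by 1 (two such instructions can issue in one
   cycle), while a dependent pair rounds the count up to an even value
   and adds a full cycle -- that is the "+ (distance & 1) + 2" term.  */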
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
                  rtx insn)
{
  df_ref *def_rec;

  for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
    if (DF_REF_REG_DEF_P (*def_rec)
        && !DF_REF_IS_ARTIFICIAL (*def_rec)
        && (regno1 == DF_REF_REGNO (*def_rec)
            || regno2 == DF_REF_REGNO (*def_rec)))
      return true;

  return false;
}
/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref *use_rec;

  for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
    if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
      return true;

  return false;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
                               rtx insn, int distance,
                               rtx start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx prev = start;
  rtx next = NULL;
  enum attr_type insn_type;

  *found = false;

  while (prev
         && prev != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_defines_reg (regno1, regno2, prev))
            {
              insn_type = get_attr_type (prev);
              if (insn_type != TYPE_LEA)
                {
                  *found = true;
                  return distance;
                }
            }

          next = prev;
        }
      if (prev == BB_HEAD (bb))
        break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbour BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
                         rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
                                              distance, PREV_INSN (insn),
                                              &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
        if (e->src == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_non_agu_define_in_bb (regno1, regno2,
                                                  insn, distance,
                                                  BB_END (bb), &found);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              int bb_dist
                = distance_non_agu_define_in_bb (regno1, regno2,
                                                 insn, distance,
                                                 BB_END (e->src),
                                                 &found_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
                        rtx insn, int distance, rtx start,
                        bool *found, bool *redefined)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx next = start;
  rtx prev = NULL;

  *found = false;
  *redefined = false;

  while (next
         && next != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
        {
          distance = increase_distance(prev, next, distance);
          if (insn_uses_reg_mem (regno, next))
            {
              /* Return DISTANCE if OP0 is used in memory
                 address in NEXT.  */
              *found = true;
              return distance;
            }

          if (insn_defines_reg (regno, INVALID_REGNUM, next))
            {
              /* Return -1 if OP0 is set in NEXT.  */
              *redefined = true;
              return -1;
            }

          prev = next;
        }

      if (next == BB_END (bb))
        break;

      next = NEXT_INSN (next);
    }

  return distance;
}
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
                                       NEXT_INSN (insn),
                                       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_agu_use_in_bb (regno0, insn,
                                           distance, BB_HEAD (bb),
                                           &found, &redefined);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;
          bool redefined_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              int bb_dist
                = distance_agu_use_in_bb (regno0, insn,
                                          distance, BB_HEAD (e->dest),
                                          &found_in_bb, &redefined_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
                      unsigned int regno2, unsigned int split_cost)
{
  int dist_define, dist_use;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
         operand usage and split cost is 0 then both lea
         and non lea variants have same priority.  Currently
         we prefer lea for 64 bit code and non lea on 32 bit
         code.  */
      if (dist_use < 0 && split_cost == 0)
        return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
        return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost does not exceed AGU stall.  */
  if (dist_use < 0)
    return dist_define >= LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
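/* Worked example of the heuristic above: with LEA_MAX_STALL == 3, a
   non-AGU definition one cycle back (dist_define == 1), split_cost == 1
   and no forward AGU use (dist_use < 0), we get dist_define == 2 after
   adjustment, and 2 >= 3 is false -- the split sequence wins and the
   function returns false.  */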
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref *use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
        {
          for (use = DF_INSN_USES (insn); *use; use++)
            if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
              return false;

          if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
            return true;
        }

      if (insn == BB_END (bb))
        break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}
/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
}
/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx insn, rtx operands[])
{
  unsigned int regno0;
  unsigned int regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, -1, 0);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]) ;
  unsigned int regno1 = -1;
  unsigned int regno2 = -1;
  unsigned int split_cost = 0;
  struct ix86_address parts;
  int ok;

  /* Check we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Compute how many cycles we will add to execution time
     if we split the lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
         destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
        split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
        split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
        {
          if (regno0 != regno1)
            split_cost += 1;
          else if (regno2 == regno0)
            split_cost += 4;
          else
            split_cost += parts.scale;
        }

      /* Have to use add instruction with immediate if
         disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
        split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
   matches destination.  RTX includes clobber of FLAGS_REG.  */

static void
ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
                 rtx dst, rtx src)
{
  rtx op, clob;

  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));

  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
/* Split lea instructions into a sequence of instructions
   which are executed on ALU to avoid AGU stalls.
   It is assumed that it is allowed to clobber flags register
   at lea position.  */

void
ix86_split_lea_for_addr (rtx operands[], enum machine_mode mode)
{
  unsigned int regno0 = true_regnum (operands[0]) ;
  unsigned int regno1 = INVALID_REGNUM;
  unsigned int regno2 = INVALID_REGNUM;
  struct ix86_address parts;
  rtx tmp;
  int ok, adds;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  if (parts.base)
    {
      if (GET_MODE (parts.base) != mode)
        parts.base = gen_rtx_SUBREG (mode, parts.base, 0);
      regno1 = true_regnum (parts.base);
    }

  if (parts.index)
    {
      if (GET_MODE (parts.index) != mode)
        parts.index = gen_rtx_SUBREG (mode, parts.index, 0);
      regno2 = true_regnum (parts.index);
    }

  if (parts.scale > 1)
    {
      /* Case r1 = r1 + ...  */
      if (regno1 == regno0)
        {
          /* If we have a case r1 = r1 + C * r1 then we
             should use multiplication which is very
             expensive.  Assume cost model is wrong if we
             have such case here.  */
          gcc_assert (regno2 != regno0);

          for (adds = parts.scale; adds > 0; adds--)
            ix86_emit_binop (PLUS, mode, operands[0], parts.index);
        }
      else
        {
          /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));

          /* Use shift for scaling.  */
          ix86_emit_binop (ASHIFT, mode, operands[0],
                           GEN_INT (exact_log2 (parts.scale)));

          if (parts.base)
            ix86_emit_binop (PLUS, mode, operands[0], parts.base);

          if (parts.disp && parts.disp != const0_rtx)
            ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
        }
    }
  else if (!parts.base && !parts.index)
    {
      gcc_assert(parts.disp);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.disp));
    }
  else
    {
      if (!parts.base)
        {
          if (regno0 != regno2)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.index));
        }
      else if (!parts.index)
        {
          if (regno0 != regno1)
            emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
        }
      else
        {
          if (regno0 == regno1)
            tmp = parts.index;
          else if (regno0 == regno2)
            tmp = parts.base;
          else
            {
              emit_insn (gen_rtx_SET (VOIDmode, operands[0], parts.base));
              tmp = parts.index;
            }

          ix86_emit_binop (PLUS, mode, operands[0], tmp);
        }

      if (parts.disp && parts.disp != const0_rtx)
        ix86_emit_binop (PLUS, mode, operands[0], parts.disp);
    }
}
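/* Illustrative split performed by the code above for
   "lea 4(%ebx,%ecx,2), %eax" (SImode, all three registers distinct):

       mov    %ecx, %eax          move index into the destination
       shl    $1, %eax            scale by 2 via shift
       add    %ebx, %eax          add the base
       add    $4, %eax            add the displacement  */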
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
        return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
                                          use_body))
          return true;
    default:
      return false;
      break;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (set_body,
                                          XVECEXP (use_body, 0, i)))
          return true;
    default:
      return false;
      break;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
          || GET_CODE (shift_rtx) == LSHIFTRT
          || GET_CODE (shift_rtx) == ASHIFTRT
          || GET_CODE (shift_rtx) == ROTATE
          || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count)
          && true_regnum (set_dest) == true_regnum (shift_count))
        return true;
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
                                       PATTERN (use_insn));
}
/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        rtx operands[2] ATTRIBUTE_UNUSED)
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}
/* Post-reload splitter for converting an SF or DFmode value in an
   SSE register into an unsigned SImode.  */

void
ix86_split_convert_uns_si_sse (rtx operands[])
{
  enum machine_mode vecmode;
  rtx value, large, zero_or_two31, input, two31, x;

  large = operands[1];
  zero_or_two31 = operands[2];
  input = operands[3];
  two31 = operands[4];
  vecmode = GET_MODE (large);
  value = gen_rtx_REG (vecmode, REGNO (operands[0]));

  /* Load up the value into the low element.  We must ensure that the other
     elements are valid floats -- zero is the easiest such value.  */
  if (MEM_P (input))
    {
      if (vecmode == V4SFmode)
        emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
      else
        emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
    }
  else
    {
      input = gen_rtx_REG (vecmode, REGNO (input));
      emit_move_insn (value, CONST0_RTX (vecmode));
      if (vecmode == V4SFmode)
        emit_insn (gen_sse_movss (value, value, input));
      else
        emit_insn (gen_sse2_movsd (value, value, input));
    }

  emit_move_insn (large, two31);
  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);

  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
  emit_insn (gen_rtx_SET (VOIDmode, large, x));

  x = gen_rtx_AND (vecmode, zero_or_two31, large);
  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));

  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
  emit_insn (gen_rtx_SET (VOIDmode, value, x));

  large = gen_rtx_REG (V4SImode, REGNO (large));
  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));

  x = gen_rtx_REG (V4SImode, REGNO (value));
  if (vecmode == V4SFmode)
    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
  else
    emit_insn (gen_sse2_cvttpd2dq (x, value));
  value = x;

  emit_insn (gen_xorv4si3 (value, value, large));
}
/* Convert an unsigned DImode value into a DFmode, using only SSE.
   Expects the 64-bit DImode to be supplied in a pair of integral
   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
   -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
  rtx int_xmm, fp_xmm;
  rtx biases, exponents;
  rtx x;

  int_xmm = gen_reg_rtx (V4SImode);
  if (TARGET_INTER_UNIT_MOVES)
    emit_insn (gen_movdi_to_sse (int_xmm, input));
  else if (TARGET_SSE_SPLIT_REGS)
    {
      emit_clobber (int_xmm);
      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
    }
  else
    {
      x = gen_reg_rtx (V2DImode);
      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
    }

  x = gen_rtx_CONST_VECTOR (V4SImode,
                            gen_rtvec (4, GEN_INT (0x43300000UL),
                                       GEN_INT (0x45300000UL),
                                       const0_rtx, const0_rtx));
  exponents = validize_mem (force_const_mem (V4SImode, x));

  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));

  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
     (0x1.0p84 + double(fp_value_hi_xmm)).
     Note these exponents differ by 32.  */

  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));

  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
  real_ldexp (&bias_lo_rvt, &dconst1, 52);
  real_ldexp (&bias_hi_rvt, &dconst1, 84);
  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
  x = const_double_from_real_value (bias_hi_rvt, DFmode);
  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
  biases = validize_mem (force_const_mem (V2DFmode, biases));
  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));

  /* Add the upper and lower DFmode values together.  */
  if (TARGET_SSE3)
    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
  else
    {
      x = copy_to_mode_reg (V2DFmode, fp_xmm);
      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
    }

  ix86_expand_vector_extract (false, target, fp_xmm, 0);
}

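/* The trick above, sketched in scalar C (illustration only; the helper
   below is hypothetical and not part of GCC).  Gluing the exponent word
   0x43300000 on top of a 32-bit value V yields, on a little-endian IEEE
   machine, the double 0x1.0p52 + V exactly, because V lands in the low
   32 bits of the significand:

     static double uns32_to_double_sketch (unsigned int v)
     {
       union { double d; unsigned long long u; } x;
       x.u = (0x43300000ULL << 32) | v;   // bits of 0x1.0p52 + v, exactly
       return x.d - 0x1.0p52;             // subtract the bias
     }

   The expansion above does this twice, with biases 0x1.0p52 and 0x1.0p84
   for the low and high halves, then sums the debiased halves.  */
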
/* Not used, but eases macroization of patterns.  */
void
ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
                                  rtx input ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}

/* Convert an unsigned SImode value into a DFmode.  Only currently used
   for SSE, but applicable anywhere.  */

void
ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO31r;
  rtx x, fp;

  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
                           NULL, 1, OPTAB_DIRECT);

  fp = gen_reg_rtx (DFmode);
  emit_insn (gen_floatsidf2 (fp, x));

  real_ldexp (&TWO31r, &dconst1, 31);
  x = const_double_from_real_value (TWO31r, DFmode);

  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}

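/* In scalar C terms (illustrative sketch, hypothetical helper; assumes
   two's complement wrap-around): adding INT_MIN rotates the unsigned
   value into signed range, the signed SI->DF conversion is exact, and
   adding 0x1.0p31 back restores the original magnitude:

     static double uns32_to_double_sketch (unsigned int v)
     {
       int biased = (int) (v + 0x80000000u);  // v - 2^31, wrapped
       return (double) biased + 0x1.0p31;     // undo the shift, exactly
     }
*/
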
/* Convert a signed DImode value into a DFmode.  Only used for SSE in
   32-bit mode; otherwise we have a direct convert instruction.  */

void
ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE TWO32r;
  rtx fp_lo, fp_hi, x;

  fp_lo = gen_reg_rtx (DFmode);
  fp_hi = gen_reg_rtx (DFmode);

  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));

  real_ldexp (&TWO32r, &dconst1, 32);
  x = const_double_from_real_value (TWO32r, DFmode);
  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);

  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));

  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
                           0, OPTAB_DIRECT);
  if (x != target)
    emit_move_insn (target, x);
}

/* Convert an unsigned SImode value into a SFmode, using only SSE.
   For x86_32, -mfpmath=sse, !optimize_size only.  */

void
ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
{
  REAL_VALUE_TYPE ONE16r;
  rtx fp_hi, fp_lo, int_hi, int_lo, x;

  real_ldexp (&ONE16r, &dconst1, 16);
  x = const_double_from_real_value (ONE16r, SFmode);
  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
                                NULL, 0, OPTAB_DIRECT);
  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
                                NULL, 0, OPTAB_DIRECT);
  fp_hi = gen_reg_rtx (SFmode);
  fp_lo = gen_reg_rtx (SFmode);
  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
                               0, OPTAB_DIRECT);
  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
                               0, OPTAB_DIRECT);
  if (!rtx_equal_p (target, fp_hi))
    emit_move_insn (target, fp_hi);
}

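/* The same split in scalar C (illustration only): each 16-bit half
   converts to SFmode exactly, the scaling by 2^16 is exact, and only the
   final addition rounds, matching a direct unsigned conversion:

     static float uns32_to_float_sketch (unsigned int v)
     {
       float lo = (float) (v & 0xffff);
       float hi = (float) (v >> 16);
       return hi * 65536.0f + lo;   // hi * 2^16 + lo, one rounding
     }
*/
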
/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
   a vector of unsigned ints VAL to vector of floats TARGET.  */

void
ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
{
  rtx tmp[8];
  REAL_VALUE_TYPE TWO16r;
  enum machine_mode intmode = GET_MODE (val);
  enum machine_mode fltmode = GET_MODE (target);
  rtx (*cvt) (rtx, rtx);

  if (intmode == V4SImode)
    cvt = gen_floatv4siv4sf2;
  else
    cvt = gen_floatv8siv8sf2;
  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
  tmp[0] = force_reg (intmode, tmp[0]);
  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
                                NULL_RTX, 1, OPTAB_DIRECT);
  tmp[3] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[3], tmp[1]));
  tmp[4] = gen_reg_rtx (fltmode);
  emit_insn (cvt (tmp[4], tmp[2]));
  real_ldexp (&TWO16r, &dconst1, 16);
  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
                                OPTAB_DIRECT);
  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
                                OPTAB_DIRECT);
  if (tmp[7] != target)
    emit_move_insn (target, tmp[7]);
}

/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
   This is done by doing just signed conversion if < 0x1p31, and otherwise by
   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */

rtx
ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
{
  REAL_VALUE_TYPE TWO31r;
  rtx two31r, tmp[4];
  enum machine_mode mode = GET_MODE (val);
  enum machine_mode scalarmode = GET_MODE_INNER (mode);
  enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
  rtx (*cmp) (rtx, rtx, rtx, rtx);
  int i;

  for (i = 0; i < 3; i++)
    tmp[i] = gen_reg_rtx (mode);
  real_ldexp (&TWO31r, &dconst1, 31);
  two31r = const_double_from_real_value (TWO31r, scalarmode);
  two31r = ix86_build_const_vector (mode, 1, two31r);
  two31r = force_reg (mode, two31r);
  switch (mode)
    {
    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
    default: gcc_unreachable ();
    }
  tmp[3] = gen_rtx_LE (mode, two31r, val);
  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
                                0, OPTAB_DIRECT);
  if (intmode == V4SImode || TARGET_AVX2)
    *xorp = expand_simple_binop (intmode, ASHIFT,
                                 gen_lowpart (intmode, tmp[0]),
                                 GEN_INT (31), NULL_RTX, 0,
                                 OPTAB_DIRECT);
  else
    {
      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
      two31 = ix86_build_const_vector (intmode, 1, two31);
      *xorp = expand_simple_binop (intmode, AND,
                                   gen_lowpart (intmode, tmp[0]),
                                   two31, NULL_RTX, 0,
                                   OPTAB_DIRECT);
    }
  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
                              0, OPTAB_DIRECT);
}

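/* The identity behind the adjustment above, in scalar C (illustrative
   sketch, hypothetical helper): inputs below 0x1.0p31 take the plain
   signed path, larger ones are reduced by 0x1.0p31 before the signed
   truncation and get their top bit patched back in via the XOR mask:

     static unsigned int double_to_uns32_sketch (double d)
     {
       if (d < 0x1.0p31)
         return (unsigned int) (int) d;
       return (unsigned int) (int) (d - 0x1.0p31) ^ 0x80000000u;
     }
*/
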
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  enum machine_mode scalar_mode;

  switch (mode)
    {
    case V32QImode:
    case V16QImode:
    case V16HImode:
    case V8HImode:
    case V8SImode:
    case V4SImode:
    case V4DImode:
    case V2DImode:
      gcc_assert (vect);
    case V8SFmode:
    case V4SFmode:
    case V4DFmode:
    case V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
        RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}

/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
{
  enum machine_mode vec_mode, imode;
  HOST_WIDE_INT hi, lo;
  int shift = 63;
  rtx v;
  rtx mask;

  /* Find the sign bit, sign extended to 2*HWI.  */
  switch (mode)
    {
    case V4SImode:
    case V8SImode:
    case V4SFmode:
    case V8SFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = SImode;
      lo = 0x80000000, hi = lo < 0;
      break;

    case V2DImode:
    case V4DImode:
    case V2DFmode:
    case V4DFmode:
      vec_mode = mode;
      mode = GET_MODE_INNER (mode);
      imode = DImode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        lo = (HOST_WIDE_INT)1 << shift, hi = -1;
      else
        lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
      break;

    case TImode:
    case TFmode:
      vec_mode = VOIDmode;
      if (HOST_BITS_PER_WIDE_INT >= 64)
        {
          imode = TImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << shift;
        }
      else
        {
          rtvec vec;

          imode = DImode;
          lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);

          if (invert)
            {
              lo = ~lo, hi = ~hi;
              v = constm1_rtx;
            }
          else
            v = const0_rtx;

          mask = immed_double_const (lo, hi, imode);

          vec = gen_rtvec (2, v, mask);
          v = gen_rtx_CONST_VECTOR (V2DImode, vec);
          v = copy_to_mode_reg (mode, gen_lowpart (mode, v));

          return v;
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (invert)
    lo = ~lo, hi = ~hi;

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_double_const (lo, hi, imode);
  mask = gen_lowpart (mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}

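/* How the mask is consumed, in bit terms (illustration only): with S the
   sign-bit mask built above, NEG is x ^ S and ABS is x & ~S (the INVERT
   variant).  E.g. on the bits of a 32-bit float:

     static unsigned int fabs_bits_sketch (unsigned int x)
     {
       return x & ~0x80000000u;   // clear the sign bit
     }
*/
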
/* Generate code for floating point ABS or NEG.  */

void
ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
                                rtx operands[])
{
  rtx mask, set, dst, src;
  bool use_sse = false;
  bool vector_mode = VECTOR_MODE_P (mode);
  enum machine_mode vmode = mode;

  if (vector_mode)
    use_sse = true;
  else if (mode == TFmode)
    use_sse = true;
  else if (TARGET_SSE_MATH)
    {
      use_sse = SSE_FLOAT_MODE_P (mode);
      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
    }

  /* NEG and ABS performed with SSE use bitwise mask operations.
     Create the appropriate mask now.  */
  if (use_sse)
    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
  else
    mask = NULL_RTX;

  dst = operands[0];
  src = operands[1];

  set = gen_rtx_fmt_e (code, mode, src);
  set = gen_rtx_SET (VOIDmode, dst, set);

  if (mask)
    {
      rtx use, clob;
      rtvec par;

      use = gen_rtx_USE (VOIDmode, mask);
      if (vector_mode)
        par = gen_rtvec (2, set, use);
      else
        {
          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
          par = gen_rtvec (3, set, use, clob);
        }
      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
    }
  else
    emit_insn (set);
}

/* Expand a copysign operation.  Special case operand 0 being a constant.  */

void
ix86_expand_copysign (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, op1, mask, nmask;

  dest = operands[0];
  op0 = operands[1];
  op1 = operands[2];

  mode = GET_MODE (dest);

  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  if (GET_CODE (op0) == CONST_DOUBLE)
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);

      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
        op0 = simplify_unary_operation (ABS, mode, op0, mode);

      if (mode == SFmode || mode == DFmode)
        {
          if (op0 == CONST0_RTX (mode))
            op0 = CONST0_RTX (vmode);
          else
            {
              rtx v = ix86_build_const_vector (vmode, false, op0);

              op0 = force_reg (vmode, v);
            }
        }
      else if (op0 != CONST0_RTX (mode))
        op0 = force_reg (mode, op0);

      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_const;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_const;
      else
        copysign_insn = gen_copysigntf3_const;

      emit_insn (copysign_insn (dest, op0, op1, mask));
    }
  else
    {
      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);

      nmask = ix86_build_signbit_mask (vmode, 0, 1);
      mask = ix86_build_signbit_mask (vmode, 0, 0);

      if (mode == SFmode)
        copysign_insn = gen_copysignsf3_var;
      else if (mode == DFmode)
        copysign_insn = gen_copysigndf3_var;
      else
        copysign_insn = gen_copysigntf3_var;

      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
   be a constant, and so has already been expanded into a vector constant.  */

void
ix86_split_copysign_const (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, op0, mask, x;

  dest = operands[0];
  op0 = operands[1];
  mask = operands[3];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  dest = simplify_gen_subreg (vmode, dest, mode, 0);
  x = gen_rtx_AND (vmode, dest, mask);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  if (op0 != CONST0_RTX (vmode))
    {
      x = gen_rtx_IOR (vmode, dest, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
}

/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
   so we have to do two masks.  */

void
ix86_split_copysign_var (rtx operands[])
{
  enum machine_mode mode, vmode;
  rtx dest, scratch, op0, op1, mask, nmask, x;

  dest = operands[0];
  scratch = operands[1];
  op0 = operands[2];
  op1 = operands[3];
  nmask = operands[4];
  mask = operands[5];

  mode = GET_MODE (dest);
  vmode = GET_MODE (mask);

  if (rtx_equal_p (op0, op1))
    {
      /* Shouldn't happen often (it's useless, obviously), but when it does
         we'd generate incorrect code if we continue below.  */
      emit_move_insn (dest, op0);
      return;
    }

  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
    {
      gcc_assert (REGNO (op1) == REGNO (scratch));

      x = gen_rtx_AND (vmode, scratch, mask);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      dest = mask;
      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
      x = gen_rtx_NOT (vmode, dest);
      x = gen_rtx_AND (vmode, x, op0);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else
    {
      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
        {
          x = gen_rtx_AND (vmode, scratch, mask);
        }
      else						/* alternative 2,4 */
        {
          gcc_assert (REGNO (mask) == REGNO (scratch));
          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
          x = gen_rtx_AND (vmode, scratch, op1);
        }
      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));

      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
        {
          dest = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, nmask);
        }
      else						/* alternative 3,4 */
        {
          gcc_assert (REGNO (nmask) == REGNO (dest));
          dest = nmask;
          op0 = simplify_gen_subreg (vmode, op0, mode, 0);
          x = gen_rtx_AND (vmode, dest, op0);
        }
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }

  x = gen_rtx_IOR (vmode, dest, scratch);
  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
}

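/* Both splitters implement the usual bit-level copysign identity,
   shown here on 32-bit float bits (illustrative helper, not GCC code):

     static unsigned int copysign_bits_sketch (unsigned int mag,
                                               unsigned int sgn)
     {
       unsigned int s = 0x80000000u;    // MASK: just the sign bit
       return (mag & ~s) | (sgn & s);   // nmask AND, mask AND, then IOR
     }
*/
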
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
{
  rtx set;
  enum machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case CCNOmode:
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return false;
      break;
    case CCmode:
      if (req_mode == CCGCmode)
        return false;
      /* FALLTHRU */
    case CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return false;
      /* FALLTHRU */
    case CCGOCmode:
      if (req_mode == CCZmode)
        return false;
      /* FALLTHRU */
    case CCZmode:
      break;

    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
      if (set_mode != req_mode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}

/* Generate insn patterns to do an integer compare of OPERANDS.  */

static rtx
ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode cmpmode;
  rtx tmp, flags;

  cmpmode = SELECT_CC_MODE (code, op0, op1);
  flags = gen_rtx_REG (cmpmode, FLAGS_REG);

  /* This is very simple, but making the interface the same as in the
     FP case makes the rest of the code easier.  */
  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
}

/* Figure out whether to use ordered or unordered fp comparisons.
   Return the appropriate mode to use.  */

enum machine_mode
ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
}

enum machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return ix86_fp_compare_mode (code);
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      else
        return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == MINUS
          && rtx_equal_p (op1, XEXP (op0, 0)))
        return CCCmode;
      else
        return CCmode;
      /* Codes possibly doable only with sign flag when
         comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
        return CCGOCmode;
      else
        /* For other cases Carry flag is not required.  */
        return CCGCmode;
      /* Codes doable only with sign flag when comparing
         against zero, but we miss jump instruction for it
         so we need to use relational tests against overflow
         that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
        return CCNOmode;
      else
        return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
         mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = FPSR_REG;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static enum machine_mode
ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case CCmode:
    case CCGCmode:
    case CCGOCmode:
    case CCNOmode:
    case CCAmode:
    case CCCmode:
    case CCOmode:
    case CCSmode:
    case CCZmode:
      switch (m2)
        {
        default:
          return VOIDmode;

        case CCmode:
        case CCGCmode:
        case CCGOCmode:
        case CCNOmode:
        case CCAmode:
        case CCCmode:
        case CCOmode:
        case CCSmode:
        case CCZmode:
          return CCmode;
        }

    case CCFPmode:
    case CCFPUmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}

/* Return a comparison we can do and that it is equivalent to
   swap_condition (code) apart possibly from orderedness.
   But, never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */

static enum rtx_code
ix86_fp_swap_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:			/* GTU - CF=0 & ZF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
    case GE:			/* GEU - CF=0 */
      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
    case UNLT:			/* LTU - CF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GT;
    case UNLE:			/* LEU - CF=1 | ZF=1 */
      return TARGET_IEEE_FP ? UNKNOWN : GE;
    default:
      return swap_condition (code);
    }
}

/* Return cost of comparison CODE using the best strategy for performance.
   All following functions do use number of instructions as a cost metrics.
   In future this should be tweaked to compute bytes for optimize_size and
   take into account performance of various instructions on various CPUs.  */

static int
ix86_fp_comparison_cost (enum rtx_code code)
{
  int arith_cost;

  /* The cost of code using bit-twiddling on %ah.  */
  switch (code)
    {
    case UNLE:
    case UNLT:
    case LTGT:
    case GT:
    case GE:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
      arith_cost = 4;
      break;
    case LT:
    case NE:
    case EQ:
    case UNGE:
      arith_cost = TARGET_IEEE_FP ? 5 : 4;
      break;
    case LE:
    case UNGT:
      arith_cost = TARGET_IEEE_FP ? 6 : 4;
      break;
    default:
      gcc_unreachable ();
    }

  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      return arith_cost > 4 ? 3 : 2;
    case IX86_FPCMP_SAHF:
      return arith_cost > 4 ? 4 : 3;
    default:
      return arith_cost;
    }
}

/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferrable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}

/* Swap, force into registers, or otherwise massage the two operands
   to a fp comparison.  The operands are updated in place; the new
   comparison code is returned.  */

static enum rtx_code
ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
{
  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
  rtx op0 = *pop0, op1 = *pop1;
  enum machine_mode op_mode = GET_MODE (op0);
  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);

  /* All of the unordered compare instructions only work on registers.
     The same is true of the fcomi compare instructions.  The XFmode
     compare instructions require registers except when comparing
     against zero or when converting operand 1 from fixed point to
     floating point.  */

  if (!is_sse
      && (fpcmp_mode == CCFPUmode
          || (op_mode == XFmode
              && ! (standard_80387_constant_p (op0) == 1
                    || standard_80387_constant_p (op1) == 1)
              && GET_CODE (op1) != FLOAT)
          || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
    {
      op0 = force_reg (op_mode, op0);
      op1 = force_reg (op_mode, op1);
    }
  else
    {
      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
         things around if they appear profitable, otherwise force op0
         into a register.  */

      if (standard_80387_constant_p (op0) == 0
          || (MEM_P (op0)
              && ! (standard_80387_constant_p (op1) == 0
                    || MEM_P (op1))))
        {
          enum rtx_code new_code = ix86_fp_swap_condition (code);
          if (new_code != UNKNOWN)
            {
              rtx tmp;
              tmp = op0, op0 = op1, op1 = tmp;
              code = new_code;
            }
        }

      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);

      if (CONSTANT_P (op1))
        {
          int tmp = standard_80387_constant_p (op1);
          if (tmp == 0)
            op1 = validize_mem (force_const_mem (op_mode, op1));
          else if (tmp == 1)
            {
              if (TARGET_CMOVE)
                op1 = force_reg (op_mode, op1);
            }
          else
            op1 = force_reg (op_mode, op1);
        }
    }

  /* Try to rearrange the comparison to make it cheaper.  */
  if (ix86_fp_comparison_cost (code)
      > ix86_fp_comparison_cost (swap_condition (code))
      && (REG_P (op1) || can_create_pseudo_p ()))
    {
      rtx tmp;
      tmp = op0, op0 = op1, op1 = tmp;
      code = swap_condition (code);
      if (!REG_P (op0))
        op0 = force_reg (op_mode, op0);
    }

  *pop0 = op0;
  *pop1 = op1;
  return code;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    default:
      return UNKNOWN;
    }
}

/* Generate insn patterns to do a floating point compare of OPERANDS.  */

static rtx
ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
{
  enum machine_mode fpcmp_mode, intcmp_mode;
  rtx tmp, tmp2;

  fpcmp_mode = ix86_fp_compare_mode (code);
  code = ix86_prepare_fp_compare_args (code, &op0, &op1);

  /* Do fcomi/sahf based test when profitable.  */
  switch (ix86_fp_comparison_strategy (code))
    {
    case IX86_FPCMP_COMI:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);
      emit_insn (tmp);
      break;

    case IX86_FPCMP_SAHF:
      intcmp_mode = fpcmp_mode;
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
                         tmp);

      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
      break;

    case IX86_FPCMP_ARITH:
      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
      if (!scratch)
        scratch = gen_reg_rtx (HImode);
      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));

      /* In the unordered case, we have to check C2 for NaN's, which
         doesn't happen to work out to anything nice combination-wise.
         So do some bit twiddling on the value we've got in AH to come
         up with an appropriate set of condition codes.  */

      intcmp_mode = CCNOmode;
      switch (code)
        {
        case GT:
        case UNGT:
          if (code == GT || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
              intcmp_mode = CCmode;
              code = GEU;
            }
          break;
        case LT:
        case UNLT:
          if (code == LT && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
              code = NE;
            }
          break;
        case GE:
        case UNGE:
          if (code == GE || !TARGET_IEEE_FP)
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
              code = EQ;
            }
          else
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
              code = NE;
            }
          break;
        case LE:
        case UNLE:
          if (code == LE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = LTU;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
              code = NE;
            }
          break;
        case EQ:
        case UNEQ:
          if (code == EQ && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
              intcmp_mode = CCmode;
              code = EQ;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = NE;
            }
          break;
        case NE:
        case LTGT:
          if (code == NE && TARGET_IEEE_FP)
            {
              emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
              emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
                                             GEN_INT (0x40)));
              code = NE;
            }
          else
            {
              emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
              code = EQ;
            }
          break;

        case UNORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = NE;
          break;
        case ORDERED:
          emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
          code = EQ;
          break;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }

  /* Return the test that should be put into the flags user, i.e.
     the bcc, scc, or cmov instruction.  */
  return gen_rtx_fmt_ee (code, VOIDmode,
                         gen_rtx_REG (intcmp_mode, FLAGS_REG),
                         const0_rtx);
}

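/* Key to the bit-twiddling above (for reference; this is the standard
   x87 status-word layout): after fnstsw the condition bits sit in AH as
   C0 = 0x01, C2 = 0x04, C3 = 0x40, so 0x45 covers C3|C2|C0.  fcom leaves
   C3 C2 C0 = 000 for "greater", 001 for "less", 100 for "equal" and 111
   for "unordered"; hence, for example, "test $0x45" yields ZF=1 exactly
   when the result was "greater than", which is why GT reduces to an EQ
   test on 0x45 in the non-IEEE case.  */
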
static rtx
ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);

  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
    }
  else
    ret = ix86_expand_int_compare (code, op0, op1);

  return ret;
}

void
ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx tmp;

  switch (mode)
    {
    case SFmode:
    case DFmode:
    case XFmode:
    case QImode:
    case HImode:
    case SImode:
      simple:
      tmp = ix86_expand_compare (code, op0, op1);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, label),
                                  pc_rtx);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      return;

    case DImode:
      if (TARGET_64BIT)
        goto simple;
    case TImode:
      /* Expand DImode branch into multiple compare+branch.  */
      {
        rtx lo[2], hi[2], label2;
        enum rtx_code code1, code2, code3;
        enum machine_mode submode;

        if (CONSTANT_P (op0) && !CONSTANT_P (op1))
          {
            tmp = op0, op0 = op1, op1 = tmp;
            code = swap_condition (code);
          }

        split_double_mode (mode, &op0, 1, lo+0, hi+0);
        split_double_mode (mode, &op1, 1, lo+1, hi+1);

        submode = mode == DImode ? SImode : DImode;

        /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
           avoid two branches.  This costs one extra insn, so disable when
           optimizing for size.  */

        if ((code == EQ || code == NE)
            && (!optimize_insn_for_size_p ()
                || hi[1] == const0_rtx || lo[1] == const0_rtx))
          {
            rtx xor0, xor1;

            xor1 = hi[0];
            if (hi[1] != const0_rtx)
              xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            xor0 = lo[0];
            if (lo[1] != const0_rtx)
              xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
                                   NULL_RTX, 0, OPTAB_WIDEN);

            tmp = expand_binop (submode, ior_optab, xor1, xor0,
                                NULL_RTX, 0, OPTAB_WIDEN);

            ix86_expand_branch (code, tmp, const0_rtx, label);
            return;
          }

        /* Otherwise, if we are doing less-than or greater-or-equal-than,
           op1 is a constant and the low word is zero, then we can just
           examine the high word.  Similarly for low word -1 and
           less-or-equal-than or greater-than.  */

        if (CONST_INT_P (hi[1]))
          switch (code)
            {
            case LT: case LTU: case GE: case GEU:
              if (lo[1] == const0_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            case LE: case LEU: case GT: case GTU:
              if (lo[1] == constm1_rtx)
                {
                  ix86_expand_branch (code, hi[0], hi[1], label);
                  return;
                }
              break;
            default:
              break;
            }

        /* Otherwise, we need two or three jumps.  */

        label2 = gen_label_rtx ();

        code1 = code;
        code2 = swap_condition (code);
        code3 = unsigned_condition (code);

        switch (code)
          {
          case LT: case GT: case LTU: case GTU:
            break;

          case LE:   code1 = LT;  code2 = GT;  break;
          case GE:   code1 = GT;  code2 = LT;  break;
          case LEU:  code1 = LTU; code2 = GTU; break;
          case GEU:  code1 = GTU; code2 = LTU; break;

          case EQ:   code1 = UNKNOWN; code2 = NE;  break;
          case NE:   code2 = UNKNOWN; break;

          default:
            gcc_unreachable ();
          }

        /*
         * a < b =>
         *    if (hi(a) < hi(b)) goto true;
         *    if (hi(a) > hi(b)) goto false;
         *    if (lo(a) < lo(b)) goto true;
         *  false:
         */

        if (code1 != UNKNOWN)
          ix86_expand_branch (code1, hi[0], hi[1], label);
        if (code2 != UNKNOWN)
          ix86_expand_branch (code2, hi[0], hi[1], label2);

        ix86_expand_branch (code3, lo[0], lo[1], label);

        if (code2 != UNKNOWN)
          emit_label (label2);
        return;
      }

    default:
      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
      goto simple;
    }
}

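/* The EQ/NE shortcut above is the word-wise identity, in scalar C
   (illustrative helper): a double-word equality needs only one branch
   because (hi0 ^ hi1) | (lo0 ^ lo1) is zero iff both words match:

     static int di_eq_sketch (unsigned int lo0, unsigned int hi0,
                              unsigned int lo1, unsigned int hi1)
     {
       return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
     }
*/
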
/* Split branch based on floating point condition.  */
void
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
                      rtx target1, rtx target2, rtx tmp, rtx pushed)
{
  rtx condition;
  rtx i;

  if (target2 != pc_rtx)
    {
      rtx tmp = target2;
      code = reverse_condition_maybe_unordered (code);
      target2 = target1;
      target1 = tmp;
    }

  condition = ix86_expand_fp_compare (code, op1, op2,
                                      tmp);

  /* Remove pushed operand from stack.  */
  if (pushed)
    ix86_free_from_memory (GET_MODE (pushed));

  i = emit_jump_insn (gen_rtx_SET
                      (VOIDmode, pc_rtx,
                       gen_rtx_IF_THEN_ELSE (VOIDmode,
                                             condition, target1, target2)));
  if (split_branch_probability >= 0)
    add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
}

void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  rtx ret;

  gcc_assert (GET_MODE (dest) == QImode);

  ret = ix86_expand_compare (code, op0, op1);
  PUT_MODE (ret, QImode);
  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
}

/* Expand comparison setting or clearing carry flag.  Return true when
   successful and set pop for the operation.  */
static bool
ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
{
  enum machine_mode mode =
    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);

  /* Do not handle double-mode compares that go through special path.  */
  if (mode == (TARGET_64BIT ? TImode : DImode))
    return false;

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      rtx compare_op, compare_seq;

      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));

      /* Shortcut:  following common codes never translate
         into carry flag compares.  */
      if (code == EQ || code == NE || code == UNEQ || code == LTGT
          || code == ORDERED || code == UNORDERED)
        return false;

      /* These comparisons require zero flag; swap operands so they won't.  */
      if ((code == GT || code == UNLE || code == LE || code == UNGT)
          && !TARGET_IEEE_FP)
        {
          rtx tmp = op0;
          op0 = op1;
          op1 = tmp;
          code = swap_condition (code);
        }

      /* Try to expand the comparison and verify that we end up with
         carry flag based comparison.  This fails to be true only when
         we decide to expand comparison using arithmetic that is not
         too common scenario.  */
      start_sequence ();
      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
      compare_seq = get_insns ();
      end_sequence ();

      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
          || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
      else
        code = GET_CODE (compare_op);

      if (code != LTU && code != GEU)
        return false;

      emit_insn (compare_seq);
      *pop = compare_op;
      return true;
    }

  if (!INTEGRAL_MODE_P (mode))
    return false;

  switch (code)
    {
    case LTU:
    case GEU:
      break;

    /* Convert a==0 into (unsigned)a<1.  */
    case EQ:
    case NE:
      if (op1 != const0_rtx)
        return false;
      op1 = const1_rtx;
      code = (code == EQ ? LTU : GEU);
      break;

    /* Convert a>b into b<a or a>=b-1.  */
    case GTU:
    case LEU:
      if (CONST_INT_P (op1))
        {
          op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
          /* Bail out on overflow.  We still can swap operands but that
             would force loading of the constant into register.  */
          if (op1 == const0_rtx
              || !x86_64_immediate_operand (op1, GET_MODE (op1)))
            return false;
          code = (code == GTU ? GEU : LTU);
        }
      else
        {
          rtx tmp = op1;
          op1 = op0;
          op0 = tmp;
          code = (code == GTU ? LTU : GEU);
        }
      break;

    /* Convert a>=0 into (unsigned)a<0x80000000.  */
    case LT:
    case GE:
      if (mode == DImode || op1 != const0_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LT ? GEU : LTU);
      break;
    case LE:
    case GT:
      if (mode == DImode || op1 != constm1_rtx)
        return false;
      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
      code = (code == LE ? GEU : LTU);
      break;

    default:
      return false;
    }
  /* Swapping operands may cause constant to appear as first operand.  */
  if (!nonimmediate_operand (op0, VOIDmode))
    {
      if (!can_create_pseudo_p ())
        return false;
      op0 = force_reg (mode, op0);
    }
  *pop = ix86_expand_compare (code, op0, op1);
  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
  return true;
}

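/* Summary of the integer rewrites above (illustration): every accepted
   condition is massaged into LTU or GEU so that one cmp leaves the whole
   answer in the carry flag:

     a == 0   ->   (unsigned) a < 1
     a >  b   ->   b < a            (or a >= b+1 for a constant b)
     a >= 0   ->   (unsigned) a < 0x80000000

   which the sbb-based sequences in ix86_expand_int_movcc then consume
   directly.  */
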
bool
ix86_expand_int_movcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]), compare_code;
  rtx compare_seq, compare_op;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  start_sequence ();
  compare_op = ix86_expand_compare (code, op0, op1);
  compare_seq = get_insns ();
  end_sequence ();

  compare_code = GET_CODE (compare_op);

  if ((op1 == const0_rtx && (code == GE || code == LT))
      || (op1 == constm1_rtx && (code == GT || code == LE)))
    sign_bit_compare_p = true;

  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
     HImode insns, we'd be swallowed in word prefix ops.  */

  if ((mode != HImode || TARGET_FAST_PREFIX)
      && (mode != (TARGET_64BIT ? TImode : DImode))
      && CONST_INT_P (operands[2])
      && CONST_INT_P (operands[3]))
    {
      rtx out = operands[0];
      HOST_WIDE_INT ct = INTVAL (operands[2]);
      HOST_WIDE_INT cf = INTVAL (operands[3]);
      HOST_WIDE_INT diff;

      diff = ct - cf;
      /*  Sign bit compares are better done using shifts than we do by using
          sbb.  */
      if (sign_bit_compare_p
          || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
        {
          /* Detect overlap between destination and compare sources.  */
          rtx tmp = out;

          if (!sign_bit_compare_p)
            {
              rtx flags;
              bool fpcmp = false;

              compare_code = GET_CODE (compare_op);

              flags = XEXP (compare_op, 0);

              if (GET_MODE (flags) == CCFPmode
                  || GET_MODE (flags) == CCFPUmode)
                {
                  fpcmp = true;
                  compare_code
                    = ix86_fp_compare_code_to_integer (compare_code);
                }

              /* To simplify rest of code, restrict to the GEU case.  */
              if (compare_code == LTU)
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  compare_code = reverse_condition (compare_code);
                  code = reverse_condition (code);
                }
              else
                {
                  if (fpcmp)
                    PUT_CODE (compare_op,
                              reverse_condition_maybe_unordered
                                (GET_CODE (compare_op)));
                  else
                    PUT_CODE (compare_op,
                              reverse_condition (GET_CODE (compare_op)));
                }
              diff = ct - cf;

              if (reg_overlap_mentioned_p (out, op0)
                  || reg_overlap_mentioned_p (out, op1))
                tmp = gen_reg_rtx (mode);

              if (mode == DImode)
                emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
              else
                emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
                                                 flags, compare_op));
            }
          else
            {
              if (code == GT || code == GE)
                code = reverse_condition (code);
              else
                {
                  HOST_WIDE_INT tmp = ct;
                  ct = cf;
                  cf = tmp;
                  diff = -diff;
                }
              tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
            }

          if (diff == 1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [addl dest, ct]
               *
               * Size 5 - 8.
               */
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           tmp, GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (cf == -1)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * orl $ct, dest
               *
               * Size 8.
               */
              tmp = expand_simple_binop (mode, IOR,
                                         tmp, GEN_INT (ct),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else if (diff == -1 && ct)
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * notl dest
               * [addl dest, cf]
               *
               * Size 8 - 11.
               */
              tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
              if (cf)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (cf),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }
          else
            {
              /*
               * cmpl op0,op1
               * sbbl dest,dest
               * [notl dest]
               * andl cf - ct, dest
               * [addl dest, ct]
               *
               * Size 8 - 11.
               */

              if (cf == 0)
                {
                  cf = ct;
                  ct = 0;
                  tmp = expand_simple_unop (mode, NOT, tmp,
                                            copy_rtx (tmp), 1);
                }

              tmp = expand_simple_binop (mode, AND,
                                         copy_rtx (tmp),
                                         gen_int_mode (cf - ct, mode),
                                         copy_rtx (tmp), 1, OPTAB_DIRECT);
              if (ct)
                tmp = expand_simple_binop (mode, PLUS,
                                           copy_rtx (tmp), GEN_INT (ct),
                                           copy_rtx (tmp), 1, OPTAB_DIRECT);
            }

          if (!rtx_equal_p (tmp, out))
            emit_move_insn (copy_rtx (out), copy_rtx (tmp));

          return true;
        }

      if (diff < 0)
        {
          enum machine_mode cmp_mode = GET_MODE (op0);

          HOST_WIDE_INT tmp;
          tmp = ct, ct = cf, cf = tmp;
          diff = -diff;

          if (SCALAR_FLOAT_MODE_P (cmp_mode))
            {
              gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

              /* We may be reversing unordered compare to normal compare, that
                 is not valid in general (we may convert non-trapping condition
                 to trapping one), however on i386 we currently emit all
                 comparisons unordered.  */
              compare_code = reverse_condition_maybe_unordered (compare_code);
              code = reverse_condition_maybe_unordered (code);
            }
          else
            {
              compare_code = reverse_condition (compare_code);
              code = reverse_condition (code);
            }
        }

      compare_code = UNKNOWN;
      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
          && CONST_INT_P (op1))
        {
          if (op1 == const0_rtx
              && (code == LT || code == GE))
            compare_code = code;
          else if (op1 == constm1_rtx)
            {
              if (code == LE)
                compare_code = LT;
              else if (code == GT)
                compare_code = GE;
            }
        }

      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
      if (compare_code != UNKNOWN
          && GET_MODE (op0) == GET_MODE (out)
          && (cf == -1 || ct == -1))
        {
          /* If lea code below could be used, only optimize
             if it results in a 2 insn sequence.  */

          if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
                 || diff == 3 || diff == 5 || diff == 9)
              || (compare_code == LT && ct == -1)
              || (compare_code == GE && cf == -1))
            {
              /*
               * notl op1	(if necessary)
               * sarl $31, op1
               * orl cf, op1
               */
              if (ct != -1)
                {
                  cf = ct;
                  ct = -1;
                  code = reverse_condition (code);
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);

              out = expand_simple_binop (mode, IOR,
                                         out, GEN_INT (cf),
                                         out, 1, OPTAB_DIRECT);
              if (out != operands[0])
                emit_move_insn (operands[0], out);

              return true;
            }
        }


      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
           || diff == 3 || diff == 5 || diff == 9)
          && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
          && (mode != DImode
              || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
        {
          /*
           * xorl dest,dest
           * cmpl op1,op2
           * setcc dest
           * lea cf(dest*(ct-cf)),dest
           *
           * Size 14.
           *
           * This also catches the degenerate setcc-only case.
           */

          rtx tmp;
          int nops;

          out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

          nops = 0;
          /* On x86_64 the lea instruction operates on Pmode, so we need
             to get arithmetics done in proper mode to match.  */
          if (diff == 1)
            tmp = copy_rtx (out);
          else
            {
              rtx out1;
              out1 = copy_rtx (out);
              tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
              nops++;
              if (diff & 1)
                {
                  tmp = gen_rtx_PLUS (mode, tmp, out1);
                  nops++;
                }
            }
          if (cf != 0)
            {
              tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
              nops++;
            }
          if (!rtx_equal_p (tmp, out))
            {
              if (nops == 1)
                out = force_operand (tmp, copy_rtx (out));
              else
                emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out),
                                        copy_rtx (tmp)));
            }
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }

      /*
       * General case:			Jumpful:
       *   xorl dest,dest		cmpl op1, op2
       *   cmpl op1, op2		movl ct, dest
       *   setcc dest			jcc 1f
       *   decl dest			movl cf, dest
       *   andl (cf-ct),dest		1:
       *   addl ct,dest
       *
       * Size 20.			Size 14.
       *
       * This is reasonably steep, but branch mispredict costs are
       * high on modern cpus, so consider failing only if optimizing
       * for space.
       */

      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
          && BRANCH_COST (optimize_insn_for_speed_p (),
                          false) >= 2)
        {
          if (cf == 0)
            {
              enum machine_mode cmp_mode = GET_MODE (op0);

              cf = ct;
              ct = 0;

              if (SCALAR_FLOAT_MODE_P (cmp_mode))
                {
                  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));

                  /* We may be reversing unordered compare to normal compare,
                     that is not valid in general (we may convert non-trapping
                     condition to trapping one), however on i386 we currently
                     emit all comparisons unordered.  */
                  code = reverse_condition_maybe_unordered (code);
                }
              else
                {
                  code = reverse_condition (code);
                  if (compare_code != UNKNOWN)
                    compare_code = reverse_condition (compare_code);
                }
            }

          if (compare_code != UNKNOWN)
            {
              /* notl op1	(if needed)
                 sarl $31, op1
                 andl (cf-ct), op1
                 addl ct, op1

                 For x < 0 (resp. x <= -1) there will be no notl,
                 so if possible swap the constants to get rid of the
                 complement.
                 True/false will be -1/0 while code below (store flag
                 followed by decrement) is 0/-1, so the constants need
                 to be exchanged once more.  */

              if (compare_code == GE || !cf)
                {
                  code = reverse_condition (code);
                  compare_code = LT;
                }
              else
                {
                  HOST_WIDE_INT tmp = cf;
                  cf = ct;
                  ct = tmp;
                }

              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
            }
          else
            {
              out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);

              out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                         constm1_rtx,
                                         copy_rtx (out), 1, OPTAB_DIRECT);
            }

          out = expand_simple_binop (mode, AND, copy_rtx (out),
                                     gen_int_mode (cf - ct, mode),
                                     copy_rtx (out), 1, OPTAB_DIRECT);
          if (ct)
            out = expand_simple_binop (mode, PLUS, copy_rtx (out),
                                       GEN_INT (ct),
                                       copy_rtx (out), 1, OPTAB_DIRECT);
          if (!rtx_equal_p (out, operands[0]))
            emit_move_insn (operands[0], copy_rtx (out));

          return true;
        }
    }

  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
    {
      /* Try a few things more with specific constants and a variable.  */

      optab op;
      rtx var, orig_out, out, tmp;

      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
        return false;

      /* If one of the two operands is an interesting constant, load a
         constant with the above and mask it in with a logical operation.  */

      if (CONST_INT_P (operands[2]))
        {
          var = operands[3];
          if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
            operands[3] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
            operands[3] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else if (CONST_INT_P (operands[3]))
        {
          var = operands[2];
          if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
            operands[2] = constm1_rtx, op = and_optab;
          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
            operands[2] = const0_rtx, op = ior_optab;
          else
            return false;
        }
      else
        return false;

      orig_out = operands[0];
      tmp = gen_reg_rtx (mode);
      operands[0] = tmp;

      /* Recurse to get the constant loaded.  */
      if (ix86_expand_int_movcc (operands) == 0)
        return false;

      /* Mask in the interesting variable.  */
      out = expand_binop (mode, op, var, tmp, orig_out, 0,
                          OPTAB_WIDEN);
      if (!rtx_equal_p (out, orig_out))
        emit_move_insn (copy_rtx (orig_out), copy_rtx (out));

      return true;
    }

  /*
   * For comparison with above,
   *
   * movl cf,dest
   * movl ct,tmp
   * cmpl op1,op2
   * cmovcc tmp,dest
   *
   * Size 15.
   */

  if (! nonimmediate_operand (operands[2], mode))
    operands[2] = force_reg (mode, operands[2]);
  if (! nonimmediate_operand (operands[3], mode))
    operands[3] = force_reg (mode, operands[3]);

  if (! register_operand (operands[2], VOIDmode)
      && (mode == QImode
          || ! register_operand (operands[3], VOIDmode)))
    operands[2] = force_reg (mode, operands[2]);

  if (mode == QImode
      && ! register_operand (operands[3], VOIDmode))
    operands[3] = force_reg (mode, operands[3]);

  emit_insn (compare_seq);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode,
                                                compare_op, operands[2],
                                                operands[3])));
  return true;
}

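/* The sbb idiom generated by the constant cases above, in C terms
   (illustration only): after "cmp op0,op1; sbb dest,dest" the register
   holds -1 if the carry was set and 0 otherwise, so the branch-free
   select is

     dest = (op0 < op1) ? -1 : 0;      // sbb mask
     dest = (dest & (cf - ct)) + ct;   // pick ct or cf

   which is exactly the general "andl cf-ct; addl ct" shape documented
   in the comments above.  */
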
/* Swap, force into registers, or otherwise massage the two operands
   to an sse comparison with a mask result.  Thus we differ a bit from
   ix86_prepare_fp_compare_args which expects to produce a flags result.

   The DEST operand exists to help determine whether to commute commutative
   operators.  The POP0/POP1 operands are updated in place.  The new
   comparison code is returned, or UNKNOWN if not implementable.  */

static enum rtx_code
ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
                                  rtx *pop0, rtx *pop1)
{
  rtx tmp;

  switch (code)
    {
    case LTGT:
    case UNEQ:
      /* AVX supports all the needed comparisons.  */
      if (TARGET_AVX)
        break;
      /* We have no LTGT as an operator.  We could implement it with
         NE & ORDERED, but this requires an extra temporary.  It's
         not clear that it's worth it.  */
      return UNKNOWN;

    case LT:
    case LE:
    case UNGT:
    case UNGE:
      /* These are supported directly.  */
      break;

    case EQ:
    case NE:
    case UNORDERED:
    case ORDERED:
      /* AVX has 3 operand comparisons, no need to swap anything.  */
      if (TARGET_AVX)
        break;
      /* For commutative operators, try to canonicalize the destination
         operand to be first in the comparison - this helps reload to
         avoid extra moves.  */
      if (!dest || !rtx_equal_p (dest, *pop1))
        break;
      /* FALLTHRU */

    case GE:
    case GT:
    case UNLE:
    case UNLT:
      /* These are not supported directly before AVX, and furthermore
         ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
         comparison operands to transform into something that is
         supported.  */
      if (TARGET_AVX)
        break;

      tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
      code = swap_condition (code);
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}

/* Detect conditional moves that exactly match min/max operational
   semantics.  Note that this is IEEE safe, as long as we don't
   interchange the operands.

   Returns FALSE if this conditional move doesn't match a MIN/MAX,
   and TRUE if the operation is successful and instructions are emitted.  */

static bool
ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
                           rtx cmp_op1, rtx if_true, rtx if_false)
{
  enum machine_mode mode;
  bool is_min;
  rtx tmp;

  if (code == LT)
    ;
  else if (code == UNGE)
    {
      tmp = if_true;
      if_true = if_false;
      if_false = tmp;
    }
  else
    return false;

  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
    is_min = true;
  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
    is_min = false;
  else
    return false;

  mode = GET_MODE (dest);

  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
     but MODE may be a vector mode and thus not appropriate.  */
  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
    {
      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
      rtvec v;

      if_true = force_reg (mode, if_true);
      v = gen_rtvec (2, if_true, if_false);
      tmp = gen_rtx_UNSPEC (mode, v, u);
    }
  else
    {
      code = is_min ? SMIN : SMAX;
      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
    }

  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
  return true;
}

/* Expand an sse vector comparison.  Return the register with the result.  */

static rtx
ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
                     rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  enum machine_mode cmp_mode = GET_MODE (cmp_op0);
  rtx x;

  cmp_op0 = force_reg (cmp_mode, cmp_op0);
  if (!nonimmediate_operand (cmp_op1, cmp_mode))
    cmp_op1 = force_reg (cmp_mode, cmp_op1);

  if (optimize
      || reg_overlap_mentioned_p (dest, op_true)
      || reg_overlap_mentioned_p (dest, op_false))
    dest = gen_reg_rtx (mode);

  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
  if (cmp_mode != mode)
    {
      x = force_reg (cmp_mode, x);
      convert_move (dest, x, false);
    }
  else
    emit_insn (gen_rtx_SET (VOIDmode, dest, x));

  return dest;
}

/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
   operations.  This is used for both scalar and vector conditional moves.  */

static void
ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
{
  enum machine_mode mode = GET_MODE (dest);
  rtx t2, t3, x;

  if (vector_all_ones_operand (op_true, mode)
      && rtx_equal_p (op_false, CONST0_RTX (mode)))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
    }
  else if (op_false == CONST0_RTX (mode))
    {
      op_true = force_reg (mode, op_true);
      x = gen_rtx_AND (mode, cmp, op_true);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (op_true == CONST0_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode))
    {
      op_false = force_reg (mode, op_false);
      x = gen_rtx_IOR (mode, cmp, op_false);
      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
    }
  else if (TARGET_XOP)
    {
      op_true = force_reg (mode, op_true);

      if (!nonimmediate_operand (op_false, mode))
        op_false = force_reg (mode, op_false);

      emit_insn (gen_rtx_SET (mode, dest,
                              gen_rtx_IF_THEN_ELSE (mode, cmp,
                                                    op_true,
                                                    op_false)));
    }
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;

      if (!nonimmediate_operand (op_true, mode))
        op_true = force_reg (mode, op_true);

      op_false = force_reg (mode, op_false);

      switch (mode)
        {
        case V4SFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvps;
          break;
        case V2DFmode:
          if (TARGET_SSE4_1)
            gen = gen_sse4_1_blendvpd;
          break;
        case V16QImode:
        case V8HImode:
        case V4SImode:
        case V2DImode:
          if (TARGET_SSE4_1)
            {
              gen = gen_sse4_1_pblendvb;
              dest = gen_lowpart (V16QImode, dest);
              op_false = gen_lowpart (V16QImode, op_false);
              op_true = gen_lowpart (V16QImode, op_true);
              cmp = gen_lowpart (V16QImode, cmp);
            }
          break;
        case V8SFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvps256;
          break;
        case V4DFmode:
          if (TARGET_AVX)
            gen = gen_avx_blendvpd256;
          break;
        case V32QImode:
        case V16HImode:
        case V8SImode:
        case V4DImode:
          if (TARGET_AVX2)
            {
              gen = gen_avx2_pblendvb;
              dest = gen_lowpart (V32QImode, dest);
              op_false = gen_lowpart (V32QImode, op_false);
              op_true = gen_lowpart (V32QImode, op_true);
              cmp = gen_lowpart (V32QImode, cmp);
            }
          break;
        default:
          break;
        }

      if (gen != NULL)
        emit_insn (gen (dest, op_false, op_true, cmp));
      else
        {
          op_true = force_reg (mode, op_true);

          t2 = gen_reg_rtx (mode);
          if (optimize)
            t3 = gen_reg_rtx (mode);
          else
            t3 = dest;

          x = gen_rtx_AND (mode, op_true, cmp);
          emit_insn (gen_rtx_SET (VOIDmode, t2, x));

          x = gen_rtx_NOT (mode, cmp);
          x = gen_rtx_AND (mode, x, op_false);
          emit_insn (gen_rtx_SET (VOIDmode, t3, x));

          x = gen_rtx_IOR (mode, t3, t2);
          emit_insn (gen_rtx_SET (VOIDmode, dest, x));
        }
    }
}

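/* The fallback AND/ANDN/IOR sequence at the bottom is the classic
   mask-select identity (scalar sketch, hypothetical helper); SSE4.1 and
   AVX merely fold the three logical operations into a single blendv:

     static unsigned int blend_sketch (unsigned int cmp,  // all-ones or 0
                                       unsigned int t, unsigned int f)
     {
       return (t & cmp) | (f & ~cmp);
     }
*/
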
/* Expand a floating-point conditional move.  Return true if successful.  */

bool
ix86_expand_fp_movcc (rtx operands[])
{
  enum machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[1]);
  rtx tmp, compare_op;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    {
      enum machine_mode cmode;

      /* Since we've no cmove for sse registers, don't force bad register
         allocation just to gain access to it.  Deny movcc when the
         comparison mode doesn't match the move mode.  */
      cmode = GET_MODE (op0);
      if (cmode == VOIDmode)
        cmode = GET_MODE (op1);
      if (cmode != mode)
        return false;

      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
      if (code == UNKNOWN)
        return false;

      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
                                     operands[2], operands[3]))
        return true;

      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
                                 operands[2], operands[3]);
      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
      return true;
    }

  /* The floating point conditional move instructions don't directly
     support conditions resulting from a signed integer comparison.  */

  compare_op = ix86_expand_compare (code, op0, op1);
  if (!fcmov_comparison_operator (compare_op, VOIDmode))
    {
      tmp = gen_reg_rtx (QImode);
      ix86_expand_setcc (tmp, code, op0, op1);

      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
                          gen_rtx_IF_THEN_ELSE (mode, compare_op,
                                                operands[2], operands[3])));

  return true;
}

/* Expand a floating-point vector conditional move; a vcond operation
   rather than a movcc operation.  */

bool
ix86_expand_fp_vcond (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp;

  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
                                           &operands[4], &operands[5]);
  if (code == UNKNOWN)
    {
      rtx temp;
      switch (GET_CODE (operands[3]))
        {
        case LTGT:
          temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = AND;
          break;
        case UNEQ:
          temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
                                      operands[5], operands[0], operands[0]);
          cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
                                     operands[5], operands[1], operands[2]);
          code = IOR;
          break;
        default:
          gcc_unreachable ();
        }
      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
                                 OPTAB_DIRECT);
      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
      return true;
    }

  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
                                 operands[5], operands[1], operands[2]))
    return true;

  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
                             operands[1], operands[2]);
  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
  return true;
}

/* Expand a signed/unsigned integral vector conditional move.  */

bool
ix86_expand_int_vcond (rtx operands[])
{
  enum machine_mode data_mode = GET_MODE (operands[0]);
  enum machine_mode mode = GET_MODE (operands[4]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate = false;
  rtx x, cop0, cop1;

  cop0 = operands[4];
  cop1 = operands[5];

  /* XOP supports all of the comparisons on all vector int types.  */
  if (!TARGET_XOP)
    {
      /* Canonicalize the comparison to EQ, GT, GTU.  */
      switch (code)
        {
        case EQ:
        case GT:
        case GTU:
          break;

        case NE:
        case LE:
        case LEU:
          code = reverse_condition (code);
          negate = true;
          break;

        case GE:
        case GEU:
          code = reverse_condition (code);
          negate = true;
          /* FALLTHRU */

        case LT:
        case LTU:
          code = swap_condition (code);
          x = cop0, cop0 = cop1, cop1 = x;
          break;

        default:
          gcc_unreachable ();
        }

      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
      if (mode == V2DImode)
        {
          switch (code)
            {
            case EQ:
              /* SSE4.1 supports EQ.  */
              if (!TARGET_SSE4_1)
                return false;
              break;

            case GT:
            case GTU:
              /* SSE4.2 supports GT/GTU.  */
              if (!TARGET_SSE4_2)
                return false;
              break;

            default:
              gcc_unreachable ();
            }
        }

      /* Unsigned parallel compare is not supported by the hardware.
         Play some tricks to turn this into a signed comparison
         against 0.  */
      if (code == GTU)
        {
          cop0 = force_reg (mode, cop0);

          switch (mode)
            {
            case V8SImode:
            case V4DImode:
            case V4SImode:
            case V2DImode:
              {
                rtx t1, t2, mask;
                rtx (*gen_sub3) (rtx, rtx, rtx);

                switch (mode)
                  {
                  case V8SImode: gen_sub3 = gen_subv8si3; break;
                  case V4DImode: gen_sub3 = gen_subv4di3; break;
                  case V4SImode: gen_sub3 = gen_subv4si3; break;
                  case V2DImode: gen_sub3 = gen_subv2di3; break;
                  default:
                    gcc_unreachable ();
                  }
                /* Subtract (-(INT MAX) - 1) from both operands to make
                   them signed.  */
                mask = ix86_build_signbit_mask (mode, true, false);
                t1 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t1, cop0, mask));

                t2 = gen_reg_rtx (mode);
                emit_insn (gen_sub3 (t2, cop1, mask));

                cop0 = t1;
                cop1 = t2;
                code = GT;
              }
              break;

            case V32QImode:
            case V16HImode:
            case V16QImode:
            case V8HImode:
              /* Perform a parallel unsigned saturating subtraction.  */
              x = gen_reg_rtx (mode);
              emit_insn (gen_rtx_SET (VOIDmode, x,
                                      gen_rtx_US_MINUS (mode, cop0, cop1)));

              cop0 = x;
              cop1 = CONST0_RTX (mode);
              code = EQ;
              negate = !negate;
              break;

            default:
              gcc_unreachable ();
            }
        }
    }

  /* Allow the comparison to be done in one mode, but the movcc to
     happen in another mode.  */
  if (data_mode == mode)
    {
      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
    }
  else
    {
      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
      x = ix86_expand_sse_cmp (gen_lowpart (mode, operands[0]),
                               code, cop0, cop1,
                               operands[1+negate], operands[2-negate]);
      x = gen_lowpart (data_mode, x);
    }

  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
                         operands[2-negate]);
  return true;
}

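/* The GTU trick above, scalar version (illustration only): the hardware
   vector compares are signed, and biasing both operands by the sign bit
   turns an unsigned comparison into a signed one.  Subtracting
   0x80000000 and XORing it are the same operation on the top bit:

     static int gtu_sketch (unsigned int a, unsigned int b)
     {
       return (int) (a ^ 0x80000000u) > (int) (b ^ 0x80000000u);
     }
*/
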
/* Expand a variable vector permutation.  */

void
ix86_expand_vec_perm (rtx operands[])
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx mask = operands[3];
  rtx t1, t2, t3, t4, vt, vt2, vec[32];
  enum machine_mode mode = GET_MODE (op0);
  enum machine_mode maskmode = GET_MODE (mask);
  int w, e, i;
  bool one_operand_shuffle = rtx_equal_p (op0, op1);

  /* Number of elements in the vector.  */
  w = GET_MODE_NUNITS (mode);
  e = GET_MODE_UNIT_SIZE (mode);
  gcc_assert (w <= 32);

  if (TARGET_AVX2)
    {
      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
	{
	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
	     a constant shuffle operand.  With a tiny bit of effort we can
	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
	     unfortunate but there's no avoiding it.
	     Similarly for V16HImode we don't have instructions for variable
	     shuffling, while for V32QImode we can use, after preparing
	     suitable masks, vpshufb; vpshufb; vpermq; vpor.  */

	  if (mode == V16HImode)
	    {
	      maskmode = mode = V32QImode;
	      w = 32;
	      e = 1;
	    }
	  else
	    {
	      maskmode = mode = V8SImode;
	      w = 8;
	      e = 4;
	    }
	  t1 = gen_reg_rtx (maskmode);

	  /* Replicate the low bits of the V4DImode mask into V8SImode:
	       mask = { A B C D }
	       t1 = { A A B B C C D D }.  */
	  for (i = 0; i < w / 2; ++i)
	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_reg (maskmode, vt);
	  mask = gen_lowpart (maskmode, mask);
	  if (maskmode == V8SImode)
	    emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
	  else
	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));

	  /* Multiply the shuffle indices by two.  */
	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
				    OPTAB_DIRECT);

	  /* Add one to the odd shuffle indices:
	       t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
	  for (i = 0; i < w / 2; ++i)
	    {
	      vec[i * 2] = const0_rtx;
	      vec[i * 2 + 1] = const1_rtx;
	    }
	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
	  vt = force_const_mem (maskmode, vt);
	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
				    OPTAB_DIRECT);

	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
	  operands[3] = mask = t1;
	  target = gen_lowpart (mode, target);
	  op0 = gen_lowpart (mode, op0);
	  op1 = gen_lowpart (mode, op1);
	}

      switch (mode)
	{
	case V8SImode:
	  /* The VPERMD and VPERMPS instructions already properly ignore
	     the high bits of the shuffle elements.  No need for us to
	     perform an AND ourselves.  */
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8si (target, mask, op0));
	  else
	    {
	      t1 = gen_reg_rtx (V8SImode);
	      t2 = gen_reg_rtx (V8SImode);
	      emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
	      emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
	      goto merge_two;
	    }
	  return;

	case V8SFmode:
	  mask = gen_lowpart (V8SFmode, mask);
	  if (one_operand_shuffle)
	    emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
	  else
	    {
	      t1 = gen_reg_rtx (V8SFmode);
	      t2 = gen_reg_rtx (V8SFmode);
	      emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
	      emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
	      goto merge_two;
	    }
	  return;

	case V4SImode:
	  /* By combining the two 128-bit input vectors into one 256-bit
	     input vector, we can use VPERMD and VPERMPS for the full
	     two-operand shuffle.  */
	  t1 = gen_reg_rtx (V8SImode);
	  t2 = gen_reg_rtx (V8SImode);
	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
	  return;

	case V4SFmode:
	  t1 = gen_reg_rtx (V8SFmode);
	  t2 = gen_reg_rtx (V8SFmode);
	  mask = gen_lowpart (V4SFmode, mask);
	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
	  emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
	  emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
	  return;

	case V32QImode:
	  t1 = gen_reg_rtx (V32QImode);
	  t2 = gen_reg_rtx (V32QImode);
	  t3 = gen_reg_rtx (V32QImode);
	  vt2 = GEN_INT (128);
	  for (i = 0; i < 32; i++)
	    vec[i] = vt2;
	  vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt = force_reg (V32QImode, vt);
	  for (i = 0; i < 32; i++)
	    vec[i] = i < 16 ? vt2 : const0_rtx;
	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
	  vt2 = force_reg (V32QImode, vt2);
	  /* From mask create two adjusted masks, which contain the same
	     bits as mask in the low 7 bits of each vector element.
	     The first mask will have the most significant bit clear
	     if it requests element from the same 128-bit lane
	     and MSB set if it requests element from the other 128-bit lane.
	     The second mask will have the opposite values of the MSB,
	     and additionally will have its 128-bit lanes swapped.
	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
	     stands for other 12 bytes.  */
	  /* The bit whether element is from the same lane or the other
	     lane is bit 4, so shift it up by 3 to the MSB position.  */
	  emit_insn (gen_ashlv4di3 (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, mask),
				    GEN_INT (3)));
	  /* Clear MSB bits from the mask just in case it had them set.  */
	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
	  /* After this t1 will have MSB set for elements from other lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt2));
	  /* Clear bits other than MSB.  */
	  emit_insn (gen_andv32qi3 (t1, t1, vt));
	  /* Or in the lower bits from mask into t3.  */
	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
	  /* And invert MSB bits in t1, so MSB is set for elements from the
	     same lane.  */
	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
	  /* Swap 128-bit lanes in t3.  */
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  /* And or in the lower bits from mask into t1.  */
	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
	  if (one_operand_shuffle)
	    {
	      /* Each of these shuffles will put 0s in places where
		 element from the other 128-bit lane is needed, otherwise
		 will shuffle in the requested value.  */
	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, t3));
	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
	      /* For t3 the 128-bit lanes are swapped again.  */
	      emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					      gen_lowpart (V4DImode, t3),
					      const2_rtx, GEN_INT (3),
					      const0_rtx, const1_rtx));
	      /* And oring both together leads to the result.  */
	      emit_insn (gen_iorv32qi3 (target, t1, t3));
	      return;
	    }

	  t4 = gen_reg_rtx (V32QImode);
	  /* Similarly to the above one_operand_shuffle code,
	     just repeated twice for each operand.  The merge_two:
	     code will merge the two results together.  */
	  emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, t3));
	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, t3));
	  emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
	  emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t4),
					  gen_lowpart (V4DImode, t4),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  emit_insn (gen_avx2_permv4di_1 (gen_lowpart (V4DImode, t3),
					  gen_lowpart (V4DImode, t3),
					  const2_rtx, GEN_INT (3),
					  const0_rtx, const1_rtx));
	  emit_insn (gen_iorv32qi3 (t4, t2, t4));
	  emit_insn (gen_iorv32qi3 (t3, t1, t3));
	  t1 = t4;
	  t2 = t3;
	  goto merge_two;

	default:
	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
	  break;
	}
    }

  if (TARGET_XOP)
    {
      /* The XOP VPPERM insn supports three inputs.  By ignoring the
	 one_operand_shuffle special case, we avoid creating another
	 set of constant vectors in memory.  */
      one_operand_shuffle = false;

      /* mask = mask & {2*w-1, ...} */
      vt = GEN_INT (2*w - 1);
    }
  else
    {
      /* mask = mask & {w-1, ...} */
      vt = GEN_INT (w - 1);
    }

  for (i = 0; i < w; i++)
    vec[i] = vt;
  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
  mask = expand_simple_binop (maskmode, AND, mask, vt,
			      NULL_RTX, 0, OPTAB_DIRECT);

  /* For non-QImode operations, convert the word permutation control
     into a byte permutation control.  */
  if (mode != V16QImode)
    {
      mask = expand_simple_binop (maskmode, ASHIFT, mask,
				  GEN_INT (exact_log2 (e)),
				  NULL_RTX, 0, OPTAB_DIRECT);

      /* Convert mask to vector of chars.  */
      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));

      /* Replicate each of the input bytes into byte positions:
	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i/e * e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      if (TARGET_XOP)
	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
      else
	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));

      /* Convert it into the byte positions by doing
	 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...}  */
      for (i = 0; i < 16; ++i)
	vec[i] = GEN_INT (i % e);
      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
      vt = force_const_mem (V16QImode, vt);
      emit_insn (gen_addv16qi3 (mask, mask, vt));
    }

  /* The actual shuffle operations all operate on V16QImode.  */
  op0 = gen_lowpart (V16QImode, op0);
  op1 = gen_lowpart (V16QImode, op1);
  target = gen_lowpart (V16QImode, target);

  if (TARGET_XOP)
    emit_insn (gen_xop_pperm (target, op0, op1, mask));
  else if (one_operand_shuffle)
    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
  else
    {
      rtx xops[6];
      bool ok;

      /* Shuffle the two input vectors independently.  */
      t1 = gen_reg_rtx (V16QImode);
      t2 = gen_reg_rtx (V16QImode);
      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));

 merge_two:
      /* Then merge them together.  The key is whether any given control
         element contained a bit set that indicates the second word.  */
      mask = operands[3];
      vt = GEN_INT (w);
      if (maskmode == V2DImode && !TARGET_SSE4_1)
	{
	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At which point the masking that expand_int_vcond
	     does will work as desired.  */
	  rtx t3 = gen_reg_rtx (V4SImode);
	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
					const0_rtx, const0_rtx,
					const2_rtx, const2_rtx));
	  mask = t3;
	  maskmode = V4SImode;
	  e = w = 4;
	}

      for (i = 0; i < w; i++)
	vec[i] = vt;
      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
      vt = force_reg (maskmode, vt);
      mask = expand_simple_binop (maskmode, AND, mask, vt,
				  NULL_RTX, 0, OPTAB_DIRECT);

      xops[0] = gen_lowpart (mode, operands[0]);
      xops[1] = gen_lowpart (mode, t2);
      xops[2] = gen_lowpart (mode, t1);
      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
      xops[4] = mask;
      xops[5] = vt;
      ok = ix86_expand_int_vcond (xops);
      gcc_assert (ok);
    }
}
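/* The merge_two tail above implements a two-operand variable shuffle
   without VPPERM, roughly:

       t1  = pshufb (op0, mask);        -- elements gathered from op0
       t2  = pshufb (op1, mask);        -- elements gathered from op1
       sel = (mask & w) == w;           -- did the control pick op1?
       target = sel ? t2 : t1;          -- merged via ix86_expand_int_vcond

   (schematic only; the real sequence is the insns emitted above).  */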
/* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
   true if we should do zero extension, else sign extension.  HIGH_P is
   true if we want the N/2 high elements, else the low elements.  */

void
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
{
  enum machine_mode imode = GET_MODE (operands[1]);
  rtx tmp;

  if (TARGET_SSE4_1)
    {
      rtx (*unpack)(rtx, rtx);
      rtx (*extract)(rtx, rtx) = NULL;
      enum machine_mode halfmode = BLKmode;

      switch (imode)
	{
	case V32QImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv16qiv16hi2;
	  else
	    unpack = gen_avx2_sign_extendv16qiv16hi2;
	  halfmode = V16QImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
	  break;
	case V16HImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv8hiv8si2;
	  else
	    unpack = gen_avx2_sign_extendv8hiv8si2;
	  halfmode = V8HImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
	  break;
	case V8SImode:
	  if (unsigned_p)
	    unpack = gen_avx2_zero_extendv4siv4di2;
	  else
	    unpack = gen_avx2_sign_extendv4siv4di2;
	  halfmode = V4SImode;
	  extract
	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
	  break;
	case V16QImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
	  else
	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
	  break;
	case V8HImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
	  else
	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
	  break;
	case V4SImode:
	  if (unsigned_p)
	    unpack = gen_sse4_1_zero_extendv2siv2di2;
	  else
	    unpack = gen_sse4_1_sign_extendv2siv2di2;
	  break;
	default:
	  gcc_unreachable ();
	}

      if (GET_MODE_SIZE (imode) == 32)
	{
	  tmp = gen_reg_rtx (halfmode);
	  emit_insn (extract (tmp, operands[1]));
	}
      else if (high_p)
	{
	  /* Shift higher 8 bytes to lower 8 bytes.  */
	  tmp = gen_reg_rtx (imode);
	  emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, tmp),
					 gen_lowpart (V1TImode, operands[1]),
					 GEN_INT (64)));
	}
      else
	tmp = operands[1];

      emit_insn (unpack (operands[0], tmp));
    }
  else
    {
      rtx (*unpack)(rtx, rtx, rtx);
      rtx dest;

      switch (imode)
	{
	case V16QImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv16qi;
	  else
	    unpack = gen_vec_interleave_lowv16qi;
	  break;
	case V8HImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv8hi;
	  else
	    unpack = gen_vec_interleave_lowv8hi;
	  break;
	case V4SImode:
	  if (high_p)
	    unpack = gen_vec_interleave_highv4si;
	  else
	    unpack = gen_vec_interleave_lowv4si;
	  break;
	default:
	  gcc_unreachable ();
	}

      dest = gen_lowpart (imode, operands[0]);

      if (unsigned_p)
	tmp = force_reg (imode, CONST0_RTX (imode));
      else
	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
				   operands[1], pc_rtx, pc_rtx);

      emit_insn (unpack (dest, operands[1], tmp));
    }
}
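/* In the non-SSE4.1 path above the widening is done by interleaving
   with a vector supplying the new high halves: zeros for zero
   extension, or the (0 > x) compare result, i.e. replicated sign
   bits, for sign extension.  Schematically, for the low half of
   V8HI -> V4SI:

       { x0 x1 x2 x3 .. } interleave { s0 s1 s2 s3 .. }
	 => { x0 s0 x1 s1 x2 s2 x3 s3 }  viewed as V4SI.  */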
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
bool
ix86_expand_int_addcc (rtx operands[])
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx flags;
  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
  rtx compare_op;
  rtx val = const0_rtx;
  bool fpcmp = false;
  enum machine_mode mode;
  rtx op0 = XEXP (operands[1], 0);
  rtx op1 = XEXP (operands[1], 1);

  if (operands[3] != const1_rtx
      && operands[3] != constm1_rtx)
    return false;
  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
    return false;
  code = GET_CODE (compare_op);

  flags = XEXP (compare_op, 0);

  if (GET_MODE (flags) == CCFPmode
      || GET_MODE (flags) == CCFPUmode)
    {
      fpcmp = true;
      code = ix86_fp_compare_code_to_integer (code);
    }

  if (code != LTU)
    {
      val = constm1_rtx;
      if (fpcmp)
	PUT_CODE (compare_op,
		  reverse_condition_maybe_unordered
		    (GET_CODE (compare_op)));
      else
	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
    }

  mode = GET_MODE (operands[0]);

  /* Construct either adc or sbb insn.  */
  if ((code == LTU) == (operands[3] == constm1_rtx))
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_subqi3_carry;
	  break;
	case HImode:
	  insn = gen_subhi3_carry;
	  break;
	case SImode:
	  insn = gen_subsi3_carry;
	  break;
	case DImode:
	  insn = gen_subdi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      switch (mode)
	{
	case QImode:
	  insn = gen_addqi3_carry;
	  break;
	case HImode:
	  insn = gen_addhi3_carry;
	  break;
	case SImode:
	  insn = gen_addsi3_carry;
	  break;
	case DImode:
	  insn = gen_adddi3_carry;
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));

  return true;
}
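/* E.g. for "if (a < b) x++;" with an unsigned comparison this expands
   to, schematically:

       cmp  a, b        ; carry = (a <u b)
       adc  x, 0        ; x += carry

   and the decrement flavour uses sbb analogously, with the comparison
   reversed when the carry has the wrong sense.  */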
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */

static int
ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
{
  int size;

  if (!TARGET_64BIT)
    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
  else
    size = (GET_MODE_SIZE (mode) + 4) / 8;

  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
  gcc_assert (size >= 2 && size <= 4);

  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, that force all constants to memory to allow combining.  */
  if (MEM_P (operand) && MEM_READONLY_P (operand))
    {
      rtx tmp = maybe_get_pool_constant (operand);
      if (tmp)
	operand = tmp;
    }

  if (MEM_P (operand) && !offsettable_memref_p (operand))
    {
      /* The only non-offsettable memories we handle are pushes.  */
      int ok = push_operand (operand, VOIDmode);

      gcc_assert (ok);

      operand = copy_rtx (operand);
      PUT_MODE (operand, Pmode);
      parts[0] = parts[1] = parts[2] = parts[3] = operand;
      return size;
    }

  if (GET_CODE (operand) == CONST_VECTOR)
    {
      enum machine_mode imode = int_mode_for_mode (mode);
      /* Caution: if we looked through a constant pool memory above,
	 the operand may actually have a different mode now.  That's
	 ok, since we want to pun this all the way back to an integer.  */
      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
      gcc_assert (operand != NULL);
      mode = imode;
    }

  if (!TARGET_64BIT)
    {
      if (mode == DImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      else
	{
	  int i;

	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      for (i = 0; i < size; i++)
		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, SImode, 0);
	      parts[0] = operand;
	      for (i = 1; i < size; i++)
		parts[i] = adjust_address (operand, SImode, 4 * i);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      switch (mode)
		{
		case TFmode:
		  real_to_target (l, &r, mode);
		  parts[3] = gen_int_mode (l[3], SImode);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case XFmode:
		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
		  parts[2] = gen_int_mode (l[2], SImode);
		  break;
		case DFmode:
		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
		  break;
		default:
		  gcc_unreachable ();
		}
	      parts[1] = gen_int_mode (l[1], SImode);
	      parts[0] = gen_int_mode (l[0], SImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }
  else
    {
      if (mode == TImode)
	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
      if (mode == XFmode || mode == TFmode)
	{
	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
	  if (REG_P (operand))
	    {
	      gcc_assert (reload_completed);
	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
	    }
	  else if (offsettable_memref_p (operand))
	    {
	      operand = adjust_address (operand, DImode, 0);
	      parts[0] = operand;
	      parts[1] = adjust_address (operand, upper_mode, 8);
	    }
	  else if (GET_CODE (operand) == CONST_DOUBLE)
	    {
	      REAL_VALUE_TYPE r;
	      long l[4];

	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
	      real_to_target (l, &r, mode);

	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
	      if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[0]
		  = gen_int_mode
		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
		       DImode);
	      else
		parts[0] = immed_double_const (l[0], l[1], DImode);

	      if (upper_mode == SImode)
		parts[1] = gen_int_mode (l[2], SImode);
	      else if (HOST_BITS_PER_WIDE_INT >= 64)
		parts[1]
		  = gen_int_mode
		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
		       DImode);
	      else
		parts[1] = immed_double_const (l[2], l[3], DImode);
	    }
	  else
	    gcc_unreachable ();
	}
    }

  return size;
}
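/* E.g. an XFmode constant on a 32-bit target is returned as three
   SImode parts: l[0] and l[1] hold the 64-bit mantissa and l[2] the
   sign/exponent word; any remaining bytes of the slot are padding,
   so three parts suffice even for a 16-byte long double.  */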
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */

void
ix86_split_long_move (rtx operands[])
{
  rtx part[2][4];
  rtx tmp;
  int nparts, i, j;
  int push = 0;
  int collisions = 0;
  enum machine_mode mode = GET_MODE (operands[0]);
  bool collisionparts[4];

  /* The DFmode expanders may ask us to move double.
     For 64bit target this is single move.  By hiding the fact
     here we simplify i386.md splitters.  */
  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
    {
      /* Optimize constant pool reference to immediates.  This is used by
	 fp moves, that force all constants to memory to allow combining.  */

      if (MEM_P (operands[1])
	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
	operands[1] = get_pool_constant (XEXP (operands[1], 0));
      if (push_operand (operands[0], VOIDmode))
	{
	  operands[0] = copy_rtx (operands[0]);
	  PUT_MODE (operands[0], Pmode);
	}
      else
	operands[0] = gen_lowpart (DImode, operands[0]);
      operands[1] = gen_lowpart (DImode, operands[1]);
      emit_move_insn (operands[0], operands[1]);
      return;
    }

  /* The only non-offsettable memory we handle is push.  */
  if (push_operand (operands[0], VOIDmode))
    push = 1;
  else
    gcc_assert (!MEM_P (operands[0])
		|| offsettable_memref_p (operands[0]));

  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));

  /* When emitting push, take care for source operands on the stack.  */
  if (push && MEM_P (operands[1])
      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
    {
      rtx src_base = XEXP (part[1][nparts - 1], 0);

      /* Compensate for the stack decrement by 4.  */
      if (!TARGET_64BIT && nparts == 3
	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
	src_base = plus_constant (src_base, 4);

      /* src_base refers to the stack pointer and is
	 automatically decreased by emitted push.  */
      for (i = 0; i < nparts; i++)
	part[1][i] = change_address (part[1][i],
				     GET_MODE (part[1][i]), src_base);
    }

  /* We need to do copy in the right order in case an address register
     of the source overlaps the destination.  */
  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
    {
      for (i = 0; i < nparts; i++)
	{
	  collisionparts[i]
	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
	  if (collisionparts[i])
	    collisions++;
	}

      /* Collision in the middle part can be handled by reordering.  */
      if (collisions == 1 && nparts == 3 && collisionparts[1])
	{
	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	}
      else if (collisions == 1
	       && nparts == 4
	       && (collisionparts[1] || collisionparts[2]))
	{
	  if (collisionparts[1])
	    {
	      tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
	      tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
	    }
	  else
	    {
	      tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
	      tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
	    }
	}

      /* If there are more collisions, we can't handle it by reordering.
	 Do an lea to the last part and use only one colliding move.  */
      else if (collisions > 1)
	{
	  rtx base;

	  collisions = 1;

	  base = part[0][nparts - 1];

	  /* Handle the case when the last part isn't valid for lea.
	     Happens in 64-bit mode storing the 12-byte XFmode.  */
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_REG (Pmode, REGNO (base));

	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
	  part[1][0] = replace_equiv_address (part[1][0], base);
	  for (i = 1; i < nparts; i++)
	    {
	      tmp = plus_constant (base, UNITS_PER_WORD * i);
	      part[1][i] = replace_equiv_address (part[1][i], tmp);
	    }
	}
    }

  if (push)
    {
      if (!TARGET_64BIT)
	{
	  if (nparts == 3)
	    {
	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
		emit_insn (gen_addsi3 (stack_pointer_rtx,
				       stack_pointer_rtx, GEN_INT (-4)));
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	  else if (nparts == 4)
	    {
	      emit_move_insn (part[0][3], part[1][3]);
	      emit_move_insn (part[0][2], part[1][2]);
	    }
	}
      else
	{
	  /* In 64bit mode we don't have 32bit push available.  In case this is
	     register, it is OK - we will just use larger counterpart.  We also
	     retype memory - these come from an attempt to avoid REX prefix on
	     moving of second half of TFmode value.  */
	  if (GET_MODE (part[1][1]) == SImode)
	    {
	      switch (GET_CODE (part[1][1]))
		{
		case MEM:
		  part[1][1] = adjust_address (part[1][1], DImode, 0);
		  break;

		case REG:
		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
		  break;

		default:
		  gcc_unreachable ();
		}

	      if (GET_MODE (part[1][0]) == SImode)
		part[1][0] = part[1][1];
	    }
	}
      emit_move_insn (part[0][1], part[1][1]);
      emit_move_insn (part[0][0], part[1][0]);
      return;
    }

  /* Choose correct order to not overwrite the source before it is copied.  */
  if ((REG_P (part[0][0])
       && REG_P (part[1][1])
       && (REGNO (part[0][0]) == REGNO (part[1][1])
	   || (nparts == 3
	       && REGNO (part[0][0]) == REGNO (part[1][2]))
	   || (nparts == 4
	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
      || (collisions > 0
	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
    {
      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
	{
	  operands[2 + i] = part[0][j];
	  operands[6 + i] = part[1][j];
	}
    }
  else
    {
      for (i = 0; i < nparts; i++)
	{
	  operands[2 + i] = part[0][i];
	  operands[6 + i] = part[1][i];
	}
    }

  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
  if (optimize_insn_for_size_p ())
    {
      for (j = 0; j < nparts - 1; j++)
	if (CONST_INT_P (operands[6 + j])
	    && operands[6 + j] != const0_rtx
	    && REG_P (operands[2 + j]))
	  for (i = j; i < nparts - 1; i++)
	    if (CONST_INT_P (operands[7 + i])
		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
	      operands[7 + i] = operands[2 + j];
    }

  for (i = 0; i < nparts; i++)
    emit_move_insn (operands[2 + i], operands[6 + i]);
}
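/* Example of the collision handling above: for a copy like
   (reg:DI ax) <- (mem:DI (reg:SI ax)), storing the low word first
   would clobber %eax while its value is still needed to address the
   high word, so the parts are emitted in reverse order; when no safe
   order exists, the address is first materialized with an lea.  */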
/* Helper function of ix86_split_ashl used to generate an SImode/DImode
   left shift by a constant, either using a single shift or
   a sequence of add instructions.  */

static void
ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  rtx (*insn)(rtx, rtx, rtx);

  if (count == 1
      || (count * ix86_cost->add <= ix86_cost->shift_const
	  && !optimize_insn_for_size_p ()))
    {
      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
      while (count-- > 0)
	emit_insn (insn (operand, operand, operand));
    }
  else
    {
      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
      emit_insn (insn (operand, operand, GEN_INT (count)));
    }
}
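/* Note that MODE here is the pre-split double-word mode while OPERAND
   is one half of it, hence the seemingly inverted selection above
   (mode == DImode ? gen_addsi3 : gen_adddi3).  A small COUNT becomes
   a chain of "add reg, reg" insns (each a 1-bit shift) when that is
   cheaper than ix86_cost->shift_const and we optimize for speed.  */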
void
ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashl3)(rtx, rtx, rtx);
  rtx (*gen_shld)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (high[0], low[1]);
	  emit_move_insn (low[0], const0_rtx);

	  if (count > half_width)
	    ix86_expand_ashl_const (high[0], count - half_width, mode);
	}
      else
	{
	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
	  ix86_expand_ashl_const (low[0], count, mode);
	}
      return;
    }

  split_double_mode (mode, operands, 1, low, high);

  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;

  if (operands[1] == const1_rtx)
    {
      /* Assuming we've chosen a QImode capable registers, then 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
	{
	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);

	  ix86_expand_clear (low[0]);
	  ix86_expand_clear (high[0]);
	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));

	  d = gen_lowpart (QImode, low[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));

	  d = gen_lowpart (QImode, high[0]);
	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
	  s = gen_rtx_NE (QImode, flags, const0_rtx);
	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
	}

      /* Otherwise, we can get the same results by manually performing
	 a bit extract operation on bit 5/6, and then performing the two
	 shifts.  The two methods of getting 0/1 into low/high are exactly
	 the same size.  Avoiding the shift in the bit extract case helps
	 pentium4 a bit; no one else seems to care much either way.  */
      else
	{
	  enum machine_mode half_mode;
	  rtx (*gen_lshr3)(rtx, rtx, rtx);
	  rtx (*gen_and3)(rtx, rtx, rtx);
	  rtx (*gen_xor3)(rtx, rtx, rtx);
	  HOST_WIDE_INT bits;
	  rtx x;

	  if (mode == DImode)
	    {
	      half_mode = SImode;
	      gen_lshr3 = gen_lshrsi3;
	      gen_and3 = gen_andsi3;
	      gen_xor3 = gen_xorsi3;
	      bits = 5;
	    }
	  else
	    {
	      half_mode = DImode;
	      gen_lshr3 = gen_lshrdi3;
	      gen_and3 = gen_anddi3;
	      gen_xor3 = gen_xordi3;
	      bits = 6;
	    }

	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
	  else
	    x = gen_lowpart (half_mode, operands[2]);
	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));

	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
	  emit_move_insn (low[0], high[0]);
	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
	}

      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
      return;
    }

  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);
      emit_insn (gen_shld (high[0], low[0], operands[2]));
    }

  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

      ix86_expand_clear (scratch);
      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
    }
  else
    {
      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
    }
}
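/* Sketch of the variable double-word shift emitted above, for DImode
   split on a 32-bit target (count in %cl):

       shld %cl, low, high      ; high = high<<cl | low>>(32-cl)
       shl  %cl, low
       ; if bit 5 of the count was set: high = low, low = 0

   where the final adjustment is done by gen_x86_shift_adj_1 (cmov on
   a scratch register) or gen_x86_shift_adj_2 (a branch).  */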
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_ashr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count == GET_MODE_BITSIZE (mode) - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn (gen_ashr3 (high[0], high[0],
				GEN_INT (half_width - 1)));

	  if (count > half_width)
	    emit_insn (gen_ashr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  emit_move_insn (scratch, high[0]);
	  emit_insn (gen_ashr3 (scratch, scratch,
				GEN_INT (half_width - 1)));
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;

	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
	}
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx (*gen_lshr3)(rtx, rtx, rtx)
    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
  rtx (*gen_shrd)(rtx, rtx, rtx);
  int half_width = GET_MODE_BITSIZE (mode) >> 1;

  rtx low[2], high[2];
  int count;

  if (CONST_INT_P (operands[2]))
    {
      split_double_mode (mode, operands, 2, low, high);
      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);

      if (count >= half_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > half_width)
	    emit_insn (gen_lshr3 (low[0], low[0],
				  GEN_INT (count - half_width)));
	}
      else
	{
	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);

	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;

      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      split_double_mode (mode, operands, 1, low, high);

      emit_insn (gen_shrd (low[0], high[0], operands[2]));
      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;

	  ix86_expand_clear (scratch);
	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
					  scratch));
	}
      else
	{
	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;

	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
	}
    }
}
/* Predict just emitted jump instruction to be taken with probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}

/* Helper function for the string operations below.  Tests VARIABLE
   for alignment to VALUE bytes; if it is aligned, jump to the label.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}

/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  rtx (*gen_add)(rtx, rtx, rtx)
    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;

  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
static rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
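/* E.g. a "rep movsl" expansion of an N byte copy first scales the
   counter: scale_counter (count, 4) folds the division for a constant
   or emits a shift right by 2, and counter_mode keeps a constant
   count in SImode even on 64-bit targets when it fits.  */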
/* When SRCPTR is non-NULL, output simple loop to copy the memory
   pointed to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
   overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
   equivalent loop to set memory by VALUE (supposed to be in MODE).

   The size is rounded down to whole number of chunk size moved at once.
   SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using single temporary.
	 Also using 4 temporaries is overkill in 32bit mode.  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem =
		  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem =
		  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
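/* The emitted loop has this shape (e.g. MODE = SImode, UNROLL = 4,
   shown as C pseudo-code):

       size = count & ~15;
       iter = 0;
       do
	 {
	   ... four 4-byte loads into temporaries, four stores ...
	   iter += 16;
	 }
       while (iter < size);
       dest += iter;  src += iter;

   with REG_BR_PROB notes attached according to EXPECTED_SIZE.  */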
21053 /* Output "rep; mov" instruction.
21054 Arguments have same meaning as for previous function */
21056 expand_movmem_via_rep_mov (rtx destmem
, rtx srcmem
,
21057 rtx destptr
, rtx srcptr
,
21059 enum machine_mode mode
)
21064 HOST_WIDE_INT rounded_count
;
21066 /* If the size is known, it is shorter to use rep movs. */
21067 if (mode
== QImode
&& CONST_INT_P (count
)
21068 && !(INTVAL (count
) & 3))
21071 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21072 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21073 if (srcptr
!= XEXP (srcmem
, 0) || GET_MODE (srcmem
) != BLKmode
)
21074 srcmem
= adjust_automodify_address_nv (srcmem
, BLKmode
, srcptr
, 0);
21075 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21076 if (mode
!= QImode
)
21078 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21079 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21080 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21081 srcexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21082 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21083 srcexp
= gen_rtx_PLUS (Pmode
, srcexp
, srcptr
);
21087 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21088 srcexp
= gen_rtx_PLUS (Pmode
, srcptr
, countreg
);
21090 if (CONST_INT_P (count
))
21092 rounded_count
= (INTVAL (count
)
21093 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21094 destmem
= shallow_copy_rtx (destmem
);
21095 srcmem
= shallow_copy_rtx (srcmem
);
21096 set_mem_size (destmem
, rounded_count
);
21097 set_mem_size (srcmem
, rounded_count
);
21101 if (MEM_SIZE_KNOWN_P (destmem
))
21102 clear_mem_size (destmem
);
21103 if (MEM_SIZE_KNOWN_P (srcmem
))
21104 clear_mem_size (srcmem
);
21106 emit_insn (gen_rep_mov (destptr
, destmem
, srcptr
, srcmem
, countreg
,
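/* COUNTREG holds the count in MODE-sized chunks as required by rep
   movs{b,l,q}; DESTEXP and SRCEXP describe the final pointer values
   (pointer plus scaled count) so that the rep_mov pattern exposes the
   post-increment of the destination and source pointers to the RTL
   optimizers.  */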
21110 /* Output "rep; stos" instruction.
21111 Arguments have same meaning as for previous function */
21113 expand_setmem_via_rep_stos (rtx destmem
, rtx destptr
, rtx value
,
21114 rtx count
, enum machine_mode mode
,
21119 HOST_WIDE_INT rounded_count
;
21121 if (destptr
!= XEXP (destmem
, 0) || GET_MODE (destmem
) != BLKmode
)
21122 destmem
= adjust_automodify_address_nv (destmem
, BLKmode
, destptr
, 0);
21123 value
= force_reg (mode
, gen_lowpart (mode
, value
));
21124 countreg
= ix86_zero_extend_to_Pmode (scale_counter (count
, GET_MODE_SIZE (mode
)));
21125 if (mode
!= QImode
)
21127 destexp
= gen_rtx_ASHIFT (Pmode
, countreg
,
21128 GEN_INT (exact_log2 (GET_MODE_SIZE (mode
))));
21129 destexp
= gen_rtx_PLUS (Pmode
, destexp
, destptr
);
21132 destexp
= gen_rtx_PLUS (Pmode
, destptr
, countreg
);
21133 if (orig_value
== const0_rtx
&& CONST_INT_P (count
))
21135 rounded_count
= (INTVAL (count
)
21136 & ~((HOST_WIDE_INT
) GET_MODE_SIZE (mode
) - 1));
21137 destmem
= shallow_copy_rtx (destmem
);
21138 set_mem_size (destmem
, rounded_count
);
21140 else if (MEM_SIZE_KNOWN_P (destmem
))
21141 clear_mem_size (destmem
);
21142 emit_insn (gen_rep_stos (destptr
, countreg
, destmem
, value
, destexp
));
static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	  else
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
	  offset += 1;
	}
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining offset (zero is readily
     available from preceding rep operation) and using x86 addressing modes.
   */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}
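/* The epilogue peels the remainder one power of two at a time: for a
   constant COUNT each set bit below MAX_SIZE yields one move (e.g. a
   remainder of 7 becomes SImode + HImode + QImode moves), while for a
   variable COUNT each size is guarded by an ix86_expand_aligntest
   branch instead.  */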
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx orig_dst = dst;
  rtx orig_src = src;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
	  && (src_align_bytes & 1) == (align_bytes & 1)
	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
	set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align = 0;
	  if ((src_align_bytes & 3) == (align_bytes & 3))
	    src_align = 4;
	  else if ((src_align_bytes & 1) == (align_bytes & 1))
	    src_align = 2;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
	src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
	src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
	src_align = 2;
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  if (MEM_SIZE_KNOWN_P (orig_src))
    set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
  *srcp = src;
  return dst;
}
/* Set enough from DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Set enough from DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx orig_dst = dst;
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (MEM_SIZE_KNOWN_P (orig_dst))
    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
  const struct stringop_algs *algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (ix86_stringop_alg != no_stringop && ALG_USABLE_P (ix86_stringop_alg))
    return ix86_stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;

	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		return candidate;
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick meaningful choice.
     We look for maximal size of block that is faster to copy by hand and
     take blocks of at most of that size guessing that average size will
     be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
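/* Illustration of the size table consulted above, with invented numbers
   (the real tables live in the processor_costs structures):

     static const struct stringop_algs example
       = {libcall, {{24, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}};

   For expected_size == 100 the loop walks the {max, alg} pairs, skips
   {24, loop} because 24 < 100, and settles on rep_prefix_4_byte since
   100 <= 8192; a max of -1 matches any size.  */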
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100%
   guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying whole cacheline at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
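/* For instance, smallest_pow2_greater_than (7) == 8, and since the
   comparison is <=, smallest_pow2_greater_than (8) == 16: the result
   is always strictly greater than VAL.  */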
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.

      Optional dynamic check for size and libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power of two sized
      blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with specified algorithm.

   4) Epilogue: code copying tail of the block that is too small to be
      handled by main body (or up to size guarded by prologue guard).  */
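/* For illustration only, a hypothetical C rendering of the emitted
   structure for a variable count (all names invented for this sketch):

     if (count < epilogue_size_needed)		   step 1, prologue guard
       goto epilogue;
     ...copy up to desired_align - align bytes...  step 2, prologue
     while (count >= size_needed)		   step 3, main body
       ...copy size_needed bytes...
    epilogue:
     ...copy count & (epilogue_size_needed - 1) bytes...   step 4
*/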
bool
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;
  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }

  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT) dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }
  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;
  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
	 registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, TARGET_64BIT ? 4 : 2,
				     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }
  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Helper function for memset (ix86_expand_setmem).  For QImode value 0xXY
   produce 0xXYXYXYXY of width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }
  else
    {
      if (valmode == VOIDmode)
	valmode = QImode;
      if (valmode != QImode)
	val = gen_lowpart (QImode, val);
      if (mode == QImode)
	return val;
      if (!TARGET_PARTIAL_REG_STALL)
	nops--;
      if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
	  + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
	  <= (ix86_cost->shift_const + ix86_cost->add) * nops
	     + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
	{
	  rtx reg = convert_modes (mode, QImode, val, true);
	  tmp = promote_duplicated_reg (mode, const1_rtx);
	  return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				      OPTAB_DIRECT);
	}
      else
	{
	  rtx reg = convert_modes (mode, QImode, val, true);

	  if (!TARGET_PARTIAL_REG_STALL)
	    {
	      if (mode == SImode)
		emit_insn (gen_movsi_insv_1 (reg, reg));
	      else
		emit_insn (gen_movdi_insv_1 (reg, reg));
	    }
	  else
	    {
	      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
					 NULL, 1, OPTAB_DIRECT);
	      reg =
		expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	    }
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				     NULL, 1, OPTAB_DIRECT);
	  reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	  if (mode == SImode)
	    return reg;
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				     NULL, 1, OPTAB_DIRECT);
	  reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	  return reg;
	}
    }
}
/* Duplicate value VAL using promote_duplicated_reg into maximal size that will
   be needed by main loop copying SIZE_NEEDED chunks and prologue getting
   alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
				int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string clear operation (bzero).  Use i386 string operations when
   profitable.  See expand_movmem comment for explanation of individual
   steps performed.  */
bool
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return false;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return false;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;
  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	{
	  enum machine_mode mode = SImode;
	  if (TARGET_64BIT && (count & ~0xffffffff))
	    mode = DImode;
	  count_exp = force_reg (mode, count_exp);
	}
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use byte
	 loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
	force_loopy_epilogue = true;
      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }
  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  dst = change_address (dst, BLKmode, destreg);
	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;
  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  DImode);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  SImode);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  QImode);
      break;
    }
  /* Adjust properly the offset of dst memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					(count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);
  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */
      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
					 epilogue_size_needed);
      else
	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
				epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return true;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the startaddress when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
	{
	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

	  /* Leave just the 3 lower bits.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
				   Pmode, 1, align_2_label);
	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
				   Pmode, 1, align_3_label);
	}
      else
	{
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check if it is aligned to 4 bytes.  */
	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
				    NULL_RTX, 0, OPTAB_WIDEN);

	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
				   Pmode, 1, align_4_label);
	}

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned byte on a byte per byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
			       QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2 */
      if (align != 2)
	{
	  emit_label (align_2_label);

	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
				   end_0_label);

	  emit_insn (ix86_gen_add3 (out, out, const1_rtx));

	  emit_label (align_3_label);
	}

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
			       end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop.  It only makes programs bigger and does not help
     speed.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));

  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside loop and many cycles.  */
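/* Worked example of the formula (x - 0x01010101) & ~x & 0x80808080
   computed by the four insns that follow:

     x == 0x00414243 (a zero byte present):
       x - 0x01010101 == 0xff404142
       ~x	      == 0xffbebdbc
       and of both    == 0xff000100; masked: 0x80000000, nonzero.

     x == 0x41424344 (no zero byte):
       (0x40414243 & 0xbebdbcbb) & 0x80808080 == 0.

   A nonzero result occurs exactly when some byte of x is zero; the ~x
   term masks away bytes whose high bit was already set.  */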
  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
  emit_insn (gen_one_cmplsi2 (scratch, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
  emit_insn (gen_andsi3 (tmpreg, tmpreg,
			 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);

  if (TARGET_CMOVE)
    {
      rtx reg = gen_reg_rtx (SImode);
      rtx reg2 = gen_reg_rtx (Pmode);
      emit_move_insn (reg, tmpreg);
      emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));

      /* If zero is not in the first two bytes, move two bytes forward.  */
      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit lea manually to avoid clobbering of flags.  */
      emit_insn (gen_rtx_SET (SImode, reg2,
			      gen_rtx_PLUS (Pmode, out, const2_rtx)));

      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
    }
  else
    {
      rtx end_2_label = gen_label_rtx ();
      /* Is zero in the first two bytes? */

      emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
      tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
      tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
      JUMP_LABEL (tmp) = end_2_label;

      /* Not in the first two.  Move two bytes forward.  */
      emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
      emit_insn (ix86_gen_add3 (out, out, const2_rtx));

      emit_label (end_2_label);
    }

  /* Avoid branch in fixing the byte.  */
  tmpreg = gen_lowpart (QImode, tmpreg);
  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));

  emit_label (end_0_label);
}
/* Expand strlen.  */
bool
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
{
  rtx addr, scratch1, scratch2, scratch3, scratch4;

  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !TARGET_INLINE_ALL_STRINGOPS
      && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return false;

  addr = force_reg (Pmode, XEXP (src, 0));
  scratch1 = gen_reg_rtx (Pmode);

  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
      && !optimize_insn_for_size_p ())
    {
      /* Well it seems that some optimizer does not combine a call like
	 foo(strlen(bar), strlen(bar));
	 when the move and the subtraction is done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
	 often used and I use one fewer register for the lifetime of
	 output_strlen_unroll() this is better.  */

      emit_move_insn (out, addr);

      ix86_expand_strlensi_unroll_1 (out, src, align);

      /* strlensi_unroll_1 returns the address of the zero at the end of
	 the string, like memchr(), so compute the length by subtracting
	 the start address.  */
      emit_insn (ix86_gen_sub3 (out, out, addr));
    }
  else
    {
      rtx unspec;

      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
	return false;

      scratch2 = gen_reg_rtx (Pmode);
      scratch3 = gen_reg_rtx (Pmode);
      scratch4 = force_reg (Pmode, constm1_rtx);

      emit_move_insn (scratch3, addr);
      eoschar = force_reg (QImode, eoschar);

      src = replace_equiv_address_nv (src, scratch3);

      /* If .md starts supporting :P, this can be done in .md.  */
      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
						 scratch4), UNSPEC_SCAS);
      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
    }
  return true;
}
/* For a given symbol (function) construct code to compute the address of its
   PLT entry in large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
{
  rtx tmp = gen_reg_rtx (Pmode);
  rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);

  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
  gcc_assert (ix86_cmodel == CM_LARGE_PIC);

  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
rtx
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
		  rtx callarg2,
		  rtx pop, bool sibcall)
{
  /* We need to represent that SI and DI registers are clobbered
     by SYSV calls.  */
  static int clobbered_registers[] = {
	XMM6_REG, XMM7_REG, XMM8_REG,
	XMM9_REG, XMM10_REG, XMM11_REG,
	XMM12_REG, XMM13_REG, XMM14_REG,
	XMM15_REG, SI_REG, DI_REG
  };
  rtx vec[ARRAY_SIZE (clobbered_registers) + 3];
  rtx use = NULL, call;
  unsigned int vec_len;

  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);

  if (TARGET_MACHO && !TARGET_64BIT)
    {
#if TARGET_MACHO
      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
	fnaddr = machopic_indirect_call_target (fnaddr);
#endif
    }
  else
    {
      /* Static functions and indirect calls don't need the pic register.  */
      if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
	use_reg (&use, pic_offset_table_rtx);
    }

  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
    {
      rtx al = gen_rtx_REG (QImode, AX_REG);
      emit_move_insn (al, callarg2);
      use_reg (&use, al);
    }

  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
	   : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
    {
      fnaddr = XEXP (fnaddr, 0);
      if (GET_MODE (fnaddr) != Pmode)
	fnaddr = convert_to_mode (Pmode, fnaddr, 1);
      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (Pmode, fnaddr));
    }
  vec_len = 0;
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  vec[vec_len++] = call;

  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      vec[vec_len++] = pop;
    }

  if (TARGET_64BIT_MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
    {
      unsigned i;

      vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
				       UNSPEC_MS_TO_SYSV_CALL);

      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
	vec[vec_len++]
	  = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
			     ? TImode : DImode,
			     gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
					  ? TImode : DImode,
					  clobbered_registers[i]));
    }

  /* Add UNSPEC_CALL_NEEDS_VZEROUPPER decoration.  */
  if (TARGET_VZEROUPPER)
    {
      int avx256;
      if (cfun->machine->callee_pass_avx256_p)
	{
	  if (cfun->machine->callee_return_avx256_p)
	    avx256 = callee_return_pass_avx256;
	  else
	    avx256 = callee_pass_avx256;
	}
      else if (cfun->machine->callee_return_avx256_p)
	avx256 = callee_return_avx256;
      else
	avx256 = call_no_avx256;

      if (reload_completed)
	emit_insn (gen_avx_vzeroupper (GEN_INT (avx256)));
      else
	vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode,
					 gen_rtvec (1, GEN_INT (avx256)),
					 UNSPEC_CALL_NEEDS_VZEROUPPER);
    }

  if (vec_len > 1)
    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
  call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;

  return call;
}
/* Split a call insn carrying the UNSPEC_CALL_NEEDS_VZEROUPPER marker into
   an explicit vzeroupper followed by the call itself.  */
void
ix86_split_call_vzeroupper (rtx insn, rtx vzeroupper)
{
  rtx pat = PATTERN (insn);
  rtvec vec = XVEC (pat, 0);
  int len = GET_NUM_ELEM (vec) - 1;

  /* Strip off the last entry of the parallel.  */
  gcc_assert (GET_CODE (RTVEC_ELT (vec, len)) == UNSPEC);
  gcc_assert (XINT (RTVEC_ELT (vec, len), 1) == UNSPEC_CALL_NEEDS_VZEROUPPER);
  if (len == 1)
    pat = RTVEC_ELT (vec, 0);
  else
    pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (len, &RTVEC_ELT (vec, 0)));

  emit_insn (gen_avx_vzeroupper (vzeroupper));
  emit_call_insn (pat);
}
/* Output the assembly for a call instruction.  */
const char *
ix86_output_call_insn (rtx insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, Pmode);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      if (direct_p)
	xasm = "jmp\t%P0";
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "rex.W jmp %A0";
      else
	xasm = "jmp\t%A0";

      output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
	{
	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  If non-
	     call-exceptions, we'll have done this during epilogue emission.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    xasm = "call\t%P0";
  else
    xasm = "call\t%A0";

  output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
static struct machine_function *
ix86_init_machine_status (void)
{
  struct machine_function *f;

  f = ggc_alloc_cleared_machine_function ();
  f->use_fast_prologue_epilogue_nregs = -1;
  f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */
rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  /* Virtual slot is valid only before vregs are instantiated.  */
  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (s->rtl);
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  */
int
memory_address_length (rtx addr)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  if (parts.base && GET_CODE (parts.base) == SUBREG)
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && GET_CODE (parts.index) == SUBREG)
    parts.index = SUBREG_REG (parts.index);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Add length of addr32 prefix.  */
  len = (GET_CODE (addr) == ZERO_EXTEND
	 || GET_CODE (addr) == AND);

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (REG_P (addr)
	  && (addr == arg_pointer_rtx
	      || addr == frame_pointer_rtx
	      || REGNO (addr) == SP_REG
	      || REGNO (addr) == BP_REG
	      || REGNO (addr) == R12_REG
	      || REGNO (addr) == R13_REG))
	len += 1;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) != LABEL_REF
	      && (GET_CODE (symbol) != SYMBOL_REF
		  || SYMBOL_REF_TLS_MODEL (symbol) != 0)
	      && (GET_CODE (symbol) != UNSPEC
		  || (XINT (symbol, 1) != UNSPEC_GOTPCREL
		      && XINT (symbol, 1) != UNSPEC_PCREL
		      && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
	    len += 1;
	}
    }

  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && REG_P (base)
	       && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len += 1;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && REG_P (base)
	      && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len += 1;
    }

  return len;
}
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, bool shortform)
{
  int len = 0;
  int i;

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);
      if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
	{
	  if (GET_CODE (addr) == ZERO_EXTEND)
	    addr = XEXP (addr, 0);
	  if (GET_CODE (addr) == SUBREG)
	    addr = SUBREG_REG (addr);
	}

      return memory_address_length (addr);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	constrain_operands_cached (reload_completed);
	if (which_alternative != -1)
	  {
	    const char *constraints = recog_data.constraints[i];
	    int alt = which_alternative;

	    while (*constraints == '=' || *constraints == '+')
	      constraints++;
	    while (alt-- > 0)
	      while (*constraints++ != ',')
		;
	    /* Skip ignored operands.  */
	    if (*constraints == 'X')
	      continue;
	  }
	return memory_address_length (XEXP (recog_data.operand[i], 0));
      }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */
int
ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      {
	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
	if (MEM_P (recog_data.operand[i])
	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	  return 3 + 1;
      }

  return 2 + 1;
}
/* Return the maximum number of instructions a cpu can issue.  */
static int
ix86_issue_rate (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
    case PROCESSOR_ATOM:
    case PROCESSOR_K6:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_PENTIUM4:
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_NOCONA:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
      return 3;

    default:
      return 1;
    }
}
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
   by DEP_INSN and nothing set by DEP_INSN.  */
static bool
ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
{
  rtx set, set2;

  /* Simplify the test for uninteresting insns.  */
  if (insn_type != TYPE_SETCC
      && insn_type != TYPE_ICMOV
      && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return false;

  if ((set = single_set (dep_insn)) != 0)
    {
      set = SET_DEST (set);
      set2 = NULL_RTX;
    }
  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
	   && XVECLEN (PATTERN (dep_insn), 0) == 2
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
    {
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
    }
  else
    return false;

  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return false;

  /* This test is true if the dependent insn reads the flags but
     not any other potentially set register.  */
  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
    return false;

  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return false;

  return true;
}
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */
static bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
{
  int i;

  extract_insn_cached (use_insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      {
	rtx addr = XEXP (recog_data.operand[i], 0);
	return modified_in_p (addr, set_insn) != 0;
      }
  return false;
}
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  enum attr_type insn_type, dep_insn_type;
  enum attr_memory memory;
  rtx set, set2;
  int dep_insn_code_number;

  /* Anti and output dependencies have zero cost on all CPUs.  */
  if (REG_NOTE_KIND (link) != 0)
    return 0;

  dep_insn_code_number = recog_memoized (dep_insn);

  /* If we can't recognize the insns, we can't really do anything.  */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_insn_type = get_attr_type (dep_insn);

  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      /* Address Generation Interlock adds a cycle of latency.  */
      if (insn_type == TYPE_LEA)
	{
	  rtx addr = PATTERN (insn);

	  if (GET_CODE (addr) == PARALLEL)
	    addr = XVECEXP (addr, 0, 0);

	  gcc_assert (GET_CODE (addr) == SET);

	  addr = SET_SRC (addr);
	  if (modified_in_p (addr, dep_insn))
	    cost += 1;
	}
      else if (ix86_agi_dependent (dep_insn, insn))
	cost += 1;

      /* ??? Compares pair with jump/setcc.  */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
	cost = 0;

      /* Floating point stores require value to be ready one cycle earlier.  */
      if (insn_type == TYPE_FMOV
	  && get_attr_memory (insn) == MEMORY_STORE
	  && !ix86_agi_dependent (dep_insn, insn))
	cost += 1;
      break;

    case PROCESSOR_PENTIUMPRO:
      memory = get_attr_memory (insn);

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* There is one cycle extra latency between an FP op and a store.  */
      if (insn_type == TYPE_FMOV
	  && (set = single_set (dep_insn)) != NULL_RTX
	  && (set2 = single_set (insn)) != NULL_RTX
	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
	  && MEM_P (SET_DEST (set2)))
	cost += 1;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 1)
	    cost--;
	}
      break;

    case PROCESSOR_K6:
      memory = get_attr_memory (insn);

      /* The esp dependency is resolved before the instruction is really
	 finished.  */
      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
	return 1;

      /* INT->FP conversion is expensive.  */
      if (get_attr_fp_int_src (dep_insn))
	cost += 5;

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  /* Claim moves to take one cycle, as core can issue one load
	     at time and the next load can start cycle later.  */
	  if (dep_insn_type == TYPE_IMOV
	      || dep_insn_type == TYPE_FMOV)
	    cost = 1;
	  else if (cost > 2)
	    cost -= 2;
	  else
	    cost = 1;
	}
      break;
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
    case PROCESSOR_BDVER2:
    case PROCESSOR_BTVER1:
    case PROCESSOR_ATOM:
    case PROCESSOR_GENERIC32:
    case PROCESSOR_GENERIC64:
      memory = get_attr_memory (insn);

      /* Show ability of reorder buffer to hide latency of load by executing
	 in parallel with previous instruction in case
	 previous instruction is not needed to compute the address.  */
      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
	  && !ix86_agi_dependent (dep_insn, insn))
	{
	  enum attr_unit unit = get_attr_unit (insn);
	  int loadcost;

	  /* Because of the difference between the length of integer and
	     floating unit pipeline preparation stages, the memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
	    loadcost = 3;
	  else
	    loadcost = TARGET_ATHLON ? 2 : 0;

	  if (cost >= loadcost)
	    cost -= loadcost;
	  else
	    cost = 0;
	}
      break;

    default:
      break;
    }

  return cost;
}
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
static int
ia32_multipass_dfa_lookahead (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_PENTIUM:
      return 2;

    case PROCESSOR_PENTIUMPRO:
    case PROCESSOR_K6:
      return 1;

    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
	 as many instructions can be executed on a cycle, i.e.,
	 issue_rate.  I wonder why tuning for many CPUs does not do this.  */
      return ix86_issue_rate ();

    default:
      return 0;
    }
}
/* Model decoder of Core 2/i7.
   Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */

/* Maximum length of an insn that can be handled by
   a secondary decoder unit.  '8' for Core 2/i7.  */
static int core2i7_secondary_decoder_max_insn_size;

/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
   '16' for Core 2/i7.  */
static int core2i7_ifetch_block_size;

/* Maximum number of instructions decoder can handle per cycle.
   '6' for Core 2/i7.  */
static int core2i7_ifetch_block_max_insns;

typedef struct ix86_first_cycle_multipass_data_ *
  ix86_first_cycle_multipass_data_t;
typedef const struct ix86_first_cycle_multipass_data_ *
  const_ix86_first_cycle_multipass_data_t;

/* A variable to store target state across calls to max_issue within
   one cycle.  */
static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;

/* Initialize DATA.  */
static void
core2i7_first_cycle_multipass_init (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
  data->ready_try_change = NULL;
  data->ready_try_change_size = 0;
}

/* Advancing the cycle; reset ifetch block counts.  */
static void
core2i7_dfa_post_advance_cycle (void)
{
  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;

  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  data->ifetch_block_len = 0;
  data->ifetch_block_n_insns = 0;
}

static int min_insn_size (rtx);

/* Filter out insns from ready_try that the core will not be able to issue
   on current cycle due to decoder.  */
static void
core2i7_first_cycle_multipass_filter_ready_try
  (const_ix86_first_cycle_multipass_data_t data,
   char *ready_try, int n_ready, bool first_cycle_insn_p)
{
  while (n_ready--)
    {
      rtx insn;
      int insn_size;

      if (ready_try[n_ready])
	continue;

      insn = get_ready_element (n_ready);
      insn_size = min_insn_size (insn);

      if (/* If this is too long an insn for a secondary decoder ...  */
	  (!first_cycle_insn_p
	   && insn_size > core2i7_secondary_decoder_max_insn_size)
	  /* ... or it would not fit into the ifetch block ...  */
	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
	  /* ... or the decoder is full already ...  */
	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
	/* ... then mask the insn out.  */
	{
	  ready_try[n_ready] = 1;

	  if (data->ready_try_change)
	    SET_BIT (data->ready_try_change, n_ready);
	}
    }
}
/* Prepare for a new round of multipass lookahead scheduling.  */
static void
core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
				     bool first_cycle_insn_p)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = ix86_first_cycle_multipass_data;

  /* Restore the state from the end of the previous round.  */
  data->ifetch_block_len = prev_data->ifetch_block_len;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;

  /* Filter instructions that cannot be issued on current cycle due to
     decoder restrictions.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  first_cycle_insn_p);
}
/* INSN is being issued in current solution.  Account for its impact on
   the decoder model.  */
static void
core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
				     rtx insn, const void *_prev_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;
  const_ix86_first_cycle_multipass_data_t prev_data
    = (const_ix86_first_cycle_multipass_data_t) _prev_data;

  int insn_size = min_insn_size (insn);

  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);

  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
  if (!data->ready_try_change)
    {
      data->ready_try_change = sbitmap_alloc (n_ready);
      data->ready_try_change_size = n_ready;
    }
  else if (data->ready_try_change_size < n_ready)
    {
      data->ready_try_change = sbitmap_resize (data->ready_try_change,
					       n_ready, 0);
      data->ready_try_change_size = n_ready;
    }
  sbitmap_zero (data->ready_try_change);

  /* Filter out insns from ready_try that the core will not be able to issue
     on current cycle due to decoder.  */
  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
						  false);
}
/* Revert the effect on ready_try.  */
static void
core2i7_first_cycle_multipass_backtrack (const void *_data,
					 char *ready_try,
					 int n_ready ATTRIBUTE_UNUSED)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  unsigned int i = 0;
  sbitmap_iterator sbi;

  gcc_assert (sbitmap_last_set_bit (data->ready_try_change) < n_ready);
  EXECUTE_IF_SET_IN_SBITMAP (data->ready_try_change, 0, i, sbi)
    {
      ready_try[i] = 0;
    }
}
/* Save the result of multipass lookahead scheduling for the next round.  */
static void
core2i7_first_cycle_multipass_end (const void *_data)
{
  const_ix86_first_cycle_multipass_data_t data
    = (const_ix86_first_cycle_multipass_data_t) _data;
  ix86_first_cycle_multipass_data_t next_data
    = ix86_first_cycle_multipass_data;

  if (data != NULL)
    {
      next_data->ifetch_block_len = data->ifetch_block_len;
      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
    }
}
/* Deallocate target data.  */
static void
core2i7_first_cycle_multipass_fini (void *_data)
{
  ix86_first_cycle_multipass_data_t data
    = (ix86_first_cycle_multipass_data_t) _data;

  if (data->ready_try_change)
    {
      sbitmap_free (data->ready_try_change);
      data->ready_try_change = NULL;
      data->ready_try_change_size = 0;
    }
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
			int verbose ATTRIBUTE_UNUSED,
			int max_uid ATTRIBUTE_UNUSED)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2_32:
    case PROCESSOR_CORE2_64:
    case PROCESSOR_COREI7_32:
    case PROCESSOR_COREI7_64:
      targetm.sched.dfa_post_advance_cycle
	= core2i7_dfa_post_advance_cycle;
      targetm.sched.first_cycle_multipass_init
	= core2i7_first_cycle_multipass_init;
      targetm.sched.first_cycle_multipass_begin
	= core2i7_first_cycle_multipass_begin;
      targetm.sched.first_cycle_multipass_issue
	= core2i7_first_cycle_multipass_issue;
      targetm.sched.first_cycle_multipass_backtrack
	= core2i7_first_cycle_multipass_backtrack;
      targetm.sched.first_cycle_multipass_end
	= core2i7_first_cycle_multipass_end;
      targetm.sched.first_cycle_multipass_fini
	= core2i7_first_cycle_multipass_fini;

      /* Set decoder parameters.  */
      core2i7_secondary_decoder_max_insn_size = 8;
      core2i7_ifetch_block_size = 16;
      core2i7_ifetch_block_max_insns = 6;
      break;

    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
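
/* Illustrative sketch (not part of GCC, guarded out of the build): the
   pattern above -- filling a table of function pointers only for CPUs
   that need them, and NULLing the slots otherwise -- in miniature.  All
   names here are invented for the example.  */
#if 0
#include <stddef.h>
#include <stdio.h>

struct toy_sched_hooks
{
  void (*post_advance_cycle) (void);
};

static struct toy_sched_hooks toy_hooks;

static void
toy_post_advance_cycle (void)
{
  puts ("cycle advanced");
}

static void
toy_install_hooks (int is_core2i7)
{
  /* Install the potentially expensive hook only when the modeled CPU
     actually uses it; callers test the slot for NULL.  */
  toy_hooks.post_advance_cycle = is_core2i7 ? toy_post_advance_cycle : NULL;
}

int
main (void)
{
  toy_install_hooks (1);
  if (toy_hooks.post_advance_cycle)
    toy_hooks.post_advance_cycle ();
  return 0;
}
#endif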
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */

int
ix86_constant_alignment (tree exp, int align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
	return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
	return 128;
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
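
/* Illustrative sketch (not part of GCC, guarded out of the build): the
   alignment policy above, restated as a toy standalone function over
   (kind, bits, length, align) instead of trees.  The enum and constants
   are invented for the example; BITS_PER_WORD is taken as 32 here.  */
#if 0
#include <stdio.h>

enum toy_kind { TOY_REAL, TOY_INT, TOY_STRING };

static int
toy_constant_alignment (enum toy_kind kind, int type_bits, int len, int align)
{
  if ((kind == TOY_REAL || kind == TOY_INT) && type_bits == 64 && align < 64)
    return 64;	/* e.g. a double constant gets 8-byte alignment */
  if (kind == TOY_STRING && len >= 31 && align < 32)
    return 32;	/* long string constants get word alignment */
  return align;
}

int
main (void)
{
  printf ("%d\n", toy_constant_alignment (TOY_REAL, 64, 0, 32));   /* 64 */
  printf ("%d\n", toy_constant_alignment (TOY_STRING, 8, 40, 8));  /* 32 */
  return 0;
}
#endif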
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

int
ix86_data_alignment (tree type, int align)
{
  int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);

  if (AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
      && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
	  || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if (AGGREGATE_TYPE_P (type)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
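
/* Illustrative sketch (not part of GCC, guarded out of the build): what
   the 16-byte rule above means for user code.  On an x86-64 GCC, a
   static array of 16 or more bytes is placed on a 16-byte boundary,
   which is what makes aligned SSE loads to it safe.  */
#if 0
#include <stdio.h>

static char small_buf[8];	/* may get only default alignment */
static char big_buf[32];	/* >= 16 bytes: 16-byte aligned per the ABI */

int
main (void)
{
  printf ("small: %p\nbig:   %p\n", (void *) small_buf, (void *) big_buf);
  /* Expect the address of big_buf to be a multiple of 16.  */
  return 0;
}
#endif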
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
		      unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions on arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function compiled, and
     functions from other units cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun))
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
	      || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || !TYPE_USER_ALIGN (type))
      && (!decl || !DECL_USER_ALIGN (decl)))
    return 32;

  return align;
}
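
/* Illustrative sketch (not part of GCC, guarded out of the build): the
   -mpreferred-stack-boundary=2 special case above in miniature -- a
   predicate deciding whether a 64-bit-aligned long long local may be
   demoted to 32-bit alignment instead of forcing stack realignment.
   All names are invented for the example.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

static unsigned int
toy_minimum_alignment (bool is_64bit, unsigned int align,
		       unsigned int pref_stack_boundary,
		       bool is_di_mode, bool user_aligned)
{
  /* Only the 32-bit, 64-bit-aligned, small-stack-boundary case is
     interesting; everything else keeps its alignment.  */
  if (is_64bit || align != 64 || pref_stack_boundary >= 64)
    return align;

  /* A plain long long does not require realignment; cap it at 32.  */
  if (is_di_mode && !user_aligned)
    return 32;

  return align;
}

int
main (void)
{
  printf ("%u\n", toy_minimum_alignment (false, 64, 32, true, false)); /* 32 */
  return 0;
}
#endif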
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
{
  unsigned regno;

  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      tree fntype;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      fntype = TREE_TYPE (fndecl);
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl)
		ix86_static_chain_on_stack = true;
	      return gen_frame_mem (SImode,
				    plus_constant (arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
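
/* Illustrative sketch (not part of GCC, guarded out of the build): user
   code that exercises the static chain machinery above.  With the GNU C
   nested-function extension, taking the address of `inner' forces a
   trampoline that loads the static chain register chosen by
   ix86_static_chain.  Requires GCC and an executable stack to run.  */
#if 0
#include <stdio.h>

static int
apply (int (*fn) (int), int x)
{
  return fn (x);
}

int
main (void)
{
  int base = 10;

  /* `inner' references `base', so calling it through a pointer needs a
     static chain, materialized via an on-stack trampoline.  */
  int inner (int v) { return v + base; }

  printf ("%d\n", apply (inner, 5));	/* prints 15 */
  return 0;
}
#endif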
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  */
      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_to_mode_reg (DImode, fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);
	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the
	 shorter movl instead of movabs for x32.  */
      if (TARGET_X32)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (XEXP (m_tramp, 0),
					  offset - (MEM_P (chain) ? 1 : 0)),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
#endif
}
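
/* Illustrative note (not part of GCC): the 64-bit trampoline emitted
   above, assuming the movabs form for both loads, decodes roughly as:

     49 bb <8-byte fnaddr>	movabs $fnaddr, %r11
     49 ba <8-byte chain>	movabs $chain,  %r10
     49 ff e3			jmp    *%r11
     90				nop    (pads the last 32-bit store)

   The 0xbb41/0xbb49 and 0xba41/0xba49 constants stored with HImode
   moves, and the 0x90e3ff49 SImode constant, are these opcode bytes
   in little-endian order.  */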
/* The following file contains several enumerations and data structures
   built from the definitions in i386-builtin-types.def.  */

#include "i386-builtin-types.inc"

/* Table for the ix86 builtin non-function types.  */
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
{
  unsigned int index;
  tree type, itype;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));

  type = ix86_builtin_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  gcc_assert (tcode > IX86_BT_LAST_PRIM);
  if (tcode <= IX86_BT_LAST_VECT)
    {
      enum machine_mode mode;

      index = tcode - IX86_BT_LAST_PRIM - 1;
      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
      mode = ix86_builtin_type_vect_mode[index];

      type = build_vector_type_for_mode (itype, mode);
    }
  else
    {
      int quals;

      index = tcode - IX86_BT_LAST_VECT - 1;
      if (tcode <= IX86_BT_LAST_PTR)
	quals = TYPE_UNQUALIFIED;
      else
	quals = TYPE_QUAL_CONST;

      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
      if (quals != TYPE_UNQUALIFIED)
	itype = build_qualified_type (itype, quals);

      type = build_pointer_type (itype);
    }

  ix86_builtin_type_tab[(int) tcode] = type;
  return type;
}
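
/* Illustrative sketch (not part of GCC, guarded out of the build): the
   memoization pattern used by ix86_get_builtin_type above -- build an
   object on first request, cache it in a table, return the cached object
   afterwards.  All names are invented for the example.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

#define N_TYPES 8

static char *type_tab[N_TYPES];

static char *
get_type (int code)
{
  char *t = type_tab[code];
  if (t != NULL)
    return t;			/* already built: reuse it */

  t = malloc (16);
  snprintf (t, 16, "type-%d", code);	/* "build" the type lazily */
  type_tab[code] = t;
  return t;
}

int
main (void)
{
  printf ("%s\n", get_type (3));
  /* Both calls return the same cached pointer.  */
  printf ("%p == %p\n", (void *) get_type (3), (void *) get_type (3));
  return 0;
}
#endif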
/* Table for the ix86 builtin function types.  */
static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];

/* Retrieve an element from the above table, building some of
   the types lazily.  */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
	{
	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
	  args = tree_cons (NULL, atype, args);
	}

      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_EMMS,
  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  /* SSE2 MMX */
  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,
  IX86_BUILTIN_PAUSE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3.  */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3.  */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1.  */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,
  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_FLOORPD,
  IX86_BUILTIN_CEILPD,
  IX86_BUILTIN_TRUNCPD,
  IX86_BUILTIN_RINTPD,
  IX86_BUILTIN_ROUNDPD_AZ,
  IX86_BUILTIN_FLOORPS,
  IX86_BUILTIN_CEILPS,
  IX86_BUILTIN_TRUNCPS,
  IX86_BUILTIN_RINTPS,
  IX86_BUILTIN_ROUNDPS_AZ,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,
  IX86_BUILTIN_VEC_PACK_SFIX256,

  /* SSE4.2.  */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_FLOORPD256,
  IX86_BUILTIN_CEILPD256,
  IX86_BUILTIN_TRUNCPD256,
  IX86_BUILTIN_RINTPD256,
  IX86_BUILTIN_ROUNDPD_AZ256,
  IX86_BUILTIN_FLOORPS256,
  IX86_BUILTIN_CEILPS256,
  IX86_BUILTIN_TRUNCPS256,
  IX86_BUILTIN_RINTPS256,
  IX86_BUILTIN_ROUNDPS_AZ256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* AVX2 */
  IX86_BUILTIN_MPSADBW256,
  IX86_BUILTIN_PABSB256,
  IX86_BUILTIN_PABSW256,
  IX86_BUILTIN_PABSD256,
  IX86_BUILTIN_PACKSSDW256,
  IX86_BUILTIN_PACKSSWB256,
  IX86_BUILTIN_PACKUSDW256,
  IX86_BUILTIN_PACKUSWB256,
  IX86_BUILTIN_PADDB256,
  IX86_BUILTIN_PADDW256,
  IX86_BUILTIN_PADDD256,
  IX86_BUILTIN_PADDQ256,
  IX86_BUILTIN_PADDSB256,
  IX86_BUILTIN_PADDSW256,
  IX86_BUILTIN_PADDUSB256,
  IX86_BUILTIN_PADDUSW256,
  IX86_BUILTIN_PALIGNR256,
  IX86_BUILTIN_AND256I,
  IX86_BUILTIN_ANDNOT256I,
  IX86_BUILTIN_PAVGB256,
  IX86_BUILTIN_PAVGW256,
  IX86_BUILTIN_PBLENDVB256,
  IX86_BUILTIN_PBLENDVW256,
  IX86_BUILTIN_PCMPEQB256,
  IX86_BUILTIN_PCMPEQW256,
  IX86_BUILTIN_PCMPEQD256,
  IX86_BUILTIN_PCMPEQQ256,
  IX86_BUILTIN_PCMPGTB256,
  IX86_BUILTIN_PCMPGTW256,
  IX86_BUILTIN_PCMPGTD256,
  IX86_BUILTIN_PCMPGTQ256,
  IX86_BUILTIN_PHADDW256,
  IX86_BUILTIN_PHADDD256,
  IX86_BUILTIN_PHADDSW256,
  IX86_BUILTIN_PHSUBW256,
  IX86_BUILTIN_PHSUBD256,
  IX86_BUILTIN_PHSUBSW256,
  IX86_BUILTIN_PMADDUBSW256,
  IX86_BUILTIN_PMADDWD256,
  IX86_BUILTIN_PMAXSB256,
  IX86_BUILTIN_PMAXSW256,
  IX86_BUILTIN_PMAXSD256,
  IX86_BUILTIN_PMAXUB256,
  IX86_BUILTIN_PMAXUW256,
  IX86_BUILTIN_PMAXUD256,
  IX86_BUILTIN_PMINSB256,
  IX86_BUILTIN_PMINSW256,
  IX86_BUILTIN_PMINSD256,
  IX86_BUILTIN_PMINUB256,
  IX86_BUILTIN_PMINUW256,
  IX86_BUILTIN_PMINUD256,
  IX86_BUILTIN_PMOVMSKB256,
  IX86_BUILTIN_PMOVSXBW256,
  IX86_BUILTIN_PMOVSXBD256,
  IX86_BUILTIN_PMOVSXBQ256,
  IX86_BUILTIN_PMOVSXWD256,
  IX86_BUILTIN_PMOVSXWQ256,
  IX86_BUILTIN_PMOVSXDQ256,
  IX86_BUILTIN_PMOVZXBW256,
  IX86_BUILTIN_PMOVZXBD256,
  IX86_BUILTIN_PMOVZXBQ256,
  IX86_BUILTIN_PMOVZXWD256,
  IX86_BUILTIN_PMOVZXWQ256,
  IX86_BUILTIN_PMOVZXDQ256,
  IX86_BUILTIN_PMULDQ256,
  IX86_BUILTIN_PMULHRSW256,
  IX86_BUILTIN_PMULHUW256,
  IX86_BUILTIN_PMULHW256,
  IX86_BUILTIN_PMULLW256,
  IX86_BUILTIN_PMULLD256,
  IX86_BUILTIN_PMULUDQ256,
  IX86_BUILTIN_POR256,
  IX86_BUILTIN_PSADBW256,
  IX86_BUILTIN_PSHUFB256,
  IX86_BUILTIN_PSHUFD256,
  IX86_BUILTIN_PSHUFHW256,
  IX86_BUILTIN_PSHUFLW256,
  IX86_BUILTIN_PSIGNB256,
  IX86_BUILTIN_PSIGNW256,
  IX86_BUILTIN_PSIGND256,
  IX86_BUILTIN_PSLLDQI256,
  IX86_BUILTIN_PSLLWI256,
  IX86_BUILTIN_PSLLW256,
  IX86_BUILTIN_PSLLDI256,
  IX86_BUILTIN_PSLLD256,
  IX86_BUILTIN_PSLLQI256,
  IX86_BUILTIN_PSLLQ256,
  IX86_BUILTIN_PSRAWI256,
  IX86_BUILTIN_PSRAW256,
  IX86_BUILTIN_PSRADI256,
  IX86_BUILTIN_PSRAD256,
  IX86_BUILTIN_PSRLDQI256,
  IX86_BUILTIN_PSRLWI256,
  IX86_BUILTIN_PSRLW256,
  IX86_BUILTIN_PSRLDI256,
  IX86_BUILTIN_PSRLD256,
  IX86_BUILTIN_PSRLQI256,
  IX86_BUILTIN_PSRLQ256,
  IX86_BUILTIN_PSUBB256,
  IX86_BUILTIN_PSUBW256,
  IX86_BUILTIN_PSUBD256,
  IX86_BUILTIN_PSUBQ256,
  IX86_BUILTIN_PSUBSB256,
  IX86_BUILTIN_PSUBSW256,
  IX86_BUILTIN_PSUBUSB256,
  IX86_BUILTIN_PSUBUSW256,
  IX86_BUILTIN_PUNPCKHBW256,
  IX86_BUILTIN_PUNPCKHWD256,
  IX86_BUILTIN_PUNPCKHDQ256,
  IX86_BUILTIN_PUNPCKHQDQ256,
  IX86_BUILTIN_PUNPCKLBW256,
  IX86_BUILTIN_PUNPCKLWD256,
  IX86_BUILTIN_PUNPCKLDQ256,
  IX86_BUILTIN_PUNPCKLQDQ256,
  IX86_BUILTIN_PXOR256,
  IX86_BUILTIN_MOVNTDQA256,
  IX86_BUILTIN_VBROADCASTSS_PS,
  IX86_BUILTIN_VBROADCASTSS_PS256,
  IX86_BUILTIN_VBROADCASTSD_PD256,
  IX86_BUILTIN_VBROADCASTSI256,
  IX86_BUILTIN_PBLENDD256,
  IX86_BUILTIN_PBLENDD128,
  IX86_BUILTIN_PBROADCASTB256,
  IX86_BUILTIN_PBROADCASTW256,
  IX86_BUILTIN_PBROADCASTD256,
  IX86_BUILTIN_PBROADCASTQ256,
  IX86_BUILTIN_PBROADCASTB128,
  IX86_BUILTIN_PBROADCASTW128,
  IX86_BUILTIN_PBROADCASTD128,
  IX86_BUILTIN_PBROADCASTQ128,
  IX86_BUILTIN_VPERMVARSI256,
  IX86_BUILTIN_VPERMDF256,
  IX86_BUILTIN_VPERMVARSF256,
  IX86_BUILTIN_VPERMDI256,
  IX86_BUILTIN_VPERMTI256,
  IX86_BUILTIN_VEXTRACT128I256,
  IX86_BUILTIN_VINSERT128I256,
  IX86_BUILTIN_MASKLOADD,
  IX86_BUILTIN_MASKLOADQ,
  IX86_BUILTIN_MASKLOADD256,
  IX86_BUILTIN_MASKLOADQ256,
  IX86_BUILTIN_MASKSTORED,
  IX86_BUILTIN_MASKSTOREQ,
  IX86_BUILTIN_MASKSTORED256,
  IX86_BUILTIN_MASKSTOREQ256,
  IX86_BUILTIN_PSLLVV4DI,
  IX86_BUILTIN_PSLLVV2DI,
  IX86_BUILTIN_PSLLVV8SI,
  IX86_BUILTIN_PSLLVV4SI,
  IX86_BUILTIN_PSRAVV8SI,
  IX86_BUILTIN_PSRAVV4SI,
  IX86_BUILTIN_PSRLVV4DI,
  IX86_BUILTIN_PSRLVV2DI,
  IX86_BUILTIN_PSRLVV8SI,
  IX86_BUILTIN_PSRLVV4SI,

  IX86_BUILTIN_GATHERSIV2DF,
  IX86_BUILTIN_GATHERSIV4DF,
  IX86_BUILTIN_GATHERDIV2DF,
  IX86_BUILTIN_GATHERDIV4DF,
  IX86_BUILTIN_GATHERSIV4SF,
  IX86_BUILTIN_GATHERSIV8SF,
  IX86_BUILTIN_GATHERDIV4SF,
  IX86_BUILTIN_GATHERDIV8SF,
  IX86_BUILTIN_GATHERSIV2DI,
  IX86_BUILTIN_GATHERSIV4DI,
  IX86_BUILTIN_GATHERDIV2DI,
  IX86_BUILTIN_GATHERDIV4DI,
  IX86_BUILTIN_GATHERSIV4SI,
  IX86_BUILTIN_GATHERSIV8SI,
  IX86_BUILTIN_GATHERDIV4SI,
  IX86_BUILTIN_GATHERDIV8SI,

  /* Alternate 4 element gather for the vectorizer where
     all operands are 32-byte wide.  */
  IX86_BUILTIN_GATHERALTSIV4DF,
  IX86_BUILTIN_GATHERALTDIV8SF,
  IX86_BUILTIN_GATHERALTSIV4DI,
  IX86_BUILTIN_GATHERALTDIV8SI,

  /* TFmode support builtins.  */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  /* FMA4 instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,

  /* FMA3 instructions.  */
  IX86_BUILTIN_VFMADDSS3,
  IX86_BUILTIN_VFMADDSD3,

  /* XOP instructions.  */
  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* BMI instructions.  */
  IX86_BUILTIN_BEXTR32,
  IX86_BUILTIN_BEXTR64,
  IX86_BUILTIN_CTZS,

  /* TBM instructions.  */
  IX86_BUILTIN_BEXTRI32,
  IX86_BUILTIN_BEXTRI64,

  /* BMI2 instructions.  */
  IX86_BUILTIN_BZHI32,
  IX86_BUILTIN_BZHI64,
  IX86_BUILTIN_PDEP32,
  IX86_BUILTIN_PDEP64,
  IX86_BUILTIN_PEXT32,
  IX86_BUILTIN_PEXT64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16_STEP,
  IX86_BUILTIN_RDRAND32_STEP,
  IX86_BUILTIN_RDRAND64_STEP,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  /* CFString built-in for darwin */
  IX86_BUILTIN_CFSTRING,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static inline tree
def_builtin (HOST_WIDE_INT mask, const char *name,
	     enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
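
/* Illustrative note (not part of GCC): a hypothetical registration
   through def_builtin would look like this (the type and code names
   below are real; the builtin name is invented for the example):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  UINT64_FTYPE_VOID, IX86_BUILTIN_RDTSC);

   The decl is created immediately when the ISA bits are already enabled,
   and is otherwise parked in ix86_builtins_isa until ix86_add_new_builtins
   sees a function compiled with the matching target attribute.  */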
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (HOST_WIDE_INT mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (HOST_WIDE_INT isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
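/* A sketch of what these become (see ix86_expand_sse_comi later in this
   file): the builtin emits a (u)comiss/(u)comisd on its two vector
   operands and then materializes the COMPARISON condition from the
   flags, so roughly

     int r = __builtin_ia32_comieq (a, b);    // comiss + setcc  */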
static const struct builtin_description bdesc_pcmpestr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
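/* For these two tables FLAG is not a swap bit: it is the machine mode of
   the flags-register value the builtin reads (CCAmode, CCCmode, ...).
   The pcmpestr/pcmpistr expanders below emit the string-compare insn and
   then test FLAGS_REG in that mode against zero to produce the int
   result of the *a/*c/*o/*s/*z variants.  */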
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },

  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
};
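/* A sketch of how these tables are consumed (the actual loops are in
   ix86_init_mmx_sse_builtins below): FLAG holds an ix86_builtin_func_type
   cast to int, so registration is roughly

     for (i = 0, d = bdesc_special_args;
	  i < ARRAY_SIZE (bdesc_special_args); i++, d++)
       if (d->name)
	 def_builtin (d->mask, d->name,
		      (enum ix86_builtin_func_type) d->flag, d->code);
   */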
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
26011 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_shufpd
, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF_INT
},
26013 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movmskpd
, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26014 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_pmovmskb
, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128
, UNKNOWN
, (int) INT_FTYPE_V16QI
},
26015 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sqrtv2df2
, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF
},
26016 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtdq2pd
, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SI
},
26017 { OPTION_MASK_ISA_SSE2
, CODE_FOR_floatv4siv4sf2
, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS
, UNKNOWN
, (int) V4SF_FTYPE_V4SI
},
26019 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2dq
, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26020 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2pi
, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26021 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpd2ps
, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS
, UNKNOWN
, (int) V4SF_FTYPE_V2DF
},
26022 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2dq
, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V2DF
},
26023 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttpd2pi
, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI
, UNKNOWN
, (int) V2SI_FTYPE_V2DF
},
26025 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtpi2pd
, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD
, UNKNOWN
, (int) V2DF_FTYPE_V2SI
},
26027 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtsd2si
, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26028 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvttsd2si
, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI
, UNKNOWN
, (int) INT_FTYPE_V2DF
},
26029 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvtsd2siq
, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26030 { OPTION_MASK_ISA_SSE2
| OPTION_MASK_ISA_64BIT
, CODE_FOR_sse2_cvttsd2siq
, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64
, UNKNOWN
, (int) INT64_FTYPE_V2DF
},
26032 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2dq
, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26033 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_cvtps2pd
, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD
, UNKNOWN
, (int) V2DF_FTYPE_V4SF
},
26034 { OPTION_MASK_ISA_SSE2
, CODE_FOR_fix_truncv4sfv4si2
, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ
, UNKNOWN
, (int) V4SI_FTYPE_V4SF
},
26036 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2df3
, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26037 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2df3
, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26038 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv2df3
, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26039 { OPTION_MASK_ISA_SSE2
, CODE_FOR_divv2df3
, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26040 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmaddv2df3
, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26041 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsubv2df3
, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26042 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmulv2df3
, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26043 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmdivv2df3
, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26045 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26046 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26047 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26048 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26049 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26050 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26051 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26052 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26053 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26054 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26055 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF_SWAP
},
26056 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_maskcmpv2df3
, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26057 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD
, EQ
, (int) V2DF_FTYPE_V2DF_V2DF
},
26058 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD
, LT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26059 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD
, LE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26060 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD
, UNORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26061 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD
, NE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26062 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD
, UNGE
, (int) V2DF_FTYPE_V2DF_V2DF
},
26063 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD
, UNGT
, (int) V2DF_FTYPE_V2DF_V2DF
},
26064 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmmaskcmpv2df3
, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD
, ORDERED
, (int) V2DF_FTYPE_V2DF_V2DF
},
26066 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv2df3
, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26067 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv2df3
, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26068 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsminv2df3
, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26069 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_vmsmaxv2df3
, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26071 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2df3
, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26072 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2df3
, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26073 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2df3
, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26074 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2df3
, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26076 { OPTION_MASK_ISA_SSE2
, CODE_FOR_copysignv2df3
, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26078 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_movsd
, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26079 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2df
, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26080 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv2df
, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD
, UNKNOWN
, (int) V2DF_FTYPE_V2DF_V2DF
},
26082 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_pack_sfix_v2df
, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX
, UNKNOWN
, (int) V4SI_FTYPE_V2DF_V2DF
},
26084 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv16qi3
, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26085 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv8hi3
, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26086 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv4si3
, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26087 { OPTION_MASK_ISA_SSE2
, CODE_FOR_addv2di3
, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26088 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv16qi3
, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26089 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv8hi3
, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26090 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv4si3
, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26091 { OPTION_MASK_ISA_SSE2
, CODE_FOR_subv2di3
, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26093 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv16qi3
, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26094 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ssaddv8hi3
, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26095 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv16qi3
, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26096 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_sssubv8hi3
, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26097 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv16qi3
, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26098 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_usaddv8hi3
, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26099 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv16qi3
, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26100 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_ussubv8hi3
, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26102 { OPTION_MASK_ISA_SSE2
, CODE_FOR_mulv8hi3
, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26103 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smulv8hi3_highpart
, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128
, UNKNOWN
,(int) V8HI_FTYPE_V8HI_V8HI
},
26105 { OPTION_MASK_ISA_SSE2
, CODE_FOR_andv2di3
, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26106 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_andnotv2di3
, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26107 { OPTION_MASK_ISA_SSE2
, CODE_FOR_iorv2di3
, "__builtin_ia32_por128", IX86_BUILTIN_POR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26108 { OPTION_MASK_ISA_SSE2
, CODE_FOR_xorv2di3
, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26110 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv16qi3
, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26111 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_uavgv8hi3
, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26113 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv16qi3
, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26114 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv8hi3
, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26115 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_eqv4si3
, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26116 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv16qi3
, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26117 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv8hi3
, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26118 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sse2_gtv4si3
, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26120 { OPTION_MASK_ISA_SSE2
, CODE_FOR_umaxv16qi3
, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26121 { OPTION_MASK_ISA_SSE2
, CODE_FOR_smaxv8hi3
, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26122 { OPTION_MASK_ISA_SSE2
, CODE_FOR_uminv16qi3
, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26123 { OPTION_MASK_ISA_SSE2
, CODE_FOR_sminv8hi3
, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26125 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv16qi
, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26126 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv8hi
, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26127 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv4si
, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
26128 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_highv2di
, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128
, UNKNOWN
, (int) V2DI_FTYPE_V2DI_V2DI
},
26129 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv16qi
, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128
, UNKNOWN
, (int) V16QI_FTYPE_V16QI_V16QI
},
26130 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv8hi
, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128
, UNKNOWN
, (int) V8HI_FTYPE_V8HI_V8HI
},
26131 { OPTION_MASK_ISA_SSE2
, CODE_FOR_vec_interleave_lowv4si
, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128
, UNKNOWN
, (int) V4SI_FTYPE_V4SI_V4SI
},
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
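  /* SSE2 MMX */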
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
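  /* SSE3 */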
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
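  /* SSSE3 */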
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
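  /* SSE4.1 */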
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
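  /* SSE4.1 rounding and ptest */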
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
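  /* SSE4.2 */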
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
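  /* SSE4A */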
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
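  /* AES */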
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
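  /* PCLMUL */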
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
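  /* AVX */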
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
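  /* AVX2 */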
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3, "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3, "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3, "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3, "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2, "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2, "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2, "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2, "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2, "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2, "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2, "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2, "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2, "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2, "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3, "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256", IX86_BUILTIN_PMULHUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256", IX86_BUILTIN_PMULHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256", IX86_BUILTIN_PMULLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256", IX86_BUILTIN_PMULLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3, "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
26604 { OPTION_MASK_ISA_LZCNT
, CODE_FOR_clzhi2_lzcnt
, "__builtin_clzs", IX86_BUILTIN_CLZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
26607 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_si
, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26608 { OPTION_MASK_ISA_BMI
, CODE_FOR_bmi_bextr_di
, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26609 { OPTION_MASK_ISA_BMI
, CODE_FOR_ctzhi2
, "__builtin_ctzs", IX86_BUILTIN_CTZS
, UNKNOWN
, (int) UINT16_FTYPE_UINT16
},
26612 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_si
, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26613 { OPTION_MASK_ISA_TBM
, CODE_FOR_tbm_bextri_di
, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26616 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps
, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS
, UNKNOWN
, (int) V4SF_FTYPE_V8HI
},
26617 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtph2ps256
, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256
, UNKNOWN
, (int) V8SF_FTYPE_V8HI
},
26618 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph
, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH
, UNKNOWN
, (int) V8HI_FTYPE_V4SF_INT
},
26619 { OPTION_MASK_ISA_F16C
, CODE_FOR_vcvtps2ph256
, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256
, UNKNOWN
, (int) V8HI_FTYPE_V8SF_INT
},
26622 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_si3
, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26623 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_bzhi_di3
, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26624 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_si3
, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26625 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pdep_di3
, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},
26626 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_si3
, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32
, UNKNOWN
, (int) UINT_FTYPE_UINT_UINT
},
26627 { OPTION_MASK_ISA_BMI2
, CODE_FOR_bmi2_pext_di3
, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64
, UNKNOWN
, (int) UINT64_FTYPE_UINT64_UINT64
},

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I  V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I  V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF        V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF        V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2       V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2       V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI        V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI        V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI     V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI        V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI     V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI        V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2       V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2       V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2       V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2       V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF        V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF        V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI        V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI        V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI        V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI        V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM    V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM    V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM    V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM    V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP    V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP    V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP    V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP    V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF     V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF     V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF     V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF     V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF     V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF     V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF        V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF        V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2       V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2       V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI        V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI        V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI        V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI        V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI     V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI     V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI     V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI     V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI     V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI     V8HI_FTYPE_V16QI
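
/* Editorial note (not in the original source): each MULTI_ARG_* macro is
   just a short alias for one of the generated ix86_builtin_func_type
   values.  For example, MULTI_ARG_3_SF stands for
   V4SF_FTYPE_V4SF_V4SF_V4SF: a builtin taking three V4SF operands and
   returning V4SF, which is the shape used by __builtin_ia32_vfmaddss in
   the table below.  */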

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf, "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df, "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
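
/* Editorial note (not in the original source): the MULTI_ARG_4_* rows
   above are the only four-operand entries in bdesc_multi_arg; e.g.
   __builtin_ia32_vpermil2pd selects V2DF elements from two sources under
   control of a V2DI selector plus a 2-bit immediate, and
   ix86_expand_multi_arg_builtin below rejects out-of-range immediates for
   exactly these icodes.  */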

/* TM vector builtins.  */

/* Reuse the existing x86-specific `struct builtin_description' because
   we're lazy.  Add casts to make them fit.  */
static const struct builtin_description bdesc_tm[] =
{
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64",
    (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128",
    (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256",
    (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256",
    (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256",
    (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },

  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64",
    (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128",
    (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256",
    (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
};
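
/* Editorial note (illustrative, not in the original source): each
   bdesc_tm row pairs a libitm entry point with a vector signature.  The
   BUILT_IN_TM_STORE_M128 row, for instance, registers __builtin__ITM_WM128
   as roughly "void _ITM_WM128 (V4SF *, V4SF)", gated on
   OPTION_MASK_ISA_SSE; CODE_FOR_nothing means the builtin expands to a
   libitm call rather than to an insn pattern.  */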

/* TM callbacks.  */

/* Return the builtin decl needed to load a vector of TYPE.  */

static tree
ix86_builtin_tm_load (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
	}
    }
  return NULL_TREE;
}
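
/* Sketch of how the hook above is consumed (editorial; the caller lives
   in the middle end, not in this file): trans-mem.c asks the target for
   a vector TM load via something like

     tree fn = targetm.vectorize.builtin_tm_load (vectype);

   and, when FN is non-NULL, emits a call to it instead of splitting the
   transactional load into scalar _ITM_ loads.  */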

/* Return the builtin decl needed to store a vector of TYPE.  */

static tree
ix86_builtin_tm_store (tree type)
{
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      switch (tree_low_cst (TYPE_SIZE (type), 1))
	{
	case 64:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
	case 128:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
	case 256:
	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
	}
    }
  return NULL_TREE;
}

/* Initialize the transactional memory vector load/store builtins.  */

static void
ix86_init_tm_builtins (void)
{
  enum ix86_builtin_func_type ftype;
  const struct builtin_description *d;
  size_t i;
  tree decl;
  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
  tree attrs_log, attrs_type_log;

  if (!flag_tm)
    return;

  /* Use whatever attributes a normal TM load has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
  attrs_load = DECL_ATTRIBUTES (decl);
  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM store has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
  attrs_store = DECL_ATTRIBUTES (decl);
  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  /* Use whatever attributes a normal TM log has.  */
  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
  attrs_log = DECL_ATTRIBUTES (decl);
  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));

  for (i = 0, d = bdesc_tm;
       i < ARRAY_SIZE (bdesc_tm);
       i++, d++)
    {
      if ((d->mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type, attrs, attrs_type;
	  enum built_in_function code = (enum built_in_function) d->code;

	  ftype = (enum ix86_builtin_func_type) d->flag;
	  type = ix86_get_builtin_func_type (ftype);

	  if (BUILTIN_TM_LOAD_P (code))
	    {
	      attrs = attrs_load;
	      attrs_type = attrs_type_load;
	    }
	  else if (BUILTIN_TM_STORE_P (code))
	    {
	      attrs = attrs_store;
	      attrs_type = attrs_type_store;
	    }
	  else
	    {
	      attrs = attrs_log;
	      attrs_type = attrs_type_log;
	    }
	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
				       /* The builtin without the prefix for
					  calling it directly.  */
				       d->name + strlen ("__builtin_"),
				       attrs);
	  /* add_builtin_function () will set the DECL_ATTRIBUTES, now
	     set the TYPE_ATTRIBUTES.  */
	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);

	  set_builtin_decl (code, decl, false);
	}
    }
}

/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target specific options that differ from the
   command line options.  */

static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* RDRND */
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
	       IX86_BUILTIN_RDRAND64_STEP);

  /* AVX2 */
  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
	       IX86_BUILTIN_GATHERSIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
	       IX86_BUILTIN_GATHERSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
	       IX86_BUILTIN_GATHERDIV2DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
	       IX86_BUILTIN_GATHERDIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
	       IX86_BUILTIN_GATHERSIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
	       IX86_BUILTIN_GATHERSIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV4SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
	       IX86_BUILTIN_GATHERDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
	       IX86_BUILTIN_GATHERSIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
	       IX86_BUILTIN_GATHERSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
	       IX86_BUILTIN_GATHERDIV2DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
	       IX86_BUILTIN_GATHERDIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
	       IX86_BUILTIN_GATHERSIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
	       IX86_BUILTIN_GATHERSIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV4SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
	       IX86_BUILTIN_GATHERDIV8SI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
	       IX86_BUILTIN_GATHERALTSIV4DF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
	       IX86_BUILTIN_GATHERALTDIV8SF);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
	       IX86_BUILTIN_GATHERALTSIV4DI);

  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
	       IX86_BUILTIN_GATHERALTDIV8SI);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* Add FMA4 multi-arg argument instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
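
/* Editorial example (not in the original source): on an AVX2 target the
   loops above turn a bdesc_args row such as the psllv4di entry into a
   callable builtin, so

     __v4di r = __builtin_ia32_psllv4di (a, b);

   performs a per-element variable shift; the _mm256_sllv_epi64 wrapper in
   avx2intrin.h expands to exactly this builtin.  */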

/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}

static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
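
/* Editorial example (not in the original source): once the registrations
   above run, C code can declare "__float80 e;" and "__float128 q;"
   directly; __float80 is laid out in XFmode (the 80-bit x87 extended
   format) and __float128 in TFmode (IEEE quad precision).  */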

static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_tm_builtins ();
  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
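
/* Editorial example (not in the original source): if a builtin argument
   failed to parse, expand_normal may return const0_rtx where a V16QI
   operand is expected; safe_vector_operand turns that into
   CONST0_RTX (V16QImode), a zero vector the insn predicates accept, so
   expansion does not crash after the error has been diagnosed.  */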

/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
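
/* Editorial note (not in the original source): the SImode/TImode fix-up
   above handles the SSE2 shift builtins, whose count operand is TImode at
   the insn level while the C prototype passes an int.  gen_sse2_loadd
   moves the 32-bit count into the low element of a zeroed V4SI register
   and gen_lowpart then reinterprets that register as TImode, which
   satisfies the insn's operand predicate.  */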
27484 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
27487 ix86_expand_multi_arg_builtin (enum insn_code icode
, tree exp
, rtx target
,
27488 enum ix86_builtin_func_type m_type
,
27489 enum rtx_code sub_code
)
27494 bool comparison_p
= false;
27496 bool last_arg_constant
= false;
27497 int num_memory
= 0;
27500 enum machine_mode mode
;
27503 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
27507 case MULTI_ARG_4_DF2_DI_I
:
27508 case MULTI_ARG_4_DF2_DI_I1
:
27509 case MULTI_ARG_4_SF2_SI_I
:
27510 case MULTI_ARG_4_SF2_SI_I1
:
27512 last_arg_constant
= true;
27515 case MULTI_ARG_3_SF
:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
	    {
	      enum insn_code new_icode = icode;
	      switch (icode)
		{
		case CODE_FOR_xop_vpermil2v2df3:
		case CODE_FOR_xop_vpermil2v4sf3:
		case CODE_FOR_xop_vpermil2v4df3:
		case CODE_FOR_xop_vpermil2v8sf3:
		  error ("the last argument must be a 2-bit immediate");
		  return gen_reg_rtx (tmode);
		case CODE_FOR_xop_rotlv2di3:
		  new_icode = CODE_FOR_rotlv2di3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv4si3:
		  new_icode = CODE_FOR_rotlv4si3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv8hi3:
		  new_icode = CODE_FOR_rotlv8hi3;
		  goto xop_rotl;
		case CODE_FOR_xop_rotlv16qi3:
		  new_icode = CODE_FOR_rotlv16qi3;
		xop_rotl:
		  if (CONST_INT_P (op))
		    {
		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
		      op = GEN_INT (INTVAL (op) & mask);
		      gcc_checking_assert
			(insn_data[icode].operand[i + 1].predicate (op, mode));
		    }
		  else
		    {
		      gcc_checking_assert
			(nargs == 2
			 && insn_data[new_icode].operand[0].mode == tmode
			 && insn_data[new_icode].operand[1].mode == tmode
			 && insn_data[new_icode].operand[2].mode == mode
			 && insn_data[new_icode].operand[0].predicate
			    == insn_data[icode].operand[0].predicate
			 && insn_data[new_icode].operand[1].predicate
			    == insn_data[icode].operand[1].predicate);
		      icode = new_icode;
		      goto non_constant;
		    }
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else
	{
	non_constant:
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
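
/* Illustrative note, not part of the original sources: assuming the
   xopintrin.h spelling of the rotate intrinsic, a call such as

     __m128i r = _mm_roti_epi32 (x, 40);

   reaches the XOP_ROTL path above with a count wider than the 32-bit
   element; the count is masked to 40 & 31 == 8 before the immediate
   form is emitted, while a non-constant count falls back to the
   generic rotl pattern selected via NEW_ICODE.  */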
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
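
/* Illustrative note, not part of the original sources: SSE encodes
   only the "less than" flavors of some scalar comparisons, so a
   descriptor such as the one for __builtin_ia32_cmpgtsd is marked
   with SWAP and is expanded here as the CMPLTSD pattern with OP0 and
   OP1 interchanged (the intrinsic names are assumptions drawn from
   the usual bdesc tables).  */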
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
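
/* Illustrative note, not part of the original sources: a comi builtin
   such as __builtin_ia32_comisdeq (assumed to back _mm_comieq_sd)
   compares the low doubles and sets the flags; the code above then
   materializes the flags bit into the low QImode part of a
   zero-initialized SImode register, which is exactly the int result
   the intrinsic promises.  */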
/* Subroutine of ix86_expand_args_builtin to take care of round insns.  */

static rtx
ix86_expand_sse_round (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;

  if (optimize || target == 0
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = GEN_INT (d->comparison);

  pat = GEN_FCN (d->icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
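
/* Illustrative note, not part of the original sources: the ROUND
   descriptors smuggle the rounding-control immediate through
   D->COMPARISON; e.g. the FLOORPD descriptor is assumed to store
   ROUND_FLOOR there, and GEN_INT above turns it into the second
   operand of the roundpd pattern.  */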
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
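
/* Illustrative note, not part of the original sources: for
   _mm_testz_si128 the descriptor's comparison code is assumed to be
   EQ, so the SETcc emitted above yields 1 exactly when ptest set ZF;
   the testc and testnzc variants reuse the same expander with a
   different comparison code.  */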
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case V2DF_FTYPE_V2DF_ROUND:
    case V4DF_FTYPE_V4DF_ROUND:
    case V4SF_FTYPE_V4SF_ROUND:
    case V8SF_FTYPE_V8SF_ROUND:
      return ix86_expand_sse_round (d, exp, target);
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case INT_FTYPE_V32QI:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
    case V32QI_FTYPE_V32QI:
    case V32QI_FTYPE_V16QI:
    case V16HI_FTYPE_V16HI:
    case V16HI_FTYPE_V8HI:
    case V8SI_FTYPE_V8SI:
    case V16HI_FTYPE_V16QI:
    case V8SI_FTYPE_V16QI:
    case V4DI_FTYPE_V16QI:
    case V8SI_FTYPE_V8HI:
    case V4DI_FTYPE_V8HI:
    case V4DI_FTYPE_V4SI:
    case V4DI_FTYPE_V2DI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
    case V32QI_FTYPE_V16HI_V16HI:
    case V16HI_FTYPE_V8SI_V8SI:
    case V32QI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V32QI_V32QI:
    case V16HI_FTYPE_V16HI_V16HI:
    case V8SI_FTYPE_V4DF_V4DF:
    case V8SI_FTYPE_V8SI_V8SI:
    case V8SI_FTYPE_V16HI_V16HI:
    case V4DI_FTYPE_V4DI_V4DI:
    case V4DI_FTYPE_V8SI_V8SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V16HI_FTYPE_V16HI_V8HI_COUNT:
    case V16HI_FTYPE_V16HI_SI_COUNT:
    case V8SI_FTYPE_V8SI_V4SI_COUNT:
    case V8SI_FTYPE_V8SI_SI_COUNT:
    case V4DI_FTYPE_V4DI_V2DI_COUNT:
    case V4DI_FTYPE_V4DI_INT_COUNT:
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_INT_CONVERT:
      nargs = 2;
      rmode = V2TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
    case V16HI_FTYPE_V16HI_INT:
    case V8SI_FTYPE_V8SI_INT:
    case V4DI_FTYPE_V4DI_INT:
    case V2DI_FTYPE_V4DI_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
    case V32QI_FTYPE_V32QI_V32QI_V32QI:
      nargs = 3;
      break;
    case V32QI_FTYPE_V32QI_V32QI_INT:
    case V16HI_FTYPE_V16HI_V16HI_INT:
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V4DI_FTYPE_V4DI_V4DI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V4DI_FTYPE_V4DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
      nargs = 3;
      rmode = V4DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V2DImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
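	  /* Illustrative note, not part of the original sources: for
	     _mm_slli_epi32 (v, n) with a non-constant N the predicate
	     check fails; N is narrowed to SImode below and, if still
	     not acceptable, copied into a register so the vector
	     shift consumes the register form of the count.  */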
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_avx2_inserti128:
	      case CODE_FOR_avx2_extracti128:
		error ("the last argument must be an 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:
	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vmcmpv2df3:
	      case CODE_FOR_avx_vmcmpv4sf3:
	      case CODE_FOR_avx_cmpv2df3:
	      case CODE_FOR_avx_cmpv4sf3:
	      case CODE_FOR_avx_cmpv4df3:
	      case CODE_FOR_avx_cmpv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
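
/* Illustrative note, not part of the original sources: a typical
   descriptor routed through ix86_expand_args_builtin is the blend
   family, e.g. __builtin_ia32_blendpd with V2DF_FTYPE_V2DF_V2DF_INT:
   NARGS becomes 3 with NARGS_CONSTANT 1, and a non-immediate last
   argument hits the CODE_FOR_sse4_1_blendpd arm of the diagnostic
   switch above ("the last argument must be a 2-bit immediate").  */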
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      if (icode == CODE_FOR_avx_vzeroupper)
	target = GEN_INT (vzeroupper_intrinsic);
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V4DI_FTYPE_PV4DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SI:
    case V4DF_FTYPE_PCV4DF_V4DI:
    case V4SF_FTYPE_PCV4SF_V4SI:
    case V2DF_FTYPE_PCV2DF_V2DI:
    case V8SI_FTYPE_PCV8SI_V8SI:
    case V4DI_FTYPE_PCV4DI_V4DI:
    case V4SI_FTYPE_PCV4SI_V4SI:
    case V2DI_FTYPE_PCV2DI_V2DI:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SI_V8SF:
    case VOID_FTYPE_PV4DF_V4DI_V4DF:
    case VOID_FTYPE_PV4SF_V4SI_V4SF:
    case VOID_FTYPE_PV2DF_V2DI_V2DF:
    case VOID_FTYPE_PV8SI_V8SI_V8SI:
    case VOID_FTYPE_PV4DI_V4DI_V4DI:
    case VOID_FTYPE_PV4SI_V4SI_V4SI:
    case VOID_FTYPE_PV2DI_V2DI_V2DI:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	{
	  if (GET_MODE (op) != Pmode)
	    op = convert_to_mode (Pmode, op, 1);
	  target = gen_rtx_MEM (tmode, force_reg (Pmode, op));
	}
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      if (GET_MODE (op) != Pmode)
		op = convert_to_mode (Pmode, op, 1);
	      op = gen_rtx_MEM (mode, force_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned int
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
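
/* Illustrative note, not part of the original sources: for a V4SF
   vector TYPE_VECTOR_SUBPARTS is 4, so a selector of 7 passed to a
   vec_ext builtin triggers the range error above and element 0 is
   used instead.  */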
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of  (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that gives us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2, arg3, arg4;
  rtx op0, op1, op2, op3, op4, pat;
  enum machine_mode mode0, mode1, mode2, mode3, mode4;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  /* Determine whether the builtin function is available under the current ISA.
     Originally the builtin was not created if it wasn't applicable to the
     current ISA based on the command line switches.  With function specific
     options, we need to check in the context of the function making the call
     whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, (enum fpmath_unit) 0, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }
  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (GET_MODE (op0) != Pmode)
	op0 = convert_to_mode (Pmode, op0, 1);
      op0 = gen_rtx_MEM (mode1, force_reg (Pmode, op0));

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;

    case IX86_BUILTIN_VEC_INIT_V2SI:
    case IX86_BUILTIN_VEC_INIT_V4HI:
    case IX86_BUILTIN_VEC_INIT_V8QI:
      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);

    case IX86_BUILTIN_VEC_EXT_V2DF:
    case IX86_BUILTIN_VEC_EXT_V2DI:
    case IX86_BUILTIN_VEC_EXT_V4SF:
    case IX86_BUILTIN_VEC_EXT_V4SI:
    case IX86_BUILTIN_VEC_EXT_V8HI:
    case IX86_BUILTIN_VEC_EXT_V2SI:
    case IX86_BUILTIN_VEC_EXT_V4HI:
    case IX86_BUILTIN_VEC_EXT_V16QI:
      return ix86_expand_vec_ext_builtin (exp, target);

    case IX86_BUILTIN_VEC_SET_V2DI:
    case IX86_BUILTIN_VEC_SET_V4SF:
    case IX86_BUILTIN_VEC_SET_V4SI:
    case IX86_BUILTIN_VEC_SET_V8HI:
    case IX86_BUILTIN_VEC_SET_V4HI:
    case IX86_BUILTIN_VEC_SET_V16QI:
      return ix86_expand_vec_set_builtin (exp);

    case IX86_BUILTIN_INFQ:
    case IX86_BUILTIN_HUGE_VALQ:
      {
	REAL_VALUE_TYPE inf;
	rtx tmp;

	real_inf (&inf);
	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);

	tmp = validize_mem (force_const_mem (mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IX86_BUILTIN_LLWPCB:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_lwp_llwpcb;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	{
	  if (GET_MODE (op0) != Pmode)
	    op0 = convert_to_mode (Pmode, op0, 1);
	  op0 = force_reg (Pmode, op0);
	}
      emit_insn (gen_lwp_llwpcb (op0));
      return 0;

    case IX86_BUILTIN_SLWPCB:
      icode = CODE_FOR_lwp_slwpcb;
      if (!target
	  || !insn_data[icode].operand[0].predicate (target, Pmode))
	target = gen_reg_rtx (Pmode);
      emit_insn (gen_lwp_slwpcb (target));
      return target;

    case IX86_BUILTIN_BEXTRI32:
    case IX86_BUILTIN_BEXTRI64:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      icode = (fcode == IX86_BUILTIN_BEXTRI32
	       ? CODE_FOR_tbm_bextri_si
	       : CODE_FOR_tbm_bextri_di);
      if (!CONST_INT_P (op1))
	{
	  error ("last argument must be an immediate");
	  return const0_rtx;
	}
      else
	{
	  unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
	  unsigned char lsb_index = INTVAL (op1) & 0xFF;
	  op1 = GEN_INT (length);
	  op2 = GEN_INT (lsb_index);
	  pat = GEN_FCN (icode) (target, op0, op1, op2);
	  if (pat)
	    emit_insn (pat);
	  return target;
	}
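      /* Illustrative note, not part of the original sources: the
	 control word packs the field as (length << 8) | lsb_index,
	 so __builtin_ia32_bextri_u32 (x, 0x0410) extracts a 4-bit
	 field starting at bit 16, per the decoding above.  */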
    case IX86_BUILTIN_RDRAND16_STEP:
      icode = CODE_FOR_rdrandhi_1;
      mode0 = HImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND32_STEP:
      icode = CODE_FOR_rdrandsi_1;
      mode0 = SImode;
      goto rdrand_step;

    case IX86_BUILTIN_RDRAND64_STEP:
      icode = CODE_FOR_rdranddi_1;
      mode0 = DImode;

    rdrand_step:
      op0 = gen_reg_rtx (mode0);
      emit_insn (GEN_FCN (icode) (op0));

      arg0 = CALL_EXPR_ARG (exp, 0);
      op1 = expand_normal (arg0);
      if (!address_operand (op1, VOIDmode))
	{
	  op1 = convert_memory_address (Pmode, op1);
	  op1 = copy_addr_to_reg (op1);
	}
      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);

      op1 = gen_reg_rtx (SImode);
      emit_move_insn (op1, CONST1_RTX (SImode));

      /* Emit SImode conditional move.  */
      if (mode0 == HImode)
	{
	  op2 = gen_reg_rtx (SImode);
	  emit_insn (gen_zero_extendhisi2 (op2, op0));
	}
      else if (mode0 == SImode)
	op2 = op0;
      else
	op2 = gen_rtx_SUBREG (SImode, op0, 0);

      if (target == 0)
	target = gen_reg_rtx (SImode);

      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
			 const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
      return target;
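
    /* Illustrative note, not part of the original sources: the
       conditional move keys off the carry flag that rdrand sets, so
       a caller such as

	 unsigned int v;
	 if (__builtin_ia32_rdrand32_step (&v))
	   use (v);

       sees a nonzero return only when the hardware actually
       delivered a random value through the pointer.  */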
    case IX86_BUILTIN_GATHERSIV2DF:
      icode = CODE_FOR_avx2_gathersiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DF:
      icode = CODE_FOR_avx2_gatherdiv2df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DF:
      icode = CODE_FOR_avx2_gatherdiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SF:
      icode = CODE_FOR_avx2_gathersiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SF:
      icode = CODE_FOR_avx2_gathersiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SF:
      icode = CODE_FOR_avx2_gatherdiv4sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV2DI:
      icode = CODE_FOR_avx2_gathersiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV2DI:
      icode = CODE_FOR_avx2_gatherdiv2di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4DI:
      icode = CODE_FOR_avx2_gatherdiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV4SI:
      icode = CODE_FOR_avx2_gathersiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERSIV8SI:
      icode = CODE_FOR_avx2_gathersiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV4SI:
      icode = CODE_FOR_avx2_gatherdiv4si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DF:
      icode = CODE_FOR_avx2_gathersiv4df;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SF:
      icode = CODE_FOR_avx2_gatherdiv8sf;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTSIV4DI:
      icode = CODE_FOR_avx2_gathersiv4di;
      goto gather_gen;
    case IX86_BUILTIN_GATHERALTDIV8SI:
      icode = CODE_FOR_avx2_gatherdiv8si;
      goto gather_gen;

    gather_gen:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      arg3 = CALL_EXPR_ARG (exp, 3);
      arg4 = CALL_EXPR_ARG (exp, 4);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      op3 = expand_normal (arg3);
      op4 = expand_normal (arg4);
      /* Note the arg order is different from the operand order.  */
      mode0 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[3].mode;
      mode3 = insn_data[icode].operand[4].mode;
      mode4 = insn_data[icode].operand[5].mode;

      if (target == NULL_RTX
	  || GET_MODE (target) != insn_data[icode].operand[0].mode)
	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
      else
	subtarget = target;

      if (fcode == IX86_BUILTIN_GATHERALTSIV4DF
	  || fcode == IX86_BUILTIN_GATHERALTSIV4DI)
	{
	  rtx half = gen_reg_rtx (V4SImode);
	  if (!nonimmediate_operand (op2, V8SImode))
	    op2 = copy_to_mode_reg (V8SImode, op2);
	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
	  op2 = half;
	}
      else if (fcode == IX86_BUILTIN_GATHERALTDIV8SF
	       || fcode == IX86_BUILTIN_GATHERALTDIV8SI)
	{
	  rtx (*gen) (rtx, rtx);
	  rtx half = gen_reg_rtx (mode0);
	  if (mode0 == V4SFmode)
	    gen = gen_vec_extract_lo_v8sf;
	  else
	    gen = gen_vec_extract_lo_v8si;
	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
	  emit_insn (gen (half, op0));
	  op0 = half;
	  if (!nonimmediate_operand (op3, GET_MODE (op3)))
	    op3 = copy_to_mode_reg (GET_MODE (op3), op3);
	  emit_insn (gen (half, op3));
	  op3 = half;
	}

      /* Force memory operand only with base register here.  But we
	 don't want to do it on memory operand for other builtin
	 functions.  */
      if (GET_MODE (op1) != Pmode)
	op1 = convert_to_mode (Pmode, op1, 1);
      op1 = force_reg (Pmode, op1);

      if (!insn_data[icode].operand[1].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
	op1 = copy_to_mode_reg (Pmode, op1);
      if (!insn_data[icode].operand[3].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      if (!insn_data[icode].operand[4].predicate (op3, mode3))
	op3 = copy_to_mode_reg (mode3, op3);
      if (!insn_data[icode].operand[5].predicate (op4, mode4))
	{
	  error ("last argument must be scale 1, 2, 4, 8");
	  return const0_rtx;
	}
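
      /* Illustrative note, not part of the original sources: the
	 scale operand comes from the intrinsic's last argument, e.g.
	 _mm256_i32gather_pd (base, idx, 8); anything other than 1, 2,
	 4 or 8 fails the operand predicate and is diagnosed above.  */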
      /* Optimize.  If mask is known to have all high bits set,
	 replace op0 with pc_rtx to signal that the instruction
	 overwrites the whole destination and doesn't use its
	 previous contents.  */
      if (optimize)
	{
	  if (TREE_CODE (arg3) == VECTOR_CST)
	    {
	      tree elt;
	      unsigned int negative = 0;
	      for (elt = TREE_VECTOR_CST_ELTS (arg3);
		   elt; elt = TREE_CHAIN (elt))
		{
		  tree cst = TREE_VALUE (elt);
		  if (TREE_CODE (cst) == INTEGER_CST
		      && tree_int_cst_sign_bit (cst))
		    negative++;
		  else if (TREE_CODE (cst) == REAL_CST
			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
		    negative++;
		}
	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
		op0 = pc_rtx;
	    }
	  else if (TREE_CODE (arg3) == SSA_NAME)
	    {
	      /* Recognize also when mask is like:
		 __v2df src = _mm_setzero_pd ();
		 __v2df mask = _mm_cmpeq_pd (src, src);
		 or
		 __v8sf src = _mm256_setzero_ps ();
		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
		 as that is a cheaper way to load all ones into
		 a register than having to load a constant from
		 memory.  */
	      gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
	      if (is_gimple_call (def_stmt))
		{
		  tree fndecl = gimple_call_fndecl (def_stmt);
		  if (fndecl
		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
		      {
		      case IX86_BUILTIN_CMPPD:
		      case IX86_BUILTIN_CMPPS:
		      case IX86_BUILTIN_CMPPD256:
		      case IX86_BUILTIN_CMPPS256:
			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
			  break;
			/* FALLTHRU */
		      case IX86_BUILTIN_CMPEQPD:
		      case IX86_BUILTIN_CMPEQPS:
			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
			    && initializer_zerop (gimple_call_arg (def_stmt,
								   1)))
			  op0 = pc_rtx;
			break;
		      default:
			break;
		      }
		}
	    }
	}

      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
      if (! pat)
	return const0_rtx;
      emit_insn (pat);

      if (fcode == IX86_BUILTIN_GATHERDIV8SF
	  || fcode == IX86_BUILTIN_GATHERDIV8SI)
	{
	  enum machine_mode tmode = GET_MODE (subtarget) == V8SFmode
				    ? V4SFmode : V4SImode;
	  if (target == NULL_RTX)
	    target = gen_reg_rtx (tmode);
	  if (tmode == V4SFmode)
	    emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
	  else
	    emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
	}
      else
	target = subtarget;

      return target;

    default:
      break;
    }

  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_special_args_builtin (d, exp, target);

  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    if (d->code == fcode)
      switch (fcode)
	{
	case IX86_BUILTIN_FABSQ:
	case IX86_BUILTIN_COPYSIGNQ:
	  if (!TARGET_SSE2)
	    /* Emit a normal call if SSE2 isn't available.  */
	    return expand_call (exp, target, ignore);
	default:
	  return ix86_expand_args_builtin (d, exp, target);
	}

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, exp, target);

  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpestr (d, exp, target);

  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_pcmpistr (d, exp, target);

  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
					    (enum ix86_builtin_func_type)
					    d->flag, d->comparison);

  gcc_unreachable ();
}
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE, or NULL_TREE
   if it is not available.  */

static tree
ix86_builtin_vectorized_function (tree fndecl, tree type_out,
				  tree type_in)
{
  enum machine_mode in_mode, out_mode;
  int in_n, out_n;
  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return NULL_TREE;

  out_mode = TYPE_MODE (TREE_TYPE (type_out));
  out_n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);

  switch (fn)
    {
    case BUILT_IN_SQRT:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPD256];
	}
      break;

    case BUILT_IN_SQRTF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
	}
      break;

    case BUILT_IN_IRINT:
    case BUILT_IN_LRINT:
    case BUILT_IN_LLRINT:
      if (out_mode == SImode && in_mode == DFmode)
	{
	  if (out_n == 4 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
	  else if (out_n == 8 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
	}
      break;

    case BUILT_IN_IRINTF:
    case BUILT_IN_LRINTF:
    case BUILT_IN_LLRINTF:
      if (out_mode == SImode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
	}
      break;

    case BUILT_IN_COPYSIGN:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
	}
      break;

    case BUILT_IN_COPYSIGNF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
	}
      break;

    case BUILT_IN_FLOOR:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPD256];
	}
      break;

    case BUILT_IN_FLOORF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_FLOORPS256];
	}
      break;

    case BUILT_IN_CEIL:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_CEILPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPD256];
	}
      break;

    case BUILT_IN_CEILF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_CEILPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_CEILPS256];
	}
      break;

    case BUILT_IN_TRUNC:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
	}
      break;

    case BUILT_IN_TRUNCF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
	}
      break;

    case BUILT_IN_RINT:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_RINTPD];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPD256];
	}
      break;

    case BUILT_IN_RINTF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_RINTPS];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_RINTPS256];
	}
      break;

    case BUILT_IN_ROUND:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
	  else if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
	}
      break;

    case BUILT_IN_ROUNDF:
      /* The round insn does not trap on denormals.  */
      if (flag_trapping_math || !TARGET_ROUND)
	break;

      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
	  else if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
	}
      break;

    case BUILT_IN_FMA:
      if (out_mode == DFmode && in_mode == DFmode)
	{
	  if (out_n == 2 && in_n == 2)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
	}
      break;

    case BUILT_IN_FMAF:
      if (out_mode == SFmode && in_mode == SFmode)
	{
	  if (out_n == 4 && in_n == 4)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS];
	  if (out_n == 8 && in_n == 8)
	    return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
	}
      break;

    default:
      break;
    }

  /* Dispatch to a handler for a vectorization library.  */
  if (ix86_veclib_handler)
    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
				type_in);

  return NULL_TREE;
}
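
/* Illustrative note, not part of the original sources: given -mavx
   and a loop like

     for (i = 0; i < n; i++)
       a[i] = sqrt (b[i]);

   the vectorizer queries this hook with V4DF types and receives the
   IX86_BUILTIN_SQRTPD256 decl selected above.  */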
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG10:
    case BUILT_IN_POW:
    case BUILT_IN_TANH:
    case BUILT_IN_TAN:
    case BUILT_IN_ATAN:
    case BUILT_IN_ATAN2:
    case BUILT_IN_ATANH:
    case BUILT_IN_CBRT:
    case BUILT_IN_SINH:
    case BUILT_IN_SIN:
    case BUILT_IN_ASINH:
    case BUILT_IN_ASIN:
    case BUILT_IN_COSH:
    case BUILT_IN_COS:
    case BUILT_IN_ACOSH:
    case BUILT_IN_ACOS:
      if (el_mode != DFmode || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_EXPF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG10F:
    case BUILT_IN_POWF:
    case BUILT_IN_TANHF:
    case BUILT_IN_TANF:
    case BUILT_IN_ATANF:
    case BUILT_IN_ATAN2F:
    case BUILT_IN_ATANHF:
    case BUILT_IN_CBRTF:
    case BUILT_IN_SINHF:
    case BUILT_IN_SINF:
    case BUILT_IN_ASINHF:
    case BUILT_IN_ASINF:
    case BUILT_IN_COSHF:
    case BUILT_IN_COSF:
    case BUILT_IN_ACOSHF:
    case BUILT_IN_ACOSF:
      if (el_mode != SFmode || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));

  if (fn == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (fn == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);
29825 for (args
= DECL_ARGUMENTS (builtin_decl_implicit (fn
));
29827 args
= TREE_CHAIN (args
))
29831 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
29833 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
29835 /* Build a function declaration for the vectorized function. */
29836 new_fndecl
= build_decl (BUILTINS_LOCATION
,
29837 FUNCTION_DECL
, get_identifier (name
), fntype
);
29838 TREE_PUBLIC (new_fndecl
) = 1;
29839 DECL_EXTERNAL (new_fndecl
) = 1;
29840 DECL_IS_NOVOPS (new_fndecl
) = 1;
29841 TREE_READONLY (new_fndecl
) = 1;
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  enum machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    case BUILT_IN_SIN:
    case BUILT_IN_COS:
    case BUILT_IN_EXP:
    case BUILT_IN_LOG:
    case BUILT_IN_LOG2:
    case BUILT_IN_LOG10:
      name[4] = 'd';
      name[5] = '2';
      if (el_mode != DFmode
	  || n != 2)
	return NULL_TREE;
      break;

    case BUILT_IN_SINF:
    case BUILT_IN_COSF:
    case BUILT_IN_EXPF:
    case BUILT_IN_POWF:
    case BUILT_IN_LOGF:
    case BUILT_IN_LOG2F:
    case BUILT_IN_LOG10F:
      name[4] = 's';
      name[5] = '4';
      if (el_mode != SFmode
	  || n != 4)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
       args;
       args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
/* Returns a decl of a function that implements gather load with
   memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_gather (const_tree mem_vectype,
			       const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (! TARGET_AVX2)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
	  && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*gather* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (mem_vectype))
    {
    case V2DFmode:
      code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
      break;
    case V4DFmode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
      break;
    case V2DImode:
      code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
      break;
    case V4DImode:
      code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
      break;
    case V4SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
      break;
    case V8SFmode:
      code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
      break;
    case V4SImode:
      code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
      break;
    case V8SImode:
      code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
      break;
    default:
      return NULL_TREE;
    }

  return ix86_builtins[code];
}
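
/* For example, a gather of V2DFmode elements with an SImode index type
   maps to IX86_BUILTIN_GATHERSIV2DF above, while the same memory type
   with a DImode index maps to IX86_BUILTIN_GATHERDIV2DF.  */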
/* Returns a code for a target-specific builtin that implements
   reciprocal of the function, or NULL_TREE if not available.  */

static tree
ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
			 bool sqrt ATTRIBUTE_UNUSED)
{
  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
	 && flag_finite_math_only && !flag_trapping_math
	 && flag_unsafe_math_optimizations))
    return NULL_TREE;

  if (md_fn)
    /* Machine dependent builtins.  */
    switch (fn)
      {
	/* Vectorized version of sqrt to rsqrt conversion.  */
      case IX86_BUILTIN_SQRTPS_NR:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];

      case IX86_BUILTIN_SQRTPS_NR256:
	return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];

      default:
	return NULL_TREE;
      }
  else
    /* Normal builtins.  */
    switch (fn)
      {
	/* Sqrt to rsqrt conversion.  */
      case BUILT_IN_SQRTF:
	return ix86_builtins[IX86_BUILTIN_RSQRTF];

      default:
	return NULL_TREE;
      }
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
	 a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
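
/* Worked example: for V4SFmode, nelt is 4 and each element selector
   occupies a 2-bit field, so the parallel [2 3 0 1] produces
   mask = 2 | (3 << 2) | (0 << 4) | (1 << 6) = 0x4e, and the function
   returns 0x4e + 1.  */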
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
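
/* Worked example: for V8SFmode, nelt2 is 4, so the lane-swapping
   parallel [4 5 6 7 0 1 2 3] has half starts 4 and 0; dividing each
   by nelt2 gives the lane selectors 1 and 0, hence mask = 0x01 and a
   return value of 0x01 + 1.  */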
/* Store OPERAND to the memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */

rtx
ix86_force_to_memory (enum machine_mode mode, rtx operand)
{
  rtx result;

  gcc_assert (reload_completed);
  if (ix86_using_red_zone ())
    {
      result = gen_rtx_MEM (mode,
			    gen_rtx_PLUS (Pmode,
					  stack_pointer_rtx,
					  GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT)
    {
      switch (mode)
	{
	case HImode:
	case SImode:
	  operand = gen_lowpart (DImode, operand);
	  /* FALLTHRU */
	case DImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (DImode,
					       gen_rtx_PRE_DEC (DImode,
							stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
	{
	case DImode:
	  {
	    rtx operands[2];
	    split_double_mode (mode, &operand, 1, operands, operands + 1);
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				    operands[1]));
	    emit_insn (gen_rtx_SET (VOIDmode,
				    gen_rtx_MEM (SImode,
						 gen_rtx_PRE_DEC (Pmode,
							stack_pointer_rtx)),
				    operands[0]));
	  }
	  break;
	case HImode:
	  /* Store HImodes as SImodes.  */
	  operand = gen_lowpart (SImode, operand);
	  /* FALLTHRU */
	case SImode:
	  emit_insn (gen_rtx_SET (VOIDmode,
				  gen_rtx_MEM (GET_MODE (operand),
					       gen_rtx_PRE_DEC (SImode,
							stack_pointer_rtx)),
				  operand));
	  break;
	default:
	  gcc_unreachable ();
	}
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
/* Free operand from the memory.  */

void
ix86_free_from_memory (enum machine_mode mode)
{
  if (!ix86_using_red_zone ())
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
	size = 8;
      else
	size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
	 to pop or add instruction if registers are available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					    GEN_INT (size))));
    }
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  enum machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX or SSE register.  This is because there are no MMX/SSE instructions
     to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
    return NO_REGS;

  /* Prefer SSE regs only, if we can use them for math.  */
  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* General regs can load everything.  */
      if (reg_class_subset_p (regclass, GENERAL_REGS))
	return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
	 zero above.  We only want to wind up preferring 80387 registers if
	 we plan on doing computation with them.  */
      if (TARGET_80387
	  && standard_80387_constant_p (x) > 0)
	{
	  /* Limit class to non-sse.  */
	  if (regclass == FLOAT_SSE_REGS)
	    return FLOAT_REGS;
	  if (regclass == FP_TOP_SSE_REGS)
	    return FP_TOP_REG;
	  if (regclass == FP_SECOND_SSE_REGS)
	    return FP_SECOND_REG;
	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
	    return regclass;
	}

      return NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (reg_class_subset_p (regclass, Q_REGS))
	return regclass;
      if (reg_class_subset_p (Q_REGS, regclass))
	return Q_REGS;
      return NO_REGS;
    }

  return regclass;
}
30344 SSE math is being used, and likewise for the 387 registers. */
30346 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
30348 enum machine_mode mode
= GET_MODE (x
);
30350 /* Restrict the output reload class to the register bank that we are doing
30351 math on. If we would like not to return a subset of CLASS, reject this
30352 alternative: if reload cannot do this, it will still use its choice. */
30353 mode
= GET_MODE (x
);
30354 if (TARGET_SSE_MATH
&& SSE_FLOAT_MODE_P (mode
))
30355 return MAYBE_SSE_CLASS_P (regclass
) ? SSE_REGS
: NO_REGS
;
30357 if (X87_FLOAT_MODE_P (mode
))
30359 if (regclass
== FP_TOP_SSE_REGS
)
30361 else if (regclass
== FP_SECOND_SSE_REGS
)
30362 return FP_SECOND_REG
;
30364 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
		       enum machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && rclass == GENERAL_REGS
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
		    ? CODE_FOR_reload_noff_load
		    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (!TARGET_64BIT
      && !in_p && mode == QImode
      && (rclass == GENERAL_REGS
	  || rclass == LEGACY_REGS
	  || rclass == INDEX_REGS))
    {
      int regno;

      if (REG_P (x))
	regno = REGNO (x);
      else
	regno = -1;

      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
	regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
	return Q_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	                               (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
	return true;

      default:
	break;
    }

  return false;
}
/* If we are copying between general and FP registers, we need a memory
   location. The same is true for SSE and MMX registers.

   To optimize register_move_cost performance, allow inline variant.

   The macro can't work reliably when one of the CLASSES is class containing
   registers from multiple units (SSE, MMX, integer).  We avoid this by never
   combining those units in single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
   enforce these sanity checks.  */

static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
				enum machine_mode mode, int strict)
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      gcc_assert (!strict);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if (!TARGET_INTER_UNIT_MOVES)
	return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  return false;
}

bool
ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
			      enum machine_mode mode, int strict)
{
  return inline_secondary_memory_needed (class1, class2, mode, strict);
}
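
/* For example, a DFmode copy between FLOAT_REGS and GENERAL_REGS needs
   secondary memory: there is no direct move between the x87 stack and
   the integer unit, so reload must spill through the stack.  */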
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
	return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
	return (TARGET_64BIT ? 4 : 6);
      else
	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
	return 2;
      else
	return 1;
    }
}
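
/* For example, XFmode in an integer class needs 3 registers on ia32
   (12 bytes in 4-byte words) but only 2 on 64-bit targets, while any
   non-complex mode in the FP or SSE classes always fits in a single
   register.  */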
/* Return true if the registers in CLASS cannot represent the change from
   modes FROM to TO.  */

bool
ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
			       enum reg_class regclass)
{
  if (from == to)
    return false;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return true;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
	 disallow a change to these modes, reload will assume it's ok to
	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
	 the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
	return true;

      /* Vector registers do not support subreg with nonzero offsets, which
	 are otherwise valid for integer registers.  Since we can't see
	 whether we have a nonzero offset from here, prohibit all
	 nonparadoxical subregs changing size.  */
      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
	return true;
    }

  return false;
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
			 int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
	{
	case SFmode:
	  index = 0;
	  break;
	case DFmode:
	  index = 1;
	  break;
	case XFmode:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	case 16:
	  index = 2;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
	{
	case 4:
	  index = 0;
	  break;
	case 8:
	  index = 1;
	  break;
	default:
	  return 100;
	}
      if (in == 2)
	return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      if (Q_CLASS_P (regclass) || TARGET_64BIT)
	{
	  if (!in)
	    return ix86_cost->int_store[0];
	  if (TARGET_PARTIAL_REG_DEPENDENCY
	      && optimize_function_for_speed_p (cfun))
	    cost = ix86_cost->movzbl_load;
	  else
	    cost = ix86_cost->int_load[0];
	  if (in == 2)
	    return MAX (cost, ix86_cost->int_store[0]);
	  return cost;
	}
      else
	{
	  if (in == 2)
	    return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
	  if (in)
	    return ix86_cost->movzbl_load;
	  else
	    return ix86_cost->int_store[0] + 4;
	}
      break;
    case 2:
      if (in == 2)
	return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
      return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
    default:
      /* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
      if (mode == TFmode)
	mode = XFmode;
      if (in == 2)
	cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
      else if (in)
	cost = ix86_cost->int_load[2];
      else
	cost = ix86_cost->int_store[2];
      return (cost * (((int) GET_MODE_SIZE (mode)
		       + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
    }
}

static int
ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
		       bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
	 stores followed by single load causing memory size mismatch stall.
	 Count this as arbitrarily high cost of 20.  */
      if (targetm.class_max_nregs (class1, mode)
	  > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between SSE/MMX and integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))

    /* ??? By keeping returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, high value prevents problem with x86_modes_tieable_p(),
       where integer modes in MMX/SSE registers are not tieable
       because of missing QImode and HImode moves to, from or between
       MMX/SSE registers.  */
    return MAX (8, ix86_cost->mmxsse_to_integer);

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}
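
/* For example, assuming TARGET_INTER_UNIT_MOVES so that no secondary
   memory is needed, an SImode move between SSE_REGS and GENERAL_REGS
   costs MAX (8, ix86_cost->mmxsse_to_integer); the deliberately high
   floor keeps the allocator from bouncing integer values through
   vector registers.  */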
/* Return TRUE if hard register REGNO can hold a value of machine-mode
   MODE.  */

int
ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  OImode move is available only when AVX is
	 enabled.  */
      return ((TARGET_AVX && mode == OImode)
	      || VALID_AVX256_REG_MODE (mode)
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (regno <= BX_REG || TARGET_64BIT)
	return 1;
      if (!TARGET_PARTIAL_REG_STALL)
	return 1;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return 1;
  else if (VALID_FP_MODE_P (mode))
    return 1;
  else if (VALID_DFP_MODE_P (mode))
    return 1;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return 1;

  return 0;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (enum machine_mode mode)
{
  switch (mode)
    {
    case HImode:
    case SImode:
      return true;

    case QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}

/* Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

bool
ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
		bool speed)
{
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  enum machine_mode mode = GET_MODE (x);
  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;

  switch (code)
    {
    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
	*total = 3;
      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
	*total = 2;
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
	*total = 1;
      else
	*total = 0;
      return true;
    case CONST_DOUBLE:
      if (mode == VOIDmode)
	*total = 0;
      else
	switch (standard_80387_constant_p (x))
	  {
	  case 1: /* 0.0 */
	    *total = 1;
	    break;
	  default: /* Other constants */
	    *total = 2;
	    break;
	  case 0:
	  case -1:
	    /* Start with (MEM (SYMBOL_REF)), since that's where
	       it'll probably end up.  Add a penalty for size.  */
	    *total = (COSTS_N_INSNS (1)
		      + (flag_pic != 0 && !TARGET_64BIT)
		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
	    break;
	  }
      return true;

    case ZERO_EXTEND:
      /* The zero extensions is often completely free on x86_64, so make
	 it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
	  && GET_MODE (XEXP (x, 0)) == SImode)
	*total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
	*total = cost->add;
      else
	*total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;

    case ASHIFT:
      if (CONST_INT_P (XEXP (x, 1))
	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
	{
	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	  if (value == 1)
	    {
	      *total = cost->add;
	      return false;
	    }
	  if ((value == 2 || value == 3)
	      && cost->lea <= cost->shift_const)
	    {
	      *total = cost->lea;
	      return false;
	    }
	}
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = cost->shift_const + COSTS_N_INSNS (2);
	      else
		*total = cost->shift_const * 2;
	    }
	  else
	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = cost->shift_var * 2;
	      else
		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
	    }
	}
      else
	{
	  if (CONST_INT_P (XEXP (x, 1)))
	    *total = cost->shift_const;
	  else
	    *total = cost->shift_var;
	}
      return false;

    case FMA:
      {
	rtx sub;

	gcc_assert (FLOAT_MODE_P (mode));
	gcc_assert (TARGET_FMA || TARGET_FMA4);

	/* ??? SSE scalar/vector cost should be used here.  */
	/* ??? Bald assumption that fma has the same cost as fmul.  */
	*total = cost->fmul;
	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);

	/* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
	sub = XEXP (x, 0);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 0, speed);

	sub = XEXP (x, 2);
	if (GET_CODE (sub) == NEG)
	  sub = XEXP (sub, 0);
	*total += rtx_cost (sub, FMA, 2, speed);
	return true;
      }

    case MULT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE scalar cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fmul;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fmul;
	  return false;
	}
      else
	{
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);
	  int nbits;
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
	      for (nbits = 0; value != 0; value &= value - 1)
		nbits++;
	    }
	  else
	    /* This is arbitrary.  */
	    nbits = 7;

	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
		 == GET_MODE_SIZE (mode))
	    {
	      int is_mulwiden = 0;
	      enum machine_mode inner_mode = GET_MODE (op0);

	      if (GET_CODE (op0) == GET_CODE (op1))
		is_mulwiden = 1, op1 = XEXP (op1, 0);
	      else if (CONST_INT_P (op1))
		{
		  if (GET_CODE (op0) == SIGN_EXTEND)
		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
				  == INTVAL (op1);
		  else
		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
		}

	      if (is_mulwiden)
		op0 = XEXP (op0, 0), mode = GET_MODE (op0);
	    }

	  *total = (cost->mult_init[MODE_INDEX (mode)]
		    + nbits * cost->mult_bit
		    + rtx_cost (op0, outer_code, opno, speed)
		    + rtx_cost (op1, outer_code, opno, speed));

	  return true;
	}

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fdiv;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fdiv;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fdiv;
      else
	*total = cost->divide[MODE_INDEX (mode)];
      return false;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
	{
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
	      && CONSTANT_P (XEXP (x, 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == MULT
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
	      if (val == 2 || val == 4 || val == 8)
		{
		  *total = cost->lea;
		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				      outer_code, opno, speed);
		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
		  return true;
		}
	    }
	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
	    {
	      *total = cost->lea;
	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
				  outer_code, opno, speed);
	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
	      return true;
	    }
	}
      /* FALLTHRU */

    case MINUS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fadd;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fadd;
	  return false;
	}
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (!TARGET_64BIT && mode == DImode)
	{
	  *total = (cost->add * 2
		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 0)) != DImode))
		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
		       << (GET_MODE (XEXP (x, 1)) != DImode)));
	  return true;
	}
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	{
	  /* ??? SSE cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      else if (X87_FLOAT_MODE_P (mode))
	{
	  *total = cost->fchs;
	  return false;
	}
      else if (FLOAT_MODE_P (mode))
	{
	  /* ??? SSE vector cost should be used here.  */
	  *total = cost->fchs;
	  return false;
	}
      /* FALLTHRU */

    case NOT:
      if (!TARGET_64BIT && mode == DImode)
	*total = cost->add * 2;
      else
	*total = cost->add;
      return false;

    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
	  && XEXP (XEXP (x, 0), 1) == const1_rtx
	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
	  && XEXP (x, 1) == const0_rtx)
	{
	  /* This kind of construct is implemented using test[bwl].
	     Treat it as if we had an AND.  */
	  *total = (cost->add
		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
		    + rtx_cost (const1_rtx, outer_code, opno, speed));
	  return true;
	}
      return false;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
	*total = 0;
      return false;

    case ABS:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fabs;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fabs;
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	/* ??? SSE cost should be used here.  */
	*total = cost->fsqrt;
      else if (X87_FLOAT_MODE_P (mode))
	*total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
	/* ??? SSE vector cost should be used here.  */
	*total = cost->fsqrt;
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
	*total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_MERGE:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = COSTS_N_INSNS (1);
      return true;

    default:
      return false;
    }
}
#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
	reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
	reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
/* Handle a "callee_pop_aggregate_return" attribute; arguments as
   in struct attribute_spec handler.  */

static tree
ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
					 tree args,
					 int flags ATTRIBUTE_UNUSED,
					 bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (TARGET_64BIT)
    {
      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }
  if (is_attribute_p ("callee_pop_aggregate_return", name))
    {
      tree cst;

      cst = TREE_VALUE (args);
      if (TREE_CODE (cst) != INTEGER_CST)
	{
	  warning (OPT_Wattributes,
		   "%qE attribute requires an integer constant argument",
		   name);
	  *no_add_attrs = true;
	}
      else if (compare_tree_int (cst, 0) != 0
	       && compare_tree_int (cst, 1) != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is neither zero, nor one",
		   name);
	  *no_add_attrs = true;
	}
    }

  return NULL_TREE;
}
/* Handle a "ms_abi" or "sysv" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_abi_attribute (tree *node, tree name,
			   tree args ATTRIBUTE_UNUSED,
			   int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  /* Can combine regparm with all attributes but fastcall.  */
  if (is_attribute_p ("ms_abi", name))
    {
      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }
  else if (is_attribute_p ("sysv_abi", name))
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
	{
	  error ("ms_abi and sysv_abi attributes are not compatible");
	}

      return NULL_TREE;
    }

  return NULL_TREE;
}
/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  */

static tree
ix86_handle_struct_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

static tree
ix86_handle_fndecl_attribute (tree *node, tree name,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (DImode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
}
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT vcall_offset, const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file,
		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, R10_REG);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;
      unsigned int tmp_regno;

      if (TARGET_64BIT)
	tmp_regno = R10_REG;
      else
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) != 0)
	    tmp_regno = AX_REG;
	  else
	    tmp_regno = CX_REG;
	}
      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || cfun->machine->call_abi == MS_ABI)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_PLUS (Pmode, fnaddr, tmp);
	  fnaddr = gen_rtx_MEM (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    emit_jump_insn (gen_indirect_jump (fnaddr));
  else
    {
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }

  /* Emit just enough of rest_of_compilation to get the insns emitted.
     Note that use_thunk calls assemble_start_function et al.  */
  tmp = get_insns ();
  insn_locators_alloc ();
  shorten_branches (tmp);
  final_start_function (tmp, file, 1);
  final (tmp, file, 1);
  final_end_function ();
}
static void
x86_file_start (void)
{
  default_file_start ();
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
int
x86_field_alignment (tree field, int computed)
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (strip_array_types (type));
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
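
/* For example, on ia32 without -malign-double a double field inside a
   struct is capped at 4-byte alignment by the MIN (32, computed)
   clause above, matching the traditional System V i386 layout.  */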
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */

void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
					   : MCOUNT_NAME);

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (DEFAULT_ABI == SYSV_ABI && flag_pic)
	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	fprintf (file, "\tcall\t%s\n", mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "\tcall\t%s\n", mcount_name);
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
min_insn_size (rtx insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;
  if (JUMP_TABLE_DATA_P (insn))
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx insn, start = get_insns ();
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
     */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  int align = label_to_alignment (insn);
	  int max_skip = label_to_max_skip (insn);

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start)
		       && GET_CODE (PATTERN (start)) != ADDR_VEC
		       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
		      || CALL_P (start))
		    njumps--, isjump = 1;
		  else
		    isjump = 0;
		  nbytes -= min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn)
	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start)
	       && GET_CODE (PATTERN (start)) != ADDR_VEC
	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
	      || CALL_P (start))
	    njumps--, isjump = 1;
	  else
	    isjump = 0;
	  nbytes -= min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
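
/* For illustration of the padding arithmetic above: if the interval
   ending at INSN already holds three earlier jumps and measures
   nbytes = 12 with INSN itself 2 bytes long, the pass emits a pad of
   15 - 12 + 2 = 5 bytes before INSN, so that START and INSN can no
   longer share one 16-byte window.  */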
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */

static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      basic_block bb = e->src;
      rtx ret = BB_END (bb);
      rtx prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      replace = true;
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happens in exit blocks.  */
      if (JUMP_P (insn)
          && ANY_RETURN_P (PATTERN (insn)))
        break;

      if (NONDEBUG_INSN_P (insn)
          && GET_CODE (PATTERN (insn)) != USE
          && GET_CODE (PATTERN (insn)) != CLOBBER)
        {
          insn_count++;
          if (insn_count >= 4)
            return insn_count;
        }
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR)
        {
          min_prev_count = 0;
          break;
        }
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
        {
          if (prev_e->src == ENTRY_BLOCK_PTR)
            {
              int count = ix86_count_insn_bb (e->src);
              if (count < min_prev_count)
                min_prev_count = count;
              break;
            }
        }
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.   */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
        {
          int insn_count = ix86_count_insn (e->src);

          /* Pad short function.  */
          if (insn_count < 4)
            {
              rtx insn = ret;

              /* Find epilogue.  */
              while (insn
                     && (!NOTE_P (insn)
                         || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
                insn = PREV_INSN (insn);

              if (!insn)
                insn = ret;

              /* Two NOPs count as one instruction.  */
              insn_count = 2 * (4 - insn_count);
              emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
            }
        }
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Run the vzeroupper optimization if needed.  */
  if (TARGET_VZEROUPPER)
    move_or_delete_vzeroupper ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
        ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
        ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
        ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (REG_P (recog_data.operand[i])
        && REGNO (recog_data.operand[i]) > BX_REG)
       return true;
  return false;
}
/* Return nonzero when P points to register encoded via REX prefix.
   Called via for_each_rtx.  */
static int
extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
{
   unsigned int regno;
   if (!REG_P (*p))
     return 0;
   regno = REGNO (*p);
   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
                       extended_reg_mentioned_1, NULL);
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case SImode:
    case HImode:
    case QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
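
/* Illustration, not part of GCC: the heuristic above in plain C (the
   function name is hypothetical).  -128 fits in a sign-extended 8-bit
   immediate while +128 needs a full 32-bit one, so an "addl $-4" is
   rewritten as "subl $4", but an "addl $-128" is kept, and an
   "addl $128" becomes "subl $-128":

     static int
     should_negate (long val)
     {
       return (val < 0 && val != -128) || val == 128;
     }
*/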
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx neglab, donelab, i0, i1, f0, in, out;
  enum machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
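
/* Illustration, not part of GCC: the negative-input branch emitted
   above, as a standalone scalar model (hypothetical name; assumes a
   64-bit input).  Halving with the low bit OR'ed back in preserves
   the rounding parity, and the final doubling restores magnitude:

     #include <stdint.h>

     static double
     u64_to_double_model (uint64_t x)
     {
       if ((int64_t) x >= 0)
         return (double) (int64_t) x;        // signed convert is exact
       uint64_t h = (x >> 1) | (x & 1);      // halve, keep rounding bit
       double f = (double) (int64_t) h;
       return f + f;                         // undo the halving
     }
*/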
/* AVX2 does support 32-byte integer vector operations,
   thus the longest vector we are faced with is V32QImode.  */
#define MAX_VECT_LEN	32

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool testing_p;
};

static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   with all elements equal to VAR.  Return true if successful.  */

static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
                                   rtx target, rtx val)
{
  bool ok;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V4DFmode:
    case V4DImode:
    case V8SFmode:
    case V8SImode:
    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      {
        rtx insn, dup;

        /* First attempt to recognize VAL as-is.  */
        dup = gen_rtx_VEC_DUPLICATE (mode, val);
        insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
        if (recog_memoized (insn) < 0)
          {
            rtx seq;
            /* If that fails, force VAL into a register.  */

            start_sequence ();
            XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
            seq = get_insns ();
            end_sequence ();
            if (seq)
              emit_insn_before (seq, insn);

            ok = recog_memoized (insn) >= 0;
            gcc_assert (ok);
          }
      }
      return true;

    case V4HImode:
      if (!mmx_ok)
        return false;
      if (TARGET_SSE || TARGET_3DNOW_A)
        {
          rtx x;

          val = gen_lowpart (SImode, val);
          x = gen_rtx_TRUNCATE (HImode, val);
          x = gen_rtx_VEC_DUPLICATE (mode, x);
          emit_insn (gen_rtx_SET (VOIDmode, target, x));
          return true;
        }
      goto widen;

    case V8QImode:
      if (!mmx_ok)
        return false;
      goto widen;

    case V8HImode:
      if (TARGET_SSE2)
        {
          struct expand_vec_perm_d dperm;
          rtx tmp1, tmp2;

        permute:
          memset (&dperm, 0, sizeof (dperm));
          dperm.target = target;
          dperm.vmode = mode;
          dperm.nelt = GET_MODE_NUNITS (mode);
          dperm.op0 = dperm.op1 = gen_reg_rtx (mode);

          /* Extend to SImode using a paradoxical SUBREG.  */
          tmp1 = gen_reg_rtx (SImode);
          emit_move_insn (tmp1, gen_lowpart (SImode, val));

          /* Insert the SImode value as low element of a V4SImode vector.  */
          tmp2 = gen_lowpart (V4SImode, dperm.op0);
          emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));

          ok = (expand_vec_perm_1 (&dperm)
                || expand_vec_perm_broadcast_1 (&dperm));
          gcc_assert (ok);
          return ok;
        }
      goto widen;

    case V16QImode:
      if (TARGET_SSE2)
        goto permute;
      goto widen;

    widen:
      /* Replicate the value once into the next wider mode and recurse.  */
      {
        enum machine_mode smode, wsmode, wvmode;
        rtx x;

        smode = GET_MODE_INNER (mode);
        wvmode = get_mode_wider_vector (mode);
        wsmode = GET_MODE_INNER (wvmode);

        val = convert_modes (wsmode, smode, val, true);
        x = expand_simple_binop (wsmode, ASHIFT, val,
                                 GEN_INT (GET_MODE_BITSIZE (smode)),
                                 NULL_RTX, 1, OPTAB_LIB_WIDEN);
        val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);

        x = gen_lowpart (wvmode, target);
        ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
        gcc_assert (ok);
        return ok;
      }

    case V16HImode:
    case V32QImode:
      {
        enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
        rtx x = gen_reg_rtx (hvmode);

        ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
        gcc_assert (ok);

        x = gen_rtx_VEC_CONCAT (mode, x, x);
        emit_insn (gen_rtx_SET (VOIDmode, target, x));
      }
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */

static bool
ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
                                     rtx target, rtx var, int one_var)
{
  enum machine_mode vsimode;
  rtx new_target;
  rtx x, tmp;
  bool use_vector_set = false;

  switch (mode)
    {
    case V2DImode:
      /* For SSE4.1, we normally use vector set.  But if the second
         element is zero and inter-unit moves are OK, we use movq
         instead.  */
      use_vector_set = (TARGET_64BIT
                        && TARGET_SSE4_1
                        && !(TARGET_INTER_UNIT_MOVES
                             && one_var == 0));
      break;
    case V16QImode:
    case V4SImode:
    case V4SFmode:
      use_vector_set = TARGET_SSE4_1;
      break;
    case V8HImode:
      use_vector_set = TARGET_SSE2;
      break;
    case V4HImode:
      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V8SFmode:
    case V4DFmode:
      use_vector_set = TARGET_AVX;
      break;
    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      use_vector_set = TARGET_AVX && TARGET_64BIT;
      break;
    default:
      break;
    }

  if (use_vector_set)
    {
      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
      var = force_reg (GET_MODE_INNER (mode), var);
      ix86_expand_vector_set (mmx_ok, target, var, one_var);
      return true;
    }

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok)
        return false;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      if (one_var != 0)
        return false;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
      emit_insn (gen_rtx_SET (VOIDmode, target, x));
      return true;

    case V4SFmode:
    case V4SImode:
      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
        new_target = gen_reg_rtx (mode);
      else
        new_target = target;
      var = force_reg (GET_MODE_INNER (mode), var);
      x = gen_rtx_VEC_DUPLICATE (mode, var);
      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
      if (one_var != 0)
        {
          /* We need to shuffle the value to the correct position, so
             create a new pseudo to store the intermediate result.  */

          /* With SSE2, we can use the integer shuffle insns.  */
          if (mode != V4SFmode && TARGET_SSE2)
            {
              emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
                                            const1_rtx,
                                            GEN_INT (one_var == 1 ? 0 : 1),
                                            GEN_INT (one_var == 2 ? 0 : 1),
                                            GEN_INT (one_var == 3 ? 0 : 1)));
              if (target != new_target)
                emit_move_insn (target, new_target);
              return true;
            }

          /* Otherwise convert the intermediate result to V4SFmode and
             use the SSE1 shuffle instructions.  */
          if (mode != V4SFmode)
            {
              tmp = gen_reg_rtx (V4SFmode);
              emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
            }
          else
            tmp = new_target;

          emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
                                          const1_rtx,
                                          GEN_INT (one_var == 1 ? 0 : 1),
                                          GEN_INT (one_var == 2 ? 0+4 : 1+4),
                                          GEN_INT (one_var == 3 ? 0+4 : 1+4)));

          if (mode != V4SFmode)
            emit_move_insn (target, gen_lowpart (V4SImode, tmp));
          else if (tmp != target)
            emit_move_insn (target, tmp);
        }
      else if (target != new_target)
        emit_move_insn (target, new_target);
      return true;

    case V8HImode:
    case V16QImode:
      vsimode = V4SImode;
      goto widen;
    case V4HImode:
    case V8QImode:
      if (!mmx_ok)
        return false;
      vsimode = V2SImode;
      goto widen;
    widen:
      if (one_var != 0)
        return false;

      /* Zero extend the variable element to SImode and recurse.  */
      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);

      x = gen_reg_rtx (vsimode);
      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
                                                var, one_var))
        gcc_unreachable ();

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }
}
/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
   consisting of the values in VALS.  It is known that all elements
   except ONE_VAR are constants.  Return true if successful.  */

static bool
ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals, int one_var)
{
  rtx var = XVECEXP (vals, 0, one_var);
  enum machine_mode wmode;
  rtx const_vec, x;

  const_vec = copy_rtx (vals);
  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));

  switch (mode)
    {
    case V2DFmode:
    case V2DImode:
    case V2SFmode:
    case V2SImode:
      /* For the two element vectors, it's just as easy to use
         the general case.  */
      return false;

    case V4DImode:
      /* Use ix86_expand_vector_set in 64bit mode only.  */
      if (!TARGET_64BIT)
        return false;
    case V4DFmode:
    case V8SFmode:
    case V8SImode:
    case V16HImode:
    case V32QImode:
    case V4SFmode:
    case V4SImode:
    case V8HImode:
    case V4HImode:
      break;

    case V16QImode:
      if (TARGET_SSE4_1)
        break;
      wmode = V8HImode;
      goto widen;
    case V8QImode:
      wmode = V4HImode;
      goto widen;
    widen:
      /* There's no way to set one QImode entry easily.  Combine
         the variable value with its adjacent constant value, and
         promote to an HImode set.  */
      x = XVECEXP (vals, 0, one_var ^ 1);
      if (one_var & 1)
        {
          var = convert_modes (HImode, QImode, var, true);
          var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
                                     NULL_RTX, 1, OPTAB_LIB_WIDEN);
          x = GEN_INT (INTVAL (x) & 0xff);
        }
      else
        {
          var = convert_modes (HImode, QImode, var, true);
          x = gen_int_mode (INTVAL (x) << 8, HImode);
        }
      if (x != const0_rtx)
        var = expand_simple_binop (HImode, IOR, var, x, var,
                                   1, OPTAB_LIB_WIDEN);

      x = gen_reg_rtx (wmode);
      emit_move_insn (x, gen_lowpart (wmode, const_vec));
      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);

      emit_move_insn (target, gen_lowpart (mode, x));
      return true;

    default:
      return false;
    }

  emit_move_insn (target, const_vec);
  ix86_expand_vector_set (mmx_ok, target, var, one_var);
  return true;
}
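
/* Illustration, not part of GCC: the HImode promotion above as plain
   C (hypothetical name), for a little-endian pair where the odd
   element occupies the high byte:

     static unsigned short
     pair_qi (unsigned char var, unsigned char adj_const, int var_is_odd)
     {
       return var_is_odd
              ? (unsigned short) ((var << 8) | adj_const)
              : (unsigned short) (var | (adj_const << 8));
     }

   The merged 16-bit value is then inserted with a single HImode
   vec_set at position one_var >> 1.  */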
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   concatenate to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_concat (enum machine_mode mode,
                                rtx target, rtx *ops, int n)
{
  enum machine_mode cmode, hmode = VOIDmode;
  rtx first[8], second[4];
  rtvec v;
  int i, j;

  switch (n)
    {
    case 2:
      switch (mode)
        {
        case V8SImode:
          cmode = V4SImode;
          break;
        case V8SFmode:
          cmode = V4SFmode;
          break;
        case V4DImode:
          cmode = V2DImode;
          break;
        case V4DFmode:
          cmode = V2DFmode;
          break;
        case V4SImode:
          cmode = V2SImode;
          break;
        case V4SFmode:
          cmode = V2SFmode;
          break;
        case V2DImode:
          cmode = DImode;
          break;
        case V2SImode:
          cmode = SImode;
          break;
        case V2DFmode:
          cmode = DFmode;
          break;
        case V2SFmode:
          cmode = SFmode;
          break;
        default:
          gcc_unreachable ();
        }

      if (!register_operand (ops[1], cmode))
        ops[1] = force_reg (cmode, ops[1]);
      if (!register_operand (ops[0], cmode))
        ops[0] = force_reg (cmode, ops[0]);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, ops[0],
                                                  ops[1])));
      break;

    case 4:
      switch (mode)
        {
        case V4DImode:
          cmode = V2DImode;
          break;
        case V4DFmode:
          cmode = V2DFmode;
          break;
        case V4SImode:
          cmode = V2SImode;
          break;
        case V4SFmode:
          cmode = V2SFmode;
          break;
        default:
          gcc_unreachable ();
        }
      goto half;

    case 8:
      switch (mode)
        {
        case V8SImode:
          cmode = V2SImode;
          hmode = V4SImode;
          break;
        case V8SFmode:
          cmode = V2SFmode;
          hmode = V4SFmode;
          break;
        default:
          gcc_unreachable ();
        }
      goto half;

half:
      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
      i = n - 1;
      j = (n >> 1) - 1;
      for (; i > 0; i -= 2, j--)
        {
          first[j] = gen_reg_rtx (cmode);
          v = gen_rtvec (2, ops[i - 1], ops[i]);
          ix86_expand_vector_init (false, first[j],
                                   gen_rtx_PARALLEL (cmode, v));
        }

      n >>= 1;
      if (n > 2)
        {
          gcc_assert (hmode != VOIDmode);
          for (i = j = 0; i < n; i += 2, j++)
            {
              second[j] = gen_reg_rtx (hmode);
              ix86_expand_vector_init_concat (hmode, second[j],
                                              &first[i], 2);
            }
          n >>= 1;
          ix86_expand_vector_init_concat (mode, target, second, n);
        }
      else
        ix86_expand_vector_init_concat (mode, target, first, n);
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init_general.  Use vector
   interleave to handle the most general case: all values variable,
   and none identical.  */

static void
ix86_expand_vector_init_interleave (enum machine_mode mode,
                                    rtx target, rtx *ops, int n)
{
  enum machine_mode first_imode, second_imode, third_imode, inner_mode;
  int i, j;
  rtx op0, op1;
  rtx (*gen_load_even) (rtx, rtx, rtx);
  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);

  switch (mode)
    {
    case V8HImode:
      gen_load_even = gen_vec_setv8hi;
      gen_interleave_first_low = gen_vec_interleave_lowv4si;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      inner_mode = HImode;
      first_imode = V4SImode;
      second_imode = V2DImode;
      third_imode = VOIDmode;
      break;
    case V16QImode:
      gen_load_even = gen_vec_setv16qi;
      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
      gen_interleave_second_low = gen_vec_interleave_lowv4si;
      inner_mode = QImode;
      first_imode = V8HImode;
      second_imode = V4SImode;
      third_imode = V2DImode;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < n; i++)
    {
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
      op0 = gen_reg_rtx (SImode);
      emit_move_insn (op0, gen_lowpart (SImode, ops[i + i]));

      /* Insert the SImode value as low element of V4SImode vector.  */
      op1 = gen_reg_rtx (V4SImode);
      op0 = gen_rtx_VEC_MERGE (V4SImode,
                               gen_rtx_VEC_DUPLICATE (V4SImode,
                                                      op0),
                               CONST0_RTX (V4SImode),
                               const1_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));

      /* Cast the V4SImode vector back to a vector in original mode.  */
      op0 = gen_reg_rtx (mode);
      emit_move_insn (op0, gen_lowpart (mode, op1));

      /* Load even elements into the second position.  */
      emit_insn (gen_load_even (op0,
                                force_reg (inner_mode,
                                           ops[i + i + 1]),
                                const1_rtx));

      /* Cast vector to FIRST_IMODE vector.  */
      ops[i] = gen_reg_rtx (first_imode);
      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
    }

  /* Interleave low FIRST_IMODE vectors.  */
  for (i = j = 0; i < n; i += 2, j++)
    {
      op0 = gen_reg_rtx (first_imode);
      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));

      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
      ops[j] = gen_reg_rtx (second_imode);
      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
    }

  /* Interleave low SECOND_IMODE vectors.  */
  switch (second_imode)
    {
    case V4SImode:
      for (i = j = 0; i < n / 2; i += 2, j++)
        {
          op0 = gen_reg_rtx (second_imode);
          emit_insn (gen_interleave_second_low (op0, ops[i],
                                                ops[i + 1]));

          /* Cast the SECOND_IMODE vector to the THIRD_IMODE
             vector.  */
          ops[j] = gen_reg_rtx (third_imode);
          emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
        }
      second_imode = V2DImode;
      gen_interleave_second_low = gen_vec_interleave_lowv2di;
      /* FALLTHRU */

    case V2DImode:
      op0 = gen_reg_rtx (second_imode);
      emit_insn (gen_interleave_second_low (op0, ops[0],
                                            ops[1]));

      /* Cast the SECOND_IMODE vector back to a vector on original
         mode.  */
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_lowpart (mode, op0)));
      break;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
   all values variable, and none identical.  */

static void
ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
                                 rtx target, rtx vals)
{
  rtx ops[32], op0, op1;
  enum machine_mode half_mode = VOIDmode;
  int n, i;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (!mmx_ok && !TARGET_SSE)
        break;
      /* FALLTHRU */

    case V8SFmode:
    case V8SImode:
    case V4DFmode:
    case V4DImode:
    case V4SFmode:
    case V4SImode:
    case V2DFmode:
    case V2DImode:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_concat (mode, target, ops, n);
      return;

    case V32QImode:
      half_mode = V16QImode;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      goto half;

half:
      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      op0 = gen_reg_rtx (half_mode);
      op1 = gen_reg_rtx (half_mode);
      ix86_expand_vector_init_interleave (half_mode, op0, ops,
                                          n >> 2);
      ix86_expand_vector_init_interleave (half_mode, op1,
                                          &ops[n >> 1], n >> 2);
      emit_insn (gen_rtx_SET (VOIDmode, target,
                              gen_rtx_VEC_CONCAT (mode, op0, op1)));
      return;

    case V16QImode:
      if (!TARGET_SSE4_1)
        break;
      /* FALLTHRU */

    case V8HImode:
      if (!TARGET_SSE2)
        break;

      /* Don't use ix86_expand_vector_init_interleave if we can't
         move from GPR to SSE register directly.  */
      if (!TARGET_INTER_UNIT_MOVES)
        break;

      n = GET_MODE_NUNITS (mode);
      for (i = 0; i < n; i++)
        ops[i] = XVECEXP (vals, 0, i);
      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
      return;

    case V4HImode:
    case V8QImode:
      break;

    default:
      gcc_unreachable ();
    }

    {
      int i, j, n_elts, n_words, n_elt_per_word;
      enum machine_mode inner_mode;
      rtx words[4], shift;

      inner_mode = GET_MODE_INNER (mode);
      n_elts = GET_MODE_NUNITS (mode);
      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      n_elt_per_word = n_elts / n_words;
      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));

      for (i = 0; i < n_words; ++i)
        {
          rtx word = NULL_RTX;

          for (j = 0; j < n_elt_per_word; ++j)
            {
              rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
              elt = convert_modes (word_mode, inner_mode, elt, true);

              if (j == 0)
                word = elt;
              else
                {
                  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
                                              word, 1, OPTAB_LIB_WIDEN);
                  word = expand_simple_binop (word_mode, IOR, word, elt,
                                              word, 1, OPTAB_LIB_WIDEN);
                }
            }

          words[i] = word;
        }

      if (n_words == 1)
        emit_move_insn (target, gen_lowpart (mode, words[0]));
      else if (n_words == 2)
        {
          rtx tmp = gen_reg_rtx (mode);
          emit_clobber (tmp);
          emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
          emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
          emit_move_insn (target, tmp);
        }
      else if (n_words == 4)
        {
          rtx tmp = gen_reg_rtx (V4SImode);
          gcc_assert (word_mode == SImode);
          vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
          ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
          emit_move_insn (target, gen_lowpart (mode, tmp));
        }
      else
        gcc_unreachable ();
    }
}
/* Initialize vector TARGET via VALS.  Suppress the use of MMX
   instructions unless MMX_OK is true.  */

void
ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  int i;
  rtx x;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
            || GET_CODE (x) == CONST_DOUBLE
            || GET_CODE (x) == CONST_FIXED))
        n_var++, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
        all_const_zero = false;
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  /* Constants are best loaded from the constant pool.  */
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  /* If all values are identical, broadcast the value.  */
  if (all_same
      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
                                            XVECEXP (vals, 0, 0)))
    return;

  /* Values where only one field is non-constant are best loaded from
     the pool and overwritten via move later.  */
  if (n_var == 1)
    {
      if (all_const_zero
          && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
                                                  XVECEXP (vals, 0, one_var),
                                                  one_var))
        return;

      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals,
                                           one_var))
        return;
    }

  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
}
void
ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  enum machine_mode half_mode;
  bool use_vec_merge = false;
  rtx tmp;
  static rtx (*gen_extract[6][2]) (rtx, rtx)
    = { { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
        { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
        { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
        { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
        { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
        { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } };
  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
    = { { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
        { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
        { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
        { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
        { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
        { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } };
  int i, j, n;

  switch (mode)
    {
    case V2SFmode:
    case V2SImode:
      if (mmx_ok)
        {
          tmp = gen_reg_rtx (GET_MODE_INNER (mode));
          ix86_expand_vector_extract (true, tmp, target, 1 - elt);
          if (elt == 0)
            tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
          else
            tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
          emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
          return;
        }
      break;

    case V2DImode:
      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
      if (use_vec_merge)
        break;

      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
      if (elt == 0)
        tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
      else
        tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      return;

    case V2DFmode:
      {
        rtx op0, op1;

        /* For the two element vectors, we implement a VEC_CONCAT with
           the extraction of the other element.  */

        tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
        tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);

        if (elt == 0)
          op0 = val, op1 = tmp;
        else
          op0 = tmp, op1 = val;

        tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
        emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
      }
      return;

    case V4SFmode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      switch (elt)
        {
        case 0:
          use_vec_merge = true;
          break;

        case 1:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* target = A A B B */
          emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
          /* target = X A B B */
          ix86_expand_vector_set (false, target, val, 0);
          /* target = A X C D  */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const1_rtx, const0_rtx,
                                          GEN_INT (2+4), GEN_INT (3+4)));
          return;

        case 2:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (0+4), GEN_INT (3+4)));
          return;

        case 3:
          /* tmp = target = A B C D */
          tmp = copy_to_reg (target);
          /* tmp = X B C D */
          ix86_expand_vector_set (false, tmp, val, 0);
          /* target = A B X D */
          emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
                                          const0_rtx, const1_rtx,
                                          GEN_INT (2+4), GEN_INT (0+4)));
          return;

        default:
          gcc_unreachable ();
        }
      break;

    case V4SImode:
      use_vec_merge = TARGET_SSE4_1;
      if (use_vec_merge)
        break;

      /* Element 0 handled by vec_merge below.  */
      if (elt == 0)
        {
          use_vec_merge = true;
          break;
        }

      if (TARGET_SSE2)
        {
          /* With SSE2, use integer shuffles to swap element 0 and ELT,
             store into element 0, then shuffle them back.  */
          rtx order[4];

          order[0] = GEN_INT (elt);
          order[1] = const1_rtx;
          order[2] = const2_rtx;
          order[3] = GEN_INT (3);
          order[elt] = const0_rtx;

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));

          ix86_expand_vector_set (false, target, val, 0);

          emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
                                        order[1], order[2], order[3]));
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
                                  gen_lowpart (SFmode, val), elt);
        }
      return;

    case V8HImode:
      use_vec_merge = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_merge = TARGET_SSE4_1;
      break;

    case V8QImode:
      break;

    case V32QImode:
      half_mode = V16QImode;
      j = 0;
      n = 16;
      goto half;

    case V16HImode:
      half_mode = V8HImode;
      j = 1;
      n = 8;
      goto half;

    case V8SImode:
      half_mode = V4SImode;
      j = 2;
      n = 4;
      goto half;

    case V4DImode:
      half_mode = V2DImode;
      j = 3;
      n = 2;
      goto half;

    case V8SFmode:
      half_mode = V4SFmode;
      j = 4;
      n = 4;
      goto half;

    case V4DFmode:
      half_mode = V2DFmode;
      j = 5;
      n = 2;
      goto half;

half:
      /* Compute offset.  */
      i = elt / n;
      elt %= n;

      gcc_assert (i <= 1);

      /* Extract the half.  */
      tmp = gen_reg_rtx (half_mode);
      emit_insn (gen_extract[j][i] (tmp, target));

      /* Put val in tmp at elt.  */
      ix86_expand_vector_set (false, tmp, val, elt);

      /* Put it back.  */
      emit_insn (gen_insert[j][i] (target, target, tmp));
      return;

    default:
      break;
    }

  if (use_vec_merge)
    {
      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, target);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (tmp, val);

      emit_move_insn (target, mem);
    }
}
void
ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
{
  enum machine_mode mode = GET_MODE (vec);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  bool use_vec_extr = false;
  rtx tmp;

  switch (mode)
    {
    case V2SImode:
    case V2SFmode:
      if (!mmx_ok)
        break;
      /* FALLTHRU */

    case V2DFmode:
    case V2DImode:
      use_vec_extr = true;
      break;

    case V4SFmode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      switch (elt)
        {
        case 0:
          tmp = vec;
          break;

        case 1:
        case 3:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
                                          GEN_INT (elt), GEN_INT (elt),
                                          GEN_INT (elt+4), GEN_INT (elt+4)));
          break;

        case 2:
          tmp = gen_reg_rtx (mode);
          emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
          break;

        default:
          gcc_unreachable ();
        }
      vec = tmp;
      use_vec_extr = true;
      elt = 0;
      break;

    case V4SImode:
      use_vec_extr = TARGET_SSE4_1;
      if (use_vec_extr)
        break;

      if (TARGET_SSE2)
        {
          switch (elt)
            {
            case 0:
              tmp = vec;
              break;

            case 1:
            case 3:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_sse2_pshufd_1 (tmp, vec,
                                            GEN_INT (elt), GEN_INT (elt),
                                            GEN_INT (elt), GEN_INT (elt)));
              break;

            case 2:
              tmp = gen_reg_rtx (mode);
              emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
              break;

            default:
              gcc_unreachable ();
            }
          vec = tmp;
          use_vec_extr = true;
          elt = 0;
        }
      else
        {
          /* For SSE1, we have to reuse the V4SF code.  */
          ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
                                      gen_lowpart (V4SFmode, vec), elt);
          return;
        }
      break;

    case V8HImode:
      use_vec_extr = TARGET_SSE2;
      break;
    case V4HImode:
      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
      break;

    case V16QImode:
      use_vec_extr = TARGET_SSE4_1;
      break;

    case V8SFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SFmode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DFmode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DFmode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V32QImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V16QImode);
          if (elt < 16)
            emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 15);
          return;
        }
      break;

    case V16HImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V8HImode);
          if (elt < 8)
            emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 7);
          return;
        }
      break;

    case V8SImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V4SImode);
          if (elt < 4)
            emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 3);
          return;
        }
      break;

    case V4DImode:
      if (TARGET_AVX)
        {
          tmp = gen_reg_rtx (V2DImode);
          if (elt < 2)
            emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
          else
            emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
          ix86_expand_vector_extract (false, target, tmp, elt & 1);
          return;
        }
      break;

    case V8QImode:
      /* ??? Could extract the appropriate HImode element and shift.  */
    default:
      break;
    }

  if (use_vec_extr)
    {
      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);

      /* Let the rtl optimizers know about the zero extension performed.  */
      if (inner_mode == QImode || inner_mode == HImode)
        {
          tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
          target = gen_lowpart (SImode, target);
        }

      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
    }
  else
    {
      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);

      emit_move_insn (mem, vec);

      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
      emit_move_insn (target, tmp);
    }
}
/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
   The upper bits of DEST are undefined, though they shouldn't cause
   exceptions (some bits from src or all zeros are ok).  */

static void
emit_reduc_half (rtx dest, rtx src, int i)
{
  rtx tem;
  switch (GET_MODE (src))
    {
    case V4SFmode:
      if (i == 128)
        tem = gen_sse_movhlps (dest, src, src);
      else
        tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
                                   GEN_INT (1 + 4), GEN_INT (1 + 4));
      break;
    case V2DFmode:
      tem = gen_vec_interleave_highv2df (dest, src, src);
      break;
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      tem = gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, dest),
                                gen_lowpart (V1TImode, src),
                                GEN_INT (i / 2));
      break;
    case V8SFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufps256 (dest, src, src,
                                 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
      break;
    case V4DFmode:
      if (i == 256)
        tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
      else
        tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
      break;
    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      if (i == 256)
        tem = gen_avx2_permv2ti (gen_lowpart (V4DImode, dest),
                                 gen_lowpart (V4DImode, src),
                                 gen_lowpart (V4DImode, src),
                                 const1_rtx);
      else
        tem = gen_avx2_lshrv2ti3 (gen_lowpart (V2TImode, dest),
                                  gen_lowpart (V2TImode, src),
                                  GEN_INT (i / 2));
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (tem);
}
/* Expand a vector reduction.  FN is the binary pattern to reduce;
   DEST is the destination; IN is the input vector.  */

void
ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
{
  rtx half, dst, vec = in;
  enum machine_mode mode = GET_MODE (in);
  int i;

  /* SSE4 has a special instruction for V8HImode UMIN reduction.  */
  if (TARGET_SSE4_1
      && mode == V8HImode
      && fn == gen_uminv8hi3)
    {
      emit_insn (gen_sse4_1_phminposuw (dest, in));
      return;
    }

  for (i = GET_MODE_BITSIZE (mode);
       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
       i >>= 1)
    {
      half = gen_reg_rtx (mode);
      emit_reduc_half (half, vec, i);
      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
        dst = dest;
      else
        dst = gen_reg_rtx (mode);
      emit_insn (fn (dst, half, vec));
      vec = dst;
    }
}
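
/* Illustration, not part of GCC: a scalar model of the halving
   reduction above (hypothetical name) for an associative, commutative
   FN such as min.  Each step folds the upper half of the live lanes
   onto the lower half, so lane 0 ends up holding the full reduction:

     static int
     reduce_min (int *lane, int nelt)   // nelt is a power of two
     {
       int half, i;
       for (half = nelt / 2; half >= 1; half /= 2)
         for (i = 0; i < half; i++)
           lane[i] = lane[i] < lane[i + half] ? lane[i] : lane[i + half];
       return lane[0];
     }
*/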
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (enum machine_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
    return true;
  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}

/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_CLOBBERS.

   We do this in the new i386 backend to maintain source compatibility
   with the old cc0-based compiler.  */

static tree
ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
                      tree inputs ATTRIBUTE_UNUSED,
                      tree clobbers)
{
  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
                        clobbers);
  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
                        clobbers);
  return clobbers;
}
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
      && ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
{
  return (mode != CCFPmode && mode != CCFPUmode
          ? reverse_condition (code)
          : reverse_condition_maybe_unordered (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
          && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
          if (REGNO (operands[0]) == FIRST_STACK_REG)
            return output_387_ffreep (operands, 0);
          return "fstp\t%y0";
        }
      if (STACK_TOP_P (operands[0]))
        return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        return "fstp%Z0\t%y0";
      else
        {
          /* There is no non-popping store to memory for XFmode.
             So if we need one, follow the store with a load.  */
          if (GET_MODE (operands[0]) == XFmode)
            return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
          else
            return "fst%Z0\t%y0";
        }
    }
  else
    gcc_unreachable();
}
/* Output code to perform a conditional jump to LABEL, if C2 flag in
   FP status register is set.  */

void
ix86_emit_fp_unordered_jump (rtx label)
{
  rtx reg = gen_reg_rtx (HImode);
  rtx temp;

  emit_insn (gen_x86_fnstsw_1 (reg));

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    {
      emit_insn (gen_x86_sahf_1 (reg));

      temp = gen_rtx_REG (CCmode, FLAGS_REG);
      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
    }
  else
    {
      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));

      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
    }

  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
                               gen_rtx_LABEL_REF (VOIDmode, label),
                               pc_rtx);
  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);

  emit_jump_insn (temp);
  predict_jump (REG_BR_PROB_BASE * 10 / 100);
}
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1),
                                  label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
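
/* Illustration, not part of GCC: the split above in C-like pseudocode.
   fyl2xp1 is only accurate for |x| < 1 - sqrt(2)/2 ~= 0.2928...;
   larger inputs take the fyl2x path on 1 + x, where the explicit
   addition no longer costs precision.  log2p1 below is a hypothetical
   stand-in for the fyl2xp1 behaviour, i.e. log2(1 + x):

     long double log1p_model (long double x)
     {
       const long double thresh = 0.29289321881345247561810596348408353L;
       const long double ln2 = 0.6931471805599453094L;   // fldln2
       if (fabsl (x) >= thresh)
         return ln2 * log2l (1.0L + x);                  // fyl2x
       return ln2 * log2p1 (x);                          // fyl2xp1
     }
*/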
/* Emit code for round calculation.  */
void ix86_emit_i387_round (rtx op0, rtx op1)
{
  enum machine_mode inmode = GET_MODE (op1);
  enum machine_mode outmode = GET_MODE (op0);
  rtx e1, e2, res, tmp, tmp1, half;
  rtx scratch = gen_reg_rtx (HImode);
  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
  rtx jump_label = gen_label_rtx ();
  rtx insn;
  rtx (*gen_abs) (rtx, rtx);
  rtx (*gen_neg) (rtx, rtx);

  switch (inmode)
    {
    case SFmode:
      gen_abs = gen_abssf2;
      break;
    case DFmode:
      gen_abs = gen_absdf2;
      break;
    case XFmode:
      gen_abs = gen_absxf2;
      break;
    default:
      gcc_unreachable ();
    }

  switch (outmode)
    {
    case SFmode:
      gen_neg = gen_negsf2;
      break;
    case DFmode:
      gen_neg = gen_negdf2;
      break;
    case XFmode:
      gen_neg = gen_negxf2;
      break;
    case HImode:
      gen_neg = gen_neghi2;
      break;
    case SImode:
      gen_neg = gen_negsi2;
      break;
    case DImode:
      gen_neg = gen_negdi2;
      break;
    default:
      gcc_unreachable ();
    }

  e1 = gen_reg_rtx (inmode);
  e2 = gen_reg_rtx (inmode);
  res = gen_reg_rtx (outmode);

  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);

  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */

  /* scratch = fxam(op1) */
  emit_insn (gen_rtx_SET (VOIDmode, scratch,
                          gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
                                          UNSPEC_FXAM)));
  /* e1 = fabs(op1) */
  emit_insn (gen_abs (e1, op1));

  /* e2 = e1 + 0.5 */
  half = force_reg (inmode, half);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (inmode, e1, half)));

  /* res = floor(e2) */
  if (inmode != XFmode)
    {
      tmp1 = gen_reg_rtx (XFmode);

      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
                              gen_rtx_FLOAT_EXTEND (XFmode, e2)));
    }
  else
    tmp1 = e2;

  switch (outmode)
    {
    case SFmode:
    case DFmode:
      {
        rtx tmp0 = gen_reg_rtx (XFmode);

        emit_insn (gen_frndintxf2_floor (tmp0, tmp1));

        emit_insn (gen_rtx_SET (VOIDmode, res,
                                gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
                                                UNSPEC_TRUNC_NOOP)));
      }
      break;
    case XFmode:
      emit_insn (gen_frndintxf2_floor (res, tmp1));
      break;
    case HImode:
      emit_insn (gen_lfloorxfhi2 (res, tmp1));
      break;
    case SImode:
      emit_insn (gen_lfloorxfsi2 (res, tmp1));
      break;
    case DImode:
      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
    default:
      gcc_unreachable ();
    }

  /* flags = signbit(a) */
  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));

  /* if (flags) then res = -res */
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
                              gen_rtx_EQ (VOIDmode, flags, const0_rtx),
                              gen_rtx_LABEL_REF (VOIDmode, jump_label),
                              pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
  JUMP_LABEL (insn) = jump_label;

  emit_insn (gen_neg (res, res));

  emit_label (jump_label);
  LABEL_NUSES (jump_label) = 1;

  emit_move_insn (op0, res);
}
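
/* Illustration, not part of GCC: the identity driving the sequence
   above, as plain C (hypothetical name).  The emitted code takes the
   sign from fxam's C1 bit rather than from a compare against zero,
   so -0.25 correctly rounds to -0.0:

     #include <math.h>

     double round_model (double a)
     {
       double r = floor (fabs (a) + 0.5);
       return signbit (a) ? -r : r;
     }

   Unlike rint(), halfway cases round away from zero regardless of
   the current rounding mode.  */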
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */

  b = force_reg (mode, b);

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                          UNSPEC_RCP)));
  /* e0 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, b)));

  /* e0 = x0 * e0 */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, e0)));

  /* e1 = x0 + x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_PLUS (mode, x0, x0)));

  /* x1 = e1 - e0 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
                          gen_rtx_MINUS (mode, e1, e0)));

  /* res = a * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, a, x1)));
}
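
/* Illustration, not part of GCC: the algebra above as plain C
   (hypothetical name).  With r0 = rcp(b) accurate to roughly 12 bits,
   one Newton-Raphson step for f(x) = 1/x - b gives
   x1 = x0 * (2 - b*x0), rewritten as 2*x0 - b*x0*x0 to match the
   emitted RTL:

     float swdiv_model (float a, float b, float r0)  // r0 ~ 1/b
     {
       float e0 = (r0 * b) * r0;    // b * rcp(b) * rcp(b)
       float e1 = r0 + r0;          // 2 * rcp(b)
       float x1 = e1 - e0;          // refined reciprocal, ~24 bits
       return a * x1;
     }
*/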
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
                         bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (mode, true, mthree);
      mhalf = ix86_build_const_vector (mode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */

  a = force_reg (mode, a);

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
                                          UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX(mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
                              gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
                              gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e2, e3)));
}
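
/* Illustration, not part of GCC: the algebra above as plain C
   (hypothetical name).  With r0 = rsqrt(a) accurate to roughly 12
   bits, one Newton-Raphson step for f(x) = 1/x^2 - a gives
   x1 = 0.5 * x0 * (3 - a*x0*x0), which the sequence factors as
   -0.5 * x0 * (a*x0*x0 - 3):

     float rsqrt_model (float a, float r0)   // r0 ~ 1/sqrt(a)
     {
       float e0 = r0 * a;
       float e1 = e0 * r0;                   // a * r0 * r0
       float e2 = e1 - 3.0f;
       float e3 = r0 * -0.5f;
       return e2 * e3;                       // refined 1/sqrt(a)
     }

   For sqrt itself e3 is computed from e0 = a * r0 instead of r0,
   scaling the result by a, since a / sqrt(a) == sqrt(a).  */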
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */
int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */
static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      enum machine_mode vmode;

      if (mode == SFmode)
        vmode = V4SFmode;
      else if (mode == DFmode)
        vmode = V2DFmode;
      else
        vmode = mode;

      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
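
/* Illustration, not part of GCC: a bit-level model of the mask dance
   above for SFmode (hypothetical name), assuming IEEE single format:

     #include <stdint.h>
     #include <string.h>

     float copysign_pos_model (float abs_value, float sign)
     {
       uint32_t a, s;
       memcpy (&a, &abs_value, 4);
       memcpy (&s, &sign, 4);
       a |= s & 0x80000000u;       // sgn = sign & signbit-mask
       memcpy (&abs_value, &a, 4); // result = abs_value | sgn
       return abs_value;
     }
*/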
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */
static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode vmode, mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  if (mode == SFmode)
    vmode = V4SFmode;
  else if (mode == DFmode)
    vmode = V2DFmode;
  else
    vmode = mode;

  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  rtx (*insn)(rtx, rtx, rtx, rtx);
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;

  emit_insn (insn (mask, op0, op1,
                   gen_rtx_fmt_ee (code, mode, op0, op1)));
  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
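
/* Illustration, not part of GCC: why the TWO52 trick used by the
   expanders below works (hypothetical name).  For doubles with
   |x| < 2^52 the addition pushes every fraction bit out of the
   53-bit significand, so the intermediate is x rounded to an integer
   in the current (round-to-nearest) mode; the subtraction is exact:

     double rint_model (double x)    // valid for 0 <= x < 2^52
     {
       double t = x + 0x1p52;        // rounds to integer here
       return t - 0x1p52;            // exact
     }

   SFmode uses 2^23, the width of the float significand, instead.  */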
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       op0 = (long)tmp
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
/* Expand SSE2 sequence for computing lround from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
	xi = (long)op1;
	xi -= (double)xi > op1 ? 1 : 0;
	return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
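
/* Illustration, not part of GCC: the floor flavour of the
   compensation above, as plain C (hypothetical name):

     long lfloor_model (double x)
     {
       long i = (long) x;            // cvttsd2si truncates toward zero
       double f = (double) i;
       if (f > x)                    // only possible for negative x
         i -= 1;
       return i;
     }

   The emitted compare uses UNLE with a branch around the decrement,
   so the adjustment is also skipped when the operands are unordered.  */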
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	xa = fabs (operand1);
        if (!isless (xa, 2**52))
	  return operand1;
        xa = xa + 2**52 - 2**52;
        return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa = xa + TWO52 - TWO52;
        x2 = copysign (xa, x);
     Compensate.  Floor:
        if (x2 > x)
          x2 -= 1;
     Compensate.  Ceil:
        if (x2 < x)
          x2 += 1;
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
	x2 = (double)(long)x;
     Compensate.  Floor:
	if (x2 > x)
	  x2 -= 1;
     Compensate.  Ceil:
	if (x2 < x)
	  x2 += 1;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1),
                                   mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
	double xa = fabs (x), xa2, x2;
	if (!isless (xa, TWO52))
	  return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
	xa2 = xa + TWO52 - TWO52;
     Compensate.
	dxa = xa2 - xa;
	if (dxa <= -0.5)
	  xa2 += 1;
	else if (dxa > 0.5)
	  xa2 -= 1;
	x2 = copysign (xa2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX,
			     0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
			       0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
			  gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
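
/* Illustrative host-side sketch of the compensation scheme above; not
   part of GCC, guarded out of the build, helper name hypothetical.
   TWO52 rounds to nearest-even; the dxa test then nudges halfway and
   beyond-halfway cases so that ties round away from zero, matching
   round() semantics.  */
#if 0
#include <math.h>

static double
sketch_round_32 (double x)
{
  const double TWO52 = 0x1p52;
  double xa = fabs (x), xa2, dxa;

  if (!isless (xa, TWO52))
    return x;
  xa2 = xa + TWO52 - TWO52;	/* nearest-even integer */
  dxa = xa2 - xa;		/* rounding error, in (-1, 1) */
  if (dxa > 0.5)		/* rounded up by more than half: undo */
    xa2 -= 1.0;
  else if (dxa <= -0.5)		/* rounded down by half or more: undo */
    xa2 += 1.0;
  return copysign (xa2, x);
}
#endif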
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	x2 = (double)(long)x;
	if (HONOR_SIGNED_ZEROS (mode))
	  return copysign (x2, x);
	return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
	double xa = fabs (x), x2;
	if (!isless (xa, TWO52))
	  return x;
	xa2 = xa + TWO52 - TWO52;
     Compensate:
	if (xa2 > xa)
	  xa2 -= 1.0;
	x2 = copysign (xa2, x);
	return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
			  gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
	double xa = fabs (x);
	if (!isless (xa, TWO52))
	  return x;
	xa = (double)(long)(xa + nextafter (0.5, 0.0));
	return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
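
/* Illustrative host-side sketch (not GCC code, guarded out, helper name
   hypothetical) of why the expander adds nextafter (0.5, 0.0) rather
   than 0.5: for inputs like 0.49999999999999994 (the largest double
   below 0.5), adding exactly 0.5 rounds up to 1.0 in the FPU and would
   make round() return 1 instead of 0.  */
#if 0
#include <math.h>

static double
sketch_round (double x)
{
  double pred_half = nextafter (0.5, 0.0);	/* 0.5 - 2**-54 */
  double xa = fabs (x);

  if (!isless (xa, 0x1p52))
    return x;
  xa = (double) (long long) (xa + pred_half);	/* truncating conversion */
  return copysign (xa, x);
}
#endif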
/* Expand SSE sequence for computing round
   from OP1 storing into OP0 using sse4 round insn.  */
void
ix86_expand_round_sse4 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx e1, e2, res, half;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx (*gen_copysign) (rtx, rtx, rtx);
  rtx (*gen_round) (rtx, rtx, rtx);

  switch (mode)
    {
    case SFmode:
      gen_copysign = gen_copysignsf3;
      gen_round = gen_sse4_1_roundsf2;
      break;
    case DFmode:
      gen_copysign = gen_copysigndf3;
      gen_round = gen_sse4_1_rounddf2;
      break;
    default:
      gcc_unreachable ();
    }

  /* round (a) = trunc (a + copysign (0.5, a)) */

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
  half = const_double_from_real_value (pred_half, mode);

  /* e1 = copysign (0.5, op1) */
  e1 = gen_reg_rtx (mode);
  emit_insn (gen_copysign (e1, half, op1));

  /* e2 = op1 + e1 */
  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = trunc (e2) */
  res = gen_reg_rtx (mode);
  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));

  emit_move_insn (op0, res);
}
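
/* The same round-via-trunc identity expressed with SSE4.1 intrinsics on
   the host; an illustrative sketch only (GCC emits the roundsd pattern
   directly), guarded out of the build, helper name hypothetical.  */
#if 0
#include <math.h>
#include <smmintrin.h>

static double
sketch_round_sse4 (double x)
{
  /* round (a) = trunc (a + copysign (nextafter (0.5, 0.0), a)) */
  double e1 = copysign (nextafter (0.5, 0.0), x);
  __m128d e2 = _mm_set_sd (x + e1);
  __m128d res = _mm_round_sd (e2, e2, _MM_FROUND_TRUNC);
  return _mm_cvtsd_f64 (res);
}
#endif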
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
    true },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute,
    true },
  /* The transactional memory builtins are implicitly regparm or fastcall
     depending on the ABI.  Override the generic do-nothing attribute that
     these builtins were declared with.  */
  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
    true },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute, false },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
    false },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
    false },
  { "callee_pop_aggregate_return", 1, 1, false, true, true,
    ix86_handle_callee_pop_aggregate_return, true },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL, false }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype ATTRIBUTE_UNUSED,
				 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
      case scalar_stmt:
	return ix86_cost->scalar_stmt_cost;

      case scalar_load:
	return ix86_cost->scalar_load_cost;

      case scalar_store:
	return ix86_cost->scalar_store_cost;

      case vector_stmt:
	return ix86_cost->vec_stmt_cost;

      case vector_load:
	return ix86_cost->vec_align_load_cost;

      case vector_store:
	return ix86_cost->vec_store_cost;

      case vec_to_scalar:
	return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
	return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
	return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
	return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
	return ix86_cost->vec_stmt_cost;

      default:
	gcc_unreachable ();
    }
}
/* Return a vector mode with twice as many elements as VMODE.  */
/* ??? Consider moving this to a table generated by genmodes.c.  */

static enum machine_mode
doublesize_vector_mode (enum machine_mode vmode)
{
  switch (vmode)
    {
    case V2SFmode:	return V4SFmode;
    case V1DImode:	return V2DImode;
    case V2SImode:	return V4SImode;
    case V4HImode:	return V8HImode;
    case V8QImode:	return V16QImode;

    case V2DFmode:	return V4DFmode;
    case V4SFmode:	return V8SFmode;
    case V2DImode:	return V4DImode;
    case V4SImode:	return V8SImode;
    case V8HImode:	return V16HImode;
    case V16QImode:	return V32QImode;

    case V4DFmode:	return V8DFmode;
    case V8SFmode:	return V16SFmode;
    case V4DImode:	return V8DImode;
    case V8SImode:	return V16SImode;
    case V16HImode:	return V32HImode;
    case V32QImode:	return V64QImode;

    default:
      gcc_unreachable ();
    }
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (VOIDmode, target, x);

  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt)
{
  enum machine_mode v2mode;
  rtx x;

  v2mode = doublesize_vector_mode (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
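
/* Scalar model of the RTL built above; an illustrative sketch (not GCC
   code, guarded out, helper name hypothetical).  A vec_select over a
   vec_concat picks element perm[i] from the 2*nelt-element
   concatenation of op0 and op1.  */
#if 0
static void
sketch_vselect_vconcat (int *target, const int *op0, const int *op1,
			const unsigned char *perm, unsigned nelt)
{
  unsigned i;
  for (i = 0; i < nelt; ++i)
    target[i] = perm[i] < nelt ? op0[perm[i]] : op1[perm[i] - nelt];
}
#endif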
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;
  rtx rperm[32], vperm;

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
    ;
  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
    ;
  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
    ;
  else
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
	return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */
  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
    case V8SImode:
      for (i = 0; i < nelt; ++i)
	mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      vmode = V8HImode;
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8HImode;
      goto do_subreg;

    case V16QImode:
      /* See if bytes move in pairs so we can use pblendw with
	 an immediate argument, rather than pblendvb with a vector
	 argument.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  {
	  use_pblendvb:
	    for (i = 0; i < nelt; ++i)
	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

	  finish_pblendvb:
	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
	    vperm = force_reg (vmode, vperm);

	    if (GET_MODE_SIZE (vmode) == 16)
	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
	    else
	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
	    return true;
	  }

      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8HImode;
      /* FALLTHRU */

    do_subreg:
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    case V32QImode:
      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
      for (i = 0; i < 32; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  goto use_pblendvb;
      /* See if bytes move in quadruplets.  If yes, vpblendd
	 with immediate can be used.  */
      for (i = 0; i < 32; i += 4)
	if (d->perm[i] + 2 != d->perm[i + 2])
	  break;
      if (i < 32)
	{
	  /* See if bytes move the same in both lanes.  If yes,
	     vpblendw with immediate can be used.  */
	  for (i = 0; i < 16; i += 2)
	    if (d->perm[i] + 16 != d->perm[i + 16])
	      goto use_pblendvb;

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i * 2] >= 32) << i;
	  vmode = V16HImode;
	  goto do_subreg;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 4] >= 32) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V16HImode:
      /* See if words move in pairs.  If yes, vpblendd can be used.  */
      for (i = 0; i < 16; i += 2)
	if (d->perm[i] + 1 != d->perm[i + 1])
	  break;
      if (i < 16)
	{
	  /* See if words move the same in both lanes.  If not,
	     vpblendvb must be used.  */
	  for (i = 0; i < 8; i++)
	    if (d->perm[i] + 8 != d->perm[i + 8])
	      {
		/* Use vpblendvb.  */
		for (i = 0; i < 32; ++i)
		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);

		vmode = V32QImode;
		nelt = 32;
		target = gen_lowpart (vmode, target);
		op0 = gen_lowpart (vmode, op0);
		op1 = gen_lowpart (vmode, op1);
		goto finish_pblendvb;
	      }

	  /* Use vpblendw.  */
	  for (i = 0; i < 16; ++i)
	    mask |= (d->perm[i] >= 16) << i;
	  break;
	}

      /* Use vpblendd.  */
      for (i = 0; i < 8; ++i)
	mask |= (d->perm[i * 2] >= 16) << i;
      vmode = V8SImode;
      goto do_subreg;

    case V4DImode:
      /* Use vpblendd.  */
      for (i = 0; i < 4; ++i)
	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      vmode = V8SImode;
      goto do_subreg;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
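
/* Host-side sketch (not GCC code, guarded out, helper name hypothetical)
   of the immediate-mask computation used above for the pblendw-style
   cases: bit i of the immediate selects element i of the second operand
   whenever the permutation asks for an element from it.  Only valid for
   a genuine blend, i.e. perm[i] is either i or i + nelt.  */
#if 0
static unsigned
sketch_blend_mask (const unsigned char *perm, unsigned nelt)
{
  unsigned i, mask = 0;

  for (i = 0; i < nelt; ++i)
    mask |= (perm[i] >= nelt) << i;	/* 1 = take op1's element */
  return mask;
}
#endif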
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
	return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
	 from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
	e -= (8 + 4);
      else if (e >= 4)
	e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* Return true if permutation D can be performed as VMODE permutation
   instead.  */

static bool
valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
{
  unsigned int i, j, chunk;

  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
    return false;

  if (GET_MODE_NUNITS (vmode) >= d->nelt)
    return true;

  chunk = d->nelt / GET_MODE_NUNITS (vmode);
  for (i = 0; i < d->nelt; i += chunk)
    if (d->perm[i] & (chunk - 1))
      return false;
    else
      for (j = 1; j < chunk; ++j)
	if (d->perm[i] + j != d->perm[i + j])
	  return false;

  return true;
}
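
/* Scalar restatement of the test above; an illustrative sketch (not GCC
   code, guarded out, helper name hypothetical).  A permutation can be
   retried with wider elements iff every chunk of CHUNK consecutive
   indices starts chunk-aligned and moves as one block.  */
#if 0
#include <stdbool.h>

static bool
sketch_valid_in_chunks (const unsigned char *perm, unsigned nelt,
			unsigned chunk)
{
  unsigned i, j;

  for (i = 0; i < nelt; i += chunk)
    {
      if (perm[i] & (chunk - 1))	/* chunk must start aligned */
	return false;
      for (j = 1; j < chunk; ++j)	/* and stay consecutive */
	if (perm[i] + j != perm[i + j])
	  return false;
    }
  return true;
}
#endif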
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb, vpperm, vpermq, vpermd or vperm2i128.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz, mask;
  unsigned char perm[32];
  enum machine_mode vmode = V16QImode;
  rtx rperm[32], vperm, target, op0, op1;

  nelt = d->nelt;

  if (d->op0 != d->op1)
    {
      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
	{
	  if (TARGET_AVX2
	      && valid_perm_using_mode_p (V2TImode, d))
	    {
	      if (d->testing_p)
		return true;

	      /* Use vperm2i128 insn.  The pattern uses
		 V4DImode instead of V2TImode.  */
	      target = gen_lowpart (V4DImode, d->target);
	      op0 = gen_lowpart (V4DImode, d->op0);
	      op1 = gen_lowpart (V4DImode, d->op1);
	      rperm[0]
		= GEN_INT (((d->perm[0] & (nelt / 2)) ? 1 : 0)
			   || ((d->perm[nelt / 2] & (nelt / 2)) ? 2 : 0));
	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
	      return true;
	    }
	  return false;
	}
    }
  else
    {
      if (GET_MODE_SIZE (d->vmode) == 16)
	{
	  if (!TARGET_SSSE3)
	    return false;
	}
      else if (GET_MODE_SIZE (d->vmode) == 32)
	{
	  if (!TARGET_AVX2)
	    return false;

	  /* V4DImode should be already handled through
	     expand_vselect by vpermq instruction.  */
	  gcc_assert (d->vmode != V4DImode);

	  vmode = V32QImode;
	  if (d->vmode == V8SImode
	      || d->vmode == V16HImode
	      || d->vmode == V32QImode)
	    {
	      /* First see if vpermq can be used for
		 V8SImode/V16HImode/V32QImode.  */
	      if (valid_perm_using_mode_p (V4DImode, d))
		{
		  for (i = 0; i < 4; i++)
		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
		  if (d->testing_p)
		    return true;
		  return expand_vselect (gen_lowpart (V4DImode, d->target),
					 gen_lowpart (V4DImode, d->op0),
					 perm, 4);
		}

	      /* Next see if vpermd can be used.  */
	      if (valid_perm_using_mode_p (V8SImode, d))
		vmode = V8SImode;
	    }

	  if (vmode == V32QImode)
	    {
	      /* vpshufb only works intra lanes, it is not
		 possible to shuffle bytes in between the lanes.  */
	      for (i = 0; i < nelt; ++i)
		if ((d->perm[i] ^ i) & (nelt / 2))
		  return false;
	    }
	}
      else
	return false;
    }

  if (d->testing_p)
    return true;

  if (vmode == V8SImode)
    for (i = 0; i < 8; ++i)
      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
  else
    {
      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
      if (d->op0 != d->op1)
	mask = 2 * nelt - 1;
      else if (vmode == V16QImode)
	mask = nelt - 1;
      else
	mask = nelt / 2 - 1;

      for (i = 0; i < nelt; ++i)
	{
	  unsigned j, e = d->perm[i] & mask;
	  for (j = 0; j < eltsz; ++j)
	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
	}
    }

  vperm = gen_rtx_CONST_VECTOR (vmode,
				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
  vperm = force_reg (vmode, vperm);

  target = gen_lowpart (vmode, d->target);
  op0 = gen_lowpart (vmode, d->op0);
  if (d->op0 == d->op1)
    {
      if (vmode == V16QImode)
	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
      else if (vmode == V32QImode)
	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
      else
	emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
    }
  else
    {
      op1 = gen_lowpart (vmode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->op0 == d->op1)
    {
      int mask = nelt - 1;
      bool identity_perm = true;
      bool broadcast_perm = true;

      for (i = 0; i < nelt; i++)
	{
	  perm2[i] = d->perm[i] & mask;
	  if (perm2[i] != i)
	    identity_perm = false;
	  if (perm2[i])
	    broadcast_perm = false;
	}

      if (identity_perm)
	{
	  if (!d->testing_p)
	    emit_move_insn (d->target, d->op0);
	  return true;
	}
      else if (broadcast_perm && TARGET_AVX2)
	{
	  /* Use vpbroadcast{b,w,d}.  */
	  rtx op = d->op0, (*gen) (rtx, rtx) = NULL;

	  switch (d->vmode)
	    {
	    case V32QImode:
	      op = gen_lowpart (V16QImode, op);
	      gen = gen_avx2_pbroadcastv32qi;
	      break;
	    case V16HImode:
	      op = gen_lowpart (V8HImode, op);
	      gen = gen_avx2_pbroadcastv16hi;
	      break;
	    case V8SImode:
	      op = gen_lowpart (V4SImode, op);
	      gen = gen_avx2_pbroadcastv8si;
	      break;
	    case V16QImode:
	      gen = gen_avx2_pbroadcastv16qi;
	      break;
	    case V8HImode:
	      gen = gen_avx2_pbroadcastv8hi;
	      break;
	    /* For other modes prefer other shuffles this function creates.  */
	    default:
	      break;
	    }
	  if (gen != NULL)
	    {
	      if (!d->testing_p)
		emit_insn (gen (d->target, op));
	      return true;
	    }
	}

      if (expand_vselect (d->target, d->op0, perm2, nelt))
	return true;

      /* There are plenty of patterns in sse.md that are written for
	 SEL+CONCAT and are not replicated for a single op.  Perhaps
	 that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
	 every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
	{
	  perm2[i] = d->perm[i] & mask;
	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
	}
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
	return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
	{
	  for (i = 0; i < nelt; i += 4)
	    {
	      perm2[i + 0] = d->perm[i + 0] & mask;
	      perm2[i + 1] = d->perm[i + 1] & mask;
	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
	    }

	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
	    return true;
	}
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (d->op0 != d->op1)
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
     vpshufb, vpermd or vpermq variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8);
  gcc_assert (ok);

  return true;
}
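
/* Scalar model of the two-insn split above; an illustrative sketch (not
   GCC code, guarded out, helper name hypothetical).  pshuflw permutes
   words 0-3 and copies 4-7; pshufhw does the converse; so any V8HI
   permutation confined to its own 64-bit half decomposes into the pair.  */
#if 0
static void
sketch_pshuflw_pshufhw (short *v, const unsigned char *perm)
{
  short t1[8], t2[8];
  unsigned i;

  /* pshuflw: permute words 0-3, copy words 4-7.  */
  for (i = 0; i < 4; ++i)
    t1[i] = v[perm[i]];
  for (i = 4; i < 8; ++i)
    t1[i] = v[i];

  /* pshufhw: copy words 0-3, permute words 4-7.  */
  for (i = 0; i < 4; ++i)
    t2[i] = t1[i];
  for (i = 4; i < 8; ++i)
    t2[i] = t1[perm[i]];

  for (i = 0; i < 8; ++i)
    v[i] = t2[i];
}
#endif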
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
	min = e;
      if (e > max)
	max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
				  gen_lowpart (TImode, d->op1),
				  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
	in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
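
/* Scalar model of the palignr step above; an illustrative sketch (not
   GCC code, guarded out, helper name hypothetical).  With op0 as the
   low half of the concatenation, shifting right by MIN elements puts
   element MIN + k at position k, so the residual permutation becomes a
   one-operand permutation with indices perm[i] - MIN.  */
#if 0
static void
sketch_palignr (int *dst, const int *op0, const int *op1,
		unsigned nelt, unsigned shift)
{
  unsigned i;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = i + shift;
      dst[i] = e < nelt ? op0[e] : op1[e - nelt];
    }
}
#endif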
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned HOST_WIDE_INT contents;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok, same_halves = false;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      if (d->op0 == d->op1)
	return false;
    }
  else if (GET_MODE_SIZE (d->vmode) == 32)
    {
      if (!TARGET_AVX)
	return false;
      /* For 32-byte modes allow even d->op0 == d->op1.
	 The lack of cross-lane shuffling in some instructions
	 might prevent a single insn shuffle.  */
    }
  else
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  if (GET_MODE_SIZE (d->vmode) == 16)
    {
      unsigned HOST_WIDE_INT h1, h2, h3, h4;

      /* Split the two input vectors into 4 halves.  */
      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
      h2 = h1 << nelt2;
      h3 = h2 << nelt2;
      h4 = h3 << nelt2;

      /* If the elements from the low halves use interleave low, and similarly
	 for interleave high.  If the elements are from mis-matched halves, we
	 can use shufps for V4SF/V4SI or do a DImode shuffle.  */
      if ((contents & (h1 | h3)) == contents)
	{
	  /* punpckl* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h2 | h4)) == contents)
	{
	  /* punpckh* */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i * 2;
	      remap[i + nelt + nelt2] = i * 2 + 1;
	      dremap.perm[i * 2] = i + nelt2;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
	    }
	  if (!TARGET_SSE2 && d->vmode == V4SImode)
	    dremap.vmode = V4SFmode;
	}
      else if ((contents & (h1 | h4)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i] = i;
	      remap[i + nelt + nelt2] = i + nelt2;
	      dremap.perm[i] = i;
	      dremap.perm[i + nelt2] = i + nelt + nelt2;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 0;
	      dremap.perm[1] = 3;
	    }
	}
      else if ((contents & (h2 | h3)) == contents)
	{
	  /* shufps */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nelt2] = i;
	      remap[i + nelt] = i + nelt2;
	      dremap.perm[i] = i + nelt2;
	      dremap.perm[i + nelt2] = i + nelt;
	    }
	  if (nelt != 4)
	    {
	      /* shufpd */
	      dremap.vmode = V2DImode;
	      dremap.nelt = 2;
	      dremap.perm[0] = 1;
	      dremap.perm[1] = 2;
	    }
	}
      else
	return false;
    }
  else
    {
      unsigned int nelt4 = nelt / 4, nzcnt = 0;
      unsigned HOST_WIDE_INT q[8];
      unsigned int nonzero_halves[4];

      /* Split the two input vectors into 8 quarters.  */
      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
      for (i = 1; i < 8; ++i)
	q[i] = q[0] << (nelt4 * i);
      for (i = 0; i < 4; ++i)
	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
	  {
	    nonzero_halves[nzcnt] = i;
	    ++nzcnt;
	  }

      if (nzcnt == 1)
	{
	  gcc_assert (d->op0 == d->op1);
	  nonzero_halves[1] = nonzero_halves[0];
	  same_halves = true;
	}
      else if (d->op0 == d->op1)
	{
	  gcc_assert (nonzero_halves[0] == 0);
	  gcc_assert (nonzero_halves[1] == 1);
	}

      if (nzcnt <= 2)
	{
	  if (d->perm[0] / nelt2 == nonzero_halves[1])
	    {
	      /* Attempt to increase the likelihood that dfinal
		 shuffle will be intra-lane.  */
	      char tmph = nonzero_halves[0];
	      nonzero_halves[0] = nonzero_halves[1];
	      nonzero_halves[1] = tmph;
	    }

	  /* vperm2f128 or vperm2i128.  */
	  for (i = 0; i < nelt2; ++i)
	    {
	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
	      remap[i + nonzero_halves[0] * nelt2] = i;
	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
	    }

	  if (d->vmode != V8SFmode
	      && d->vmode != V4DFmode
	      && d->vmode != V8SImode)
	    {
	      dremap.vmode = V8SImode;
	      dremap.nelt = 8;
	      for (i = 0; i < 4; ++i)
		{
		  dremap.perm[i] = i + nonzero_halves[0] * 4;
		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
		}
	    }
	}
      else if (d->op0 == d->op1)
	return false;
      else if (TARGET_AVX2
	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
	{
	  /* vpunpckl* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i] = i * 2;
	      remap[i + nelt] = i * 2 + 1;
	      remap[i + nelt2] = i * 2 + nelt2;
	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i;
	      dremap.perm[i * 2 + 1] = i + nelt;
	      dremap.perm[i * 2 + nelt2] = i + nelt2;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
	    }
	}
      else if (TARGET_AVX2
	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
	{
	  /* vpunpckh* */
	  for (i = 0; i < nelt4; ++i)
	    {
	      remap[i + nelt4] = i * 2;
	      remap[i + nelt + nelt4] = i * 2 + 1;
	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
	      dremap.perm[i * 2] = i + nelt4;
	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
	    }
	}
      else
	return false;
    }

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      /* If same_halves is true, both halves of the remapped vector are the
	 same.  Avoid cross-lane accesses if possible.  */
      if (same_halves && i >= nelt2)
	{
	  gcc_assert (e < nelt2);
	  dfinal.perm[i] = e + nelt2;
	}
      else
	dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (d->testing_p)
    return true;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a single vector cross-lane permutation into vpermq followed
   by any of the single insn permutations.  */

static bool
expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
  unsigned contents[2];
  bool ok;

  if (!(TARGET_AVX2
	&& (d->vmode == V32QImode || d->vmode == V16HImode)
	&& d->op0 == d->op1))
    return false;

  contents[0] = 0;
  contents[1] = 0;
  for (i = 0; i < nelt2; ++i)
    {
      contents[0] |= 1u << (d->perm[i] / nelt4);
      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
    }

  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
	  return false;
    }

  if (d->testing_p)
    return true;

  dremap = *d;
  dremap.vmode = V4DImode;
  dremap.nelt = 4;
  dremap.target = gen_reg_rtx (V4DImode);
  dremap.op0 = gen_lowpart (V4DImode, d->op0);
  dremap.op1 = dremap.op0;
  for (i = 0; i < 2; ++i)
    {
      unsigned int cnt = 0;
      for (j = 0; j < 4; ++j)
	if ((contents[i] & (1u << j)) != 0)
	  dremap.perm[2 * i + cnt++] = j;
      for (; cnt < 2; ++cnt)
	dremap.perm[2 * i + cnt] = 0;
    }

  dfinal = *d;
  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
  dfinal.op1 = dfinal.op0;
  for (i = 0, j = 0; i < nelt; ++i)
    {
      if (i == nelt2)
	j = 2;
      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
      if ((d->perm[i] / nelt4) == dremap.perm[j])
	;
      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
	dfinal.perm[i] |= nelt4;
      else
	gcc_unreachable ();
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  ok = expand_vec_perm_1 (&dfinal);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation using 2 intra-lane interleave insns
   and cross-lane shuffle for 32-byte vectors.  */

static bool
expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt;
  rtx (*gen) (rtx, rtx, rtx);

  if (d->op0 == d->op1)
    return false;
  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
    ;
  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
    ;
  else
    return false;

  nelt = d->nelt;
  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
    return false;
  for (i = 0; i < nelt; i += 2)
    if (d->perm[i] != d->perm[0] + i / 2
	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
      return false;

  if (d->testing_p)
    return true;

  switch (d->vmode)
    {
    case V32QImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv32qi;
      else
	gen = gen_vec_interleave_lowv32qi;
      break;
    case V16HImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv16hi;
      else
	gen = gen_vec_interleave_lowv16hi;
      break;
    case V8SImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8si;
      else
	gen = gen_vec_interleave_lowv8si;
      break;
    case V4DImode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4di;
      else
	gen = gen_vec_interleave_lowv4di;
      break;
    case V8SFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv8sf;
      else
	gen = gen_vec_interleave_lowv8sf;
      break;
    case V4DFmode:
      if (d->perm[0])
	gen = gen_vec_interleave_highv4df;
      else
	gen = gen_vec_interleave_lowv4df;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (d->target, d->op0, d->op1));
  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (d->op0 != d->op1);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
	e -= nelt;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
	  rperm[1-which][i*eltsz + j] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
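
/* The same two-mask trick written with SSSE3 intrinsics on the host; an
   illustrative sketch (not GCC code, guarded out, helper name
   hypothetical), shown for byte elements.  Each pshufb pulls the wanted
   bytes from one source and zeroes the rest (bit 7 set in the control
   byte), so a plain OR merges the two partial results.  */
#if 0
#include <tmmintrin.h>

static __m128i
sketch_perm2 (__m128i op0, __m128i op1, const unsigned char *perm)
{
  unsigned char c0[16], c1[16];
  unsigned i;

  for (i = 0; i < 16; ++i)
    if (perm[i] < 16)
      {
	c0[i] = perm[i];	/* take byte from op0 */
	c1[i] = 0x80;		/* zero this lane of the op1 result */
      }
    else
      {
	c0[i] = 0x80;
	c1[i] = perm[i] - 16;	/* take byte from op1 */
      }

  __m128i m0 = _mm_loadu_si128 ((const __m128i *) c0);
  __m128i m1 = _mm_loadu_si128 ((const __m128i *) c1);
  return _mm_or_si128 (_mm_shuffle_epi8 (op0, m0),
		       _mm_shuffle_epi8 (op1, m1));
}
#endif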
/* Implement arbitrary permutation of one V32QImode or V16HImode operand
   with two vpshufb insns, vpermq and vpor.  We should have already failed
   all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, hp, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 != d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  /* Swap the 128-bit lanes of h into hp.  */
  hp = gen_reg_rtx (V4DImode);
  op = gen_lowpart (V4DImode, h);
  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
				  const1_rtx));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));

  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
   and extract-odd permutations of two V32QImode or V16HImode operands
   with two vpshufb insns, vpor and vpermq.  We should have already
   failed all two or three instruction sequences.  */

static bool
expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
{
  rtx rperm[2][32], vperm, l, h, ior, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  for (i = 0; i < d->nelt; ++i)
    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
      return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  In the first permutation mask
     the first quarter will contain indexes for the first half
     of the op0, the second quarter will contain bit 7 set, third quarter
     will contain indexes for the second half of the op0 and the
     last quarter bit 7 set.  In the second permutation mask
     the first quarter will contain bit 7 set, the second quarter
     indexes for the first half of the op1, the third quarter bit 7 set
     and last quarter indexes for the second half of the op1.
     I.e. the first mask e.g. for V32QImode extract even will be:
     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
     (all values masked with 0xf except for -128) and second mask
     for extract even will be
     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned which = d->perm[i] >= nelt;
      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;

      for (j = 0; j < eltsz; ++j)
	{
	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
	}
    }

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
  vperm = force_reg (V32QImode, vperm);

  l = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op0);
  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
  vperm = force_reg (V32QImode, vperm);

  h = gen_reg_rtx (V32QImode);
  op = gen_lowpart (V32QImode, d->op1);
  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));

  ior = gen_reg_rtx (V32QImode);
  emit_insn (gen_iorv32qi3 (ior, l, h));

  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
  op = gen_lowpart (V4DImode, d->target);
  ior = gen_lowpart (V4DImode, ior);
  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
				  const1_rtx, GEN_INT (3)));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
	int mask = odd ? 0xdd : 0x88;

	t1 = gen_reg_rtx (V8SFmode);
	t2 = gen_reg_rtx (V8SFmode);
	t3 = gen_reg_rtx (V8SFmode);

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
				      GEN_INT (mask)));

	/* Shuffle the lanes around to produce:
	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
					    GEN_INT (0x3)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));

	/* Shuffle within the 128-bit lanes to produce:
	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));

	/* Shuffle the lanes around to produce:
	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
					    GEN_INT (0x20)));
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  /* We need 2*log2(N)-1 operations to achieve odd/even
	     with interleave.  */
	  t1 = gen_reg_rtx (V8HImode);
	  t2 = gen_reg_rtx (V8HImode);
	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
	  if (odd)
	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
	  else
	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
	  emit_insn (t3);
	}
      break;

    case V16QImode:
      if (TARGET_SSSE3)
	return expand_vec_perm_pshufb2 (d);
      else
	{
	  t1 = gen_reg_rtx (V16QImode);
	  t2 = gen_reg_rtx (V16QImode);
	  t3 = gen_reg_rtx (V16QImode);
	  emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
	  emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
	  emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
	  emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
	  if (odd)
	    t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
	  else
	    t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
	  emit_insn (t3);
	}
      break;

    case V16HImode:
    case V32QImode:
      return expand_vec_perm_vpshufb2_vpermq_even_odd (d);

    case V4DImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V4DFmode;
	  d_copy.target = gen_lowpart (V4DFmode, d->target);
	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V4DImode);
      t2 = gen_reg_rtx (V4DImode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an vpunpck[lh]qdq will produce the result required.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
      else
	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SImode:
      if (!TARGET_AVX2)
	{
	  struct expand_vec_perm_d d_copy = *d;
	  d_copy.vmode = V8SFmode;
	  d_copy.target = gen_lowpart (V8SFmode, d->target);
	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
	  return expand_vec_perm_even_odd_1 (&d_copy, odd);
	}

      t1 = gen_reg_rtx (V8SImode);
      t2 = gen_reg_rtx (V8SImode);

      /* Shuffle the lanes around into
	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t1),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x20)));
      emit_insn (gen_avx2_permv2ti (gen_lowpart (V4DImode, t2),
				    gen_lowpart (V4DImode, d->op0),
				    gen_lowpart (V4DImode, d->op1),
				    GEN_INT (0x31)));

      /* Swap the 2nd and 3rd position in each lane into
	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
      emit_insn (gen_avx2_pshufdv3 (t1, t1,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
      emit_insn (gen_avx2_pshufdv3 (t2, t2,
				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));

      /* Now an vpunpck[lh]qdq will produce
	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
      if (odd)
	t3 = gen_avx2_interleave_highv4di (gen_lowpart (V4DImode, d->target),
					   gen_lowpart (V4DImode, t1),
					   gen_lowpart (V4DImode, t2));
      else
	t3 = gen_avx2_interleave_lowv4di (gen_lowpart (V4DImode, d->target),
					  gen_lowpart (V4DImode, t1),
					  gen_lowpart (V4DImode, t2));
      emit_insn (t3);
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
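
/* Scalar model of the permutation all the machinery above implements;
   an illustrative sketch (not GCC code, guarded out, helper name
   hypothetical).  Extract-even takes elements 0, 2, 4, ... of the
   op0:op1 concatenation; extract-odd takes 1, 3, 5, ...  */
#if 0
static void
sketch_extract_even_odd (int *dst, const int *op0, const int *op1,
			 unsigned nelt, unsigned odd)
{
  unsigned i;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = 2 * i + odd;
      dst[i] = e < nelt ? op0[e] : op1[e - nelt];
    }
}
#endif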
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
	 use the vbroadcast instruction.  They expand to two insns
	 if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
	 stopping once we have promoted to V4SImode and then use pshufd.  */
      do
	{
	  optab otab = vec_interleave_low_optab;

	  if (elt >= nelt2)
	    {
	      otab = vec_interleave_high_optab;
	      elt -= nelt2;
	    }
	  nelt2 /= 2;

	  op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
	  vmode = get_mode_wider_vector (vmode);
	  op0 = gen_lowpart (vmode, op0);
	}
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
      gcc_assert (ok);
      return true;

    case V32QImode:
    case V16HImode:
    case V8SImode:
    case V4DImode:
      /* For AVX2 broadcasts of the first element vpbroadcast* or
	 vpermq should be used by expand_vec_perm_1.  */
      gcc_assert (!TARGET_AVX2 || d->perm[0]);
      return false;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* Implement arbitrary permutation of two V32QImode or V16HImode operands
   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
   all the shorter instruction sequences.  */

static bool
expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
{
  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
  unsigned int i, nelt, eltsz;
  bool used[4];

  if (!TARGET_AVX2
      || d->op0 == d->op1
      || (d->vmode != V32QImode && d->vmode != V16HImode))
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element is from
     the other lane, force a zero by setting bit 7 in the permutation mask.
     In the other mask the mask has non-negative elements if the element
     is requested from the other lane, but also moved to the other lane,
     so that the result of vpshufb can have the two V2TImode halves
     swapped.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < 32; ++i)
    {
      rperm[0][i] = m128;
      rperm[1][i] = m128;
      rperm[2][i] = m128;
      rperm[3][i] = m128;
    }
  used[0] = false;
  used[1] = false;
  used[2] = false;
  used[3] = false;
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);

      for (j = 0; j < eltsz; ++j)
	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
      used[which] = true;
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i + 1])
	{
	  h[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode,
				    gen_rtvec_v (32, rperm[2 * i + 1]));
      vperm = force_reg (V32QImode, vperm);
      h[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
    }

  /* Swap the 128-bit lanes of h[X].  */
  for (i = 0; i < 2; ++i)
    {
      if (h[i] == NULL_RTX)
	continue;
      op = gen_reg_rtx (V4DImode);
      emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
				      const2_rtx, GEN_INT (3), const0_rtx,
				      const1_rtx));
      h[i] = gen_lowpart (V32QImode, op);
    }

  for (i = 0; i < 2; ++i)
    {
      if (!used[2 * i])
	{
	  l[i] = NULL_RTX;
	  continue;
	}
      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
      vperm = force_reg (V32QImode, vperm);
      l[i] = gen_reg_rtx (V32QImode);
      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
    }

  for (i = 0; i < 2; ++i)
    {
      if (h[i] && l[i])
	{
	  op = gen_reg_rtx (V32QImode);
	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
	  l[i] = op;
	}
      else if (h[i])
	l[i] = h[i];
    }

  gcc_assert (l[0] && l[1]);
  op = gen_lowpart (V32QImode, d->target);
  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
  return true;
}
/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  if (expand_vec_perm_vpermq_perm_1 (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  if (expand_vec_perm_interleave3 (d))
    return true;

  /* Try sequences of four instructions.  */

  if (expand_vec_perm_vpshufb2_vpermq (d))
    return true;

  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  /* Even longer sequences.  */
  if (expand_vec_perm_vpshufb4_vpermq2 (d))
    return true;

  return false;
}
/* Expand a constant vector permutation.  OPERANDS[0] is the target,
   OPERANDS[1] and OPERANDS[2] the input vectors, and OPERANDS[3] the
   constant selection vector.  Return true on success.  */

bool
ix86_expand_vec_perm_const (rtx operands[4])
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  int i, nelt, which;
  rtx sel;

  d.target = operands[0];
  d.op0 = operands[1];
  d.op1 = operands[2];
  sel = operands[3];

  d.vmode = GET_MODE (d.target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
  gcc_assert (XVECLEN (sel, 0) == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      if (!rtx_equal_p (d.op0, d.op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      break;

    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] -= nelt;
      d.op0 = d.op1;
      break;
    }

  if (ix86_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with d.op0 == d.op1.  If that didn't work,
     retry with d.op0 != d.op1 as that is what testing has been done with.  */
  if (which == 3 && d.op0 == d.op1)
    {
      rtx seq;
      bool ok;

      memcpy (d.perm, perm, sizeof (perm));
      d.op1 = gen_reg_rtx (d.vmode);
      start_sequence ();
      ok = ix86_expand_vec_perm_const_1 (&d);
      seq = get_insns ();
      end_sequence ();
      if (ok)
	{
	  emit_move_insn (d.op1, d.op0);
	  emit_insn (seq);
	  return true;
	}
    }

  return false;
}
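/* As an illustration of the WHICH bookkeeping above: with nelt == 4 and
   a selector of { 0, 4, 1, 5 }, elements 0 and 1 select from op0 (bit 1)
   and elements 4 and 5 select from op1 (bit 2), so WHICH == 3, both
   operands are kept, and the expansion is a low-half interleave.  */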
/* Implement targetm.vectorize.vec_perm_const_ok.  */

static bool
ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				  const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret, one_vec;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
	return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
	return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (nelt == 2)
	return true;
    }

  /* Extract the values from the vector CST into the permutation
     array in D.  */
  memcpy (d.perm, sel, nelt);
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* For all elements from second vector, fold the elements to first.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to the vector type.  */
  one_vec = (which != 3);

  /* Implementable with shufps or pshufd.  */
  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!one_vec)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
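/* Note that in the testing_p case no insns survive: the expander runs
   inside a throwaway start_sequence/end_sequence pair, on raw placeholder
   registers past LAST_VIRTUAL_REGISTER, purely to learn whether some
   instruction sequence exists for the permutation.  */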
/* Expand an extract-even or extract-odd permutation of the OP0/OP1
   concatenation into TARG.  ODD selects the odd elements.  */

void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
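/* For example, for V4SImode and odd == 1 the mask built above is
   { 1, 3, 5, 7 }: the odd elements of the eight-element op0/op1
   concatenation.  */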
/* Expand an insert into a vector register through pinsr insn.
   Return true if successful.  */

bool
ix86_expand_pinsr (rtx *operands)
{
  rtx dst = operands[0];
  rtx src = operands[3];

  unsigned int size = INTVAL (operands[1]);
  unsigned int pos = INTVAL (operands[2]);

  if (GET_CODE (dst) == SUBREG)
    {
      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
      dst = SUBREG_REG (dst);
    }

  if (GET_CODE (src) == SUBREG)
    src = SUBREG_REG (src);

  switch (GET_MODE (dst))
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
      {
	enum machine_mode srcmode, dstmode;
	rtx (*pinsr)(rtx, rtx, rtx, rtx);

	srcmode = mode_for_size (size, MODE_INT, 0);

	switch (srcmode)
	  {
	  case QImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V16QImode;
	    pinsr = gen_sse4_1_pinsrb;
	    break;

	  case HImode:
	    if (!TARGET_SSE2)
	      return false;
	    dstmode = V8HImode;
	    pinsr = gen_sse2_pinsrw;
	    break;

	  case SImode:
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V4SImode;
	    pinsr = gen_sse4_1_pinsrd;
	    break;

	  case DImode:
	    gcc_assert (TARGET_64BIT);
	    if (!TARGET_SSE4_1)
	      return false;
	    dstmode = V2DImode;
	    pinsr = gen_sse4_1_pinsrq;
	    break;

	  default:
	    return false;
	  }

	dst = gen_lowpart (dstmode, dst);
	src = gen_lowpart (srcmode, src);

	/* Convert the bit position to an element number.  */
	pos /= size;

	emit_insn (pinsr (dst, dst, src, GEN_INT (1 << pos)));
	return true;
      }

    default:
      return false;
    }
}
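/* For example, inserting a 16-bit value at bit position 32 of a
   V8HImode destination gives srcmode == HImode, pos /= size yields
   element 2, and the pinsrw selector becomes GEN_INT (1 << 2).  */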
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
	{
	  /* If va_list is an array type, the argument may have decayed
	     to a pointer type, e.g. by being passed to another function.
	     In that case, unwrap both types so that we can compare the
	     underlying records.  */
	  if (TREE_CODE (htype) == ARRAY_TYPE
	      || POINTER_TYPE_P (htype))
	    {
	      wtype = TREE_TYPE (wtype);
	      htype = TREE_TYPE (htype);
	    }
	}
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
	return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
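/* The unwrap dance above matters because on x86-64 __builtin_va_list is
   an array of one struct, so a va_list function argument decays to a
   pointer; comparing the underlying record types still identifies it.  */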
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;

  switch (idx)
    {
    default:
      break;

    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      return 1;

    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      return 1;
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width

/* The size of the dispatch window is the total number of bytes of
   object code allowed in a window.  */
#define DISPATCH_WINDOW_SIZE 16

/* Number of dispatch windows considered for scheduling.  */
#define MAX_DISPATCH_WINDOWS 3

/* Maximum number of instructions in a window.  */
#define MAX_INSN 4

/* Maximum number of immediate operands in a window.  */
#define MAX_IMM 4

/* Maximum number of immediate bits allowed in a window.  */
#define MAX_IMM_SIZE 128

/* Maximum number of 32 bit immediates allowed in a window.  */
#define MAX_IMM_32 4

/* Maximum number of 64 bit immediates allowed in a window.  */
#define MAX_IMM_64 2

/* Maximum total of loads or prefetches allowed in a window.  */
#define MAX_LOAD 2

/* Maximum total of stores allowed in a window.  */
#define MAX_STORE 1

#define BIG 100
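/* Dispatch-window arithmetic: MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE
   is 3 * 16 == 48, the byte ceiling asserted over the linked windows in
   process_end_window below.  */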
/* Dispatch groups.  Instructions that affect the mix in a dispatch
   window.  */
enum dispatch_group {
  disp_no_group = 0,
  disp_load,
  disp_store,
  disp_load_store,
  disp_prefetch,
  disp_imm,
  disp_imm_32,
  disp_imm_64,
  disp_branch,
  disp_cmp,
  disp_jcc,
  disp_last
};
/* Number of allowable groups in a dispatch window.  It is an array
   indexed by dispatch_group enum.  100 is used as a big number,
   because the number of these kind of operations does not have any
   effect in dispatch window, but we need them for other reasons in
   a window.  */
static unsigned int num_allowable_groups[disp_last] = {
  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
};

char group_name[disp_last + 1][16] = {
  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
};
/* Instruction path.  */
enum insn_path
{
  no_path = 0,
  path_single, /* Single micro op.  */
  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
  last_path
};
/* sched_insn_info defines a window to the instructions scheduled in
   the basic block.  It contains a pointer to the insn_info table and
   the instruction scheduled.

   Windows are allocated for each basic block and are linked
   together.  */
typedef struct sched_insn_info_s
{
  rtx insn;
  enum dispatch_group group;
  enum insn_path path;
  int byte_len;
  int imm_bytes;
} sched_insn_info;
/* Linked list of dispatch windows.  This is a two way list of
   dispatch windows of a basic block.  It contains information about
   the number of uops in the window and the total number of
   instructions and of bytes in the object code for this dispatch
   window.  */
typedef struct dispatch_windows_s
{
  int num_insn;            /* Number of insns in the window.  */
  int num_uops;            /* Number of uops in the window.  */
  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, 0 or 1.  */
  int num_imm;             /* Number of immediates in an insn.  */
  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
  int imm_size;            /* Total immediates in the window.  */
  int num_loads;           /* Total memory loads in the window.  */
  int num_stores;          /* Total memory stores in the window.  */
  int violation;           /* Violation exists in window.  */
  sched_insn_info *window; /* Pointer to the window.  */
  struct dispatch_windows_s *next;
  struct dispatch_windows_s *prev;
} dispatch_windows;
/* Immediate values used in an insn.  */
typedef struct imm_info_s
{
  int imm;
  int imm32;
  int imm64;
} imm_info;

static dispatch_windows *dispatch_window_list;
static dispatch_windows *dispatch_window_list1;
/* Get dispatch group of insn.  */

static enum dispatch_group
get_mem_group (rtx insn)
{
  enum attr_memory memory;

  if (INSN_CODE (insn) < 0)
    return disp_no_group;
  memory = get_attr_memory (insn);
  if (memory == MEMORY_STORE)
    return disp_store;

  if (memory == MEMORY_LOAD)
    return disp_load;

  if (memory == MEMORY_BOTH)
    return disp_load_store;

  return disp_no_group;
}
/* Return true if insn is a compare instruction.  */

static bool
is_cmp (rtx insn)
{
  enum attr_type type;

  type = get_attr_type (insn);
  return (type == TYPE_TEST
	  || type == TYPE_ICMP
	  || type == TYPE_FCMP
	  || GET_CODE (PATTERN (insn)) == COMPARE);
}
/* Return true if a dispatch violation was encountered.  */

static bool
dispatch_violation (void)
{
  if (dispatch_window_list->next)
    return dispatch_window_list->next->violation;
  return dispatch_window_list->violation;
}
/* Return true if insn is a branch instruction.  */

static bool
is_branch (rtx insn)
{
  return (CALL_P (insn) || JUMP_P (insn));
}

/* Return true if insn is a prefetch instruction.  */

static bool
is_prefetch (rtx insn)
{
  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
}
/* This function initializes a dispatch window and the list container holding a
   pointer to the window.  */

static void
init_window (int window_num)
{
  int i;
  dispatch_windows *new_list;

  if (window_num == 0)
    new_list = dispatch_window_list;
  else
    new_list = dispatch_window_list1;

  new_list->num_insn = 0;
  new_list->num_uops = 0;
  new_list->window_size = 0;
  new_list->next = NULL;
  new_list->prev = NULL;
  new_list->window_num = window_num;
  new_list->num_imm = 0;
  new_list->num_imm_32 = 0;
  new_list->num_imm_64 = 0;
  new_list->imm_size = 0;
  new_list->num_loads = 0;
  new_list->num_stores = 0;
  new_list->violation = false;

  for (i = 0; i < MAX_INSN; i++)
    {
      new_list->window[i].insn = NULL;
      new_list->window[i].group = disp_no_group;
      new_list->window[i].path = no_path;
      new_list->window[i].byte_len = 0;
      new_list->window[i].imm_bytes = 0;
    }
}
/* This function allocates and initializes a dispatch window and the
   list container holding a pointer to the window.  */

static dispatch_windows *
allocate_window (void)
{
  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);

  return new_list;
}

/* This routine initializes the dispatch scheduling information.  It
   initiates building dispatch scheduler tables and constructs the
   first dispatch window.  */

static void
init_dispatch_sched (void)
{
  /* Allocate a dispatch list and a window.  */
  dispatch_window_list = allocate_window ();
  dispatch_window_list1 = allocate_window ();
  init_window (0);
  init_window (1);
}
/* This function returns true if a branch is detected.  End of a basic block
   does not have to be a branch, but here we assume only branches end a
   window.  */

static bool
is_end_basic_block (enum dispatch_group group)
{
  return group == disp_branch;
}

/* This function is called when the end of a window processing is reached.  */

static void
process_end_window (void)
{
  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
  if (dispatch_window_list->next)
    {
      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
      gcc_assert (dispatch_window_list->window_size
		  + dispatch_window_list1->window_size <= 48);
      init_window (1);
    }
  init_window (0);
}
/* Allocates a new dispatch window and adds it to WINDOW_LIST.
   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows in the sense that their sizes are DISPATCH_WINDOW_SIZE.  */

static dispatch_windows *
allocate_next_window (int window_num)
{
  if (window_num == 0)
    {
      if (dispatch_window_list->next)
	init_window (1);
      init_window (0);
      return dispatch_window_list;
    }

  dispatch_window_list->next = dispatch_window_list1;
  dispatch_window_list1->prev = dispatch_window_list;

  return dispatch_window_list1;
}
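/* The caller flips between the two windows with window_num = ~window_num & 1
   (see add_to_dispatch_window), so window 0 and window 1 alternate as the
   48 bytes fill up.  */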
/* Increment the number of immediate operands of an instruction.  */

static int
find_constant_1 (rtx *in_rtx, imm_info *imm_values)
{
  if (*in_rtx == 0)
    return 0;

  switch (GET_CODE (*in_rtx))
    {
    case CONST:
    case SYMBOL_REF:
    case CONST_INT:
      (imm_values->imm)++;
      if (x86_64_immediate_operand (*in_rtx, SImode))
	(imm_values->imm32)++;
      else
	(imm_values->imm64)++;
      break;

    case CONST_DOUBLE:
      (imm_values->imm)++;
      (imm_values->imm64)++;
      break;

    case CODE_LABEL:
      if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
	{
	  (imm_values->imm)++;
	  (imm_values->imm32)++;
	}
      break;

    default:
      break;
    }

  return 0;
}
/* Compute number of immediate operands of an instruction.  */

static void
find_constant (rtx in_rtx, imm_info *imm_values)
{
  for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
		(rtx_function) find_constant_1, (void *) imm_values);
}
/* Return total size of immediate operands of an instruction along with number
   of corresponding immediate-operands.  It initializes its parameters to zero
   before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total of immediates.
   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
   bit immediates.  */

static int
get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
{
  imm_info imm_values = {0, 0, 0};

  find_constant (insn, &imm_values);
  *imm = imm_values.imm;
  *imm32 = imm_values.imm32;
  *imm64 = imm_values.imm64;
  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
}
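/* For example, an insn carrying one 32 bit and one 64 bit immediate
   returns *imm == 2 and a total size of 1 * 4 + 1 * 8 == 12 bytes.  */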
/* This function indicates if an operand of an instruction is an
   immediate.  */

static bool
has_immediate (rtx insn)
{
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (insn)
    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
			       &num_imm64_operand);
  return false;
}
/* Return single or double path for instructions.  */

static enum insn_path
get_insn_path (rtx insn)
{
  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);

  if ((int) path == 0)
    return path_single;

  if ((int) path == 1)
    return path_double;

  return path_multi;
}
/* Return insn dispatch group.  */

static enum dispatch_group
get_insn_group (rtx insn)
{
  enum dispatch_group group = get_mem_group (insn);
  if (group)
    return group;

  if (is_branch (insn))
    return disp_branch;

  if (is_cmp (insn))
    return disp_cmp;

  if (has_immediate (insn))
    return disp_imm;

  if (is_prefetch (insn))
    return disp_prefetch;

  return disp_no_group;
}
/* Count number of GROUP restricted instructions in a dispatch
   window WINDOW_LIST.  */

static int
count_num_restricted (rtx insn, dispatch_windows *window_list)
{
  enum dispatch_group group = get_insn_group (insn);
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (group == disp_no_group)
    return 0;

  if (group == disp_imm)
    {
      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				     &num_imm64_operand);
      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
	  || num_imm_operand + window_list->num_imm > MAX_IMM
	  || (num_imm32_operand > 0
	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
	  || (num_imm64_operand > 0
	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
	      && num_imm64_operand > 0
	      && ((window_list->num_imm_64 > 0
		   && window_list->num_insn >= 2)
		  || window_list->num_insn >= 3)))
	return BIG;

      return 1;
    }

  if ((group == disp_load_store
       && (window_list->num_loads >= MAX_LOAD
	   || window_list->num_stores >= MAX_STORE))
      || ((group == disp_load
	   || group == disp_prefetch)
	  && window_list->num_loads >= MAX_LOAD)
      || (group == disp_store
	  && window_list->num_stores >= MAX_STORE))
    return BIG;

  return 1;
}
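/* The return value is compared against num_allowable_groups[group] by
   fits_dispatch_window: 0 means INSN adds no restricted instruction,
   1 means one more insn of GROUP fits, and BIG flags a hard limit the
   insn would overflow.  */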
/* This function returns true if insn satisfies dispatch rules on the
   last window scheduled.  */

static bool
fits_dispatch_window (rtx insn)
{
  dispatch_windows *window_list = dispatch_window_list;
  dispatch_windows *window_list_next = dispatch_window_list->next;
  unsigned int num_restrict;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int sum;

  /* Make disp_cmp and disp_jcc get scheduled at the latest.  These
     instructions should be given the lowest priority in the
     scheduling process in Haifa scheduler to make sure they will be
     scheduled in the same dispatch window as the reference to them.  */
  if (group == disp_jcc || group == disp_cmp)
    return false;

  /* Check nonrestricted.  */
  if (group == disp_no_group || group == disp_branch)
    return true;

  /* Get last dispatch window.  */
  if (window_list_next)
    window_list = window_list_next;

  if (window_list->window_num == 1)
    {
      sum = window_list->prev->window_size + window_list->window_size;

      if (sum == 32
	  || (min_insn_size (insn) + sum) >= 48)
	/* Window 1 is full.  Go for next window.  */
	return true;
    }

  num_restrict = count_num_restricted (insn, window_list);

  if (num_restrict > num_allowable_groups[group])
    return false;

  /* See if it fits in the first window.  */
  if (window_list->window_num == 0)
    {
      /* The first window should have only single and double path
	 uops.  */
      if (path == path_double
	  && (window_list->num_uops + 2) > MAX_INSN)
	return false;
      else if (path != path_single)
	return false;
    }
  return true;
}
/* Add an instruction INSN with NUM_UOPS micro-operations to the
   dispatch window WINDOW_LIST.  */

static void
add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
{
  int byte_len = min_insn_size (insn);
  int num_insn = window_list->num_insn;
  int imm_size;
  sched_insn_info *window = window_list->window;
  enum dispatch_group group = get_insn_group (insn);
  enum insn_path path = get_insn_path (insn);
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (!window_list->violation && group != disp_cmp
      && !fits_dispatch_window (insn))
    window_list->violation = true;

  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  /* Initialize window with new instruction.  */
  window[num_insn].insn = insn;
  window[num_insn].byte_len = byte_len;
  window[num_insn].group = group;
  window[num_insn].path = path;
  window[num_insn].imm_bytes = imm_size;

  window_list->window_size += byte_len;
  window_list->num_insn = num_insn + 1;
  window_list->num_uops = window_list->num_uops + num_uops;
  window_list->imm_size += imm_size;
  window_list->num_imm += num_imm_operand;
  window_list->num_imm_32 += num_imm32_operand;
  window_list->num_imm_64 += num_imm64_operand;

  if (group == disp_store)
    window_list->num_stores += 1;
  else if (group == disp_load
	   || group == disp_prefetch)
    window_list->num_loads += 1;
  else if (group == disp_load_store)
    {
      window_list->num_stores += 1;
      window_list->num_loads += 1;
    }
}
/* Adds a scheduled instruction, INSN, to the current dispatch window.
   If the total bytes of instructions or the number of instructions in
   the window exceed the allowed limits, it allocates a new window.  */

static void
add_to_dispatch_window (rtx insn)
{
  int byte_len;
  dispatch_windows *window_list;
  dispatch_windows *next_list;
  dispatch_windows *window0_list;
  enum insn_path path;
  enum dispatch_group insn_group;
  bool insn_fits;
  int num_insn;
  int num_uops;
  int window_num;
  int insn_num_uops;
  int sum;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  window_list = dispatch_window_list;
  next_list = window_list->next;
  path = get_insn_path (insn);
  insn_group = get_insn_group (insn);

  /* Get the last dispatch window.  */
  if (next_list)
    window_list = dispatch_window_list->next;

  if (path == path_single)
    insn_num_uops = 1;
  else if (path == path_double)
    insn_num_uops = 2;
  else
    insn_num_uops = (int) path;

  /* If current window is full, get a new window.
     Window number zero is full, if MAX_INSN uops are scheduled in it.
     Window number one is full, if window zero's bytes plus window
     one's bytes is 32, or if the bytes of the new instruction added
     to the total makes it greater than 48, or it has already MAX_INSN
     instructions in it.  */
  num_insn = window_list->num_insn;
  num_uops = window_list->num_uops;
  window_num = window_list->window_num;
  insn_fits = fits_dispatch_window (insn);

  if (num_insn >= MAX_INSN
      || num_uops + insn_num_uops > MAX_INSN
      || !(insn_fits))
    {
      window_num = ~window_num & 1;
      window_list = allocate_next_window (window_num);
    }

  if (window_num == 0)
    {
      add_insn_window (insn, window_list, insn_num_uops);
      if (window_list->num_insn >= MAX_INSN
	  && insn_group == disp_branch)
	{
	  process_end_window ();
	  return;
	}
    }
  else if (window_num == 1)
    {
      window0_list = window_list->prev;
      sum = window0_list->window_size + window_list->window_size;
      if (sum == 32
	  || (byte_len + sum) >= 48)
	{
	  process_end_window ();
	  window_list = dispatch_window_list;
	}

      add_insn_window (insn, window_list, insn_num_uops);
    }
  else
    gcc_unreachable ();

  if (is_end_basic_block (insn_group))
    {
      /* End of basic block is reached, do end-basic-block process.  */
      process_end_window ();
      return;
    }
}
/* Print the dispatch window, WINDOW_NUM, to FILE.  */

DEBUG_FUNCTION static void
debug_dispatch_window_file (FILE *file, int window_num)
{
  dispatch_windows *list;
  int i;

  if (window_num == 0)
    list = dispatch_window_list;
  else
    list = dispatch_window_list1;

  fprintf (file, "Window #%d:\n", list->window_num);
  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
	   list->num_insn, list->num_uops, list->window_size);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);

  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
	   list->num_stores);
  fprintf (file, " insn info:\n");

  for (i = 0; i < MAX_INSN; i++)
    {
      if (!list->window[i].insn)
	break;
      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
	       i, group_name[list->window[i].group],
	       i, (void *) list->window[i].insn,
	       i, list->window[i].path,
	       i, list->window[i].byte_len,
	       i, list->window[i].imm_bytes);
    }
}
/* Print to stdout a dispatch window.  */

DEBUG_FUNCTION void
debug_dispatch_window (int window_num)
{
  debug_dispatch_window_file (stdout, window_num);
}
/* Print INSN dispatch information to FILE.  */

DEBUG_FUNCTION static void
debug_insn_dispatch_info_file (FILE *file, rtx insn)
{
  int byte_len;
  enum insn_path path;
  enum dispatch_group group;
  int imm_size;
  int num_imm_operand;
  int num_imm32_operand;
  int num_imm64_operand;

  if (INSN_CODE (insn) < 0)
    return;

  byte_len = min_insn_size (insn);
  path = get_insn_path (insn);
  group = get_insn_group (insn);
  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
				 &num_imm64_operand);

  fprintf (file, " insn info:\n");
  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
	   group_name[group], path, byte_len);
  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
}
/* Print to stdout the status of the ready list with respect to
   dispatch windows.  */

DEBUG_FUNCTION void
debug_ready_dispatch (void)
{
  int i;
  int no_ready = number_in_ready ();

  fprintf (stdout, "Number of ready: %d\n", no_ready);

  for (i = 0; i < no_ready; i++)
    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
}
/* This routine is the driver of the dispatch scheduler.  */

static void
do_dispatch (rtx insn, int mode)
{
  if (mode == DISPATCH_INIT)
    init_dispatch_sched ();
  else if (mode == ADD_TO_DISPATCH_WINDOW)
    add_to_dispatch_window (insn);
}
/* Return TRUE if Dispatch Scheduling is supported.  */

static bool
has_dispatch (rtx insn, int action)
{
  if ((ix86_tune == PROCESSOR_BDVER1 || ix86_tune == PROCESSOR_BDVER2)
      && flag_dispatch_scheduler)
    switch (action)
      {
      default:
	return false;

      case IS_DISPATCH_ON:
	return true;

      case IS_CMP:
	return is_cmp (insn);

      case DISPATCH_VIOLATION:
	return dispatch_violation ();

      case FITS_DISPATCH_WINDOW:
	return fits_dispatch_window (insn);
      }

  return false;
}
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.

   Currently parallel reassociation is enabled for Atom
   processors only and we set reassociation width to be 2
   because Atom may issue up to 2 instructions per cycle.

   Return value should be fixed if parallel reassociation is
   enabled for other processors.  */

static int
ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  int res = 1;

  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
    res = 2;
  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
    res = 2;

  return res;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static enum machine_mode
ix86_preferred_simd_mode (enum machine_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case QImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
    case HImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
    case SImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
    case DImode:
      return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;

    case SFmode:
      if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case DFmode:
      if (!TARGET_VECTORIZE_DOUBLE)
	return word_mode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
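/* For example, with -mavx (and without -mprefer-avx128) SImode data is
   vectorized in 256-bit V8SImode; plain SSE falls back to V4SImode.  */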
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  */

static unsigned int
ix86_autovectorize_vector_sizes (void)
{
  return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load

#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#ifdef TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#ifndef TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  ix86_vectorize_vec_perm_const_ok
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#if TARGET_MACHO
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"