1 /* Copyright (C) 1988-2022 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING3. If not see
17 <http://www.gnu.org/licenses/>. */
19 #define IN_TARGET_CODE 1
23 #include "coretypes.h"
33 #include "stringpool.h"
40 #include "diagnostic.h"
43 #include "fold-const.h"
46 #include "stor-layout.h"
49 #include "insn-attr.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
60 #include "tm-constrs.h"
62 #include "sched-int.h"
64 #include "tree-pass.h"
66 #include "pass_manager.h"
67 #include "target-globals.h"
68 #include "gimple-iterator.h"
69 #include "tree-vectorizer.h"
70 #include "shrink-wrap.h"
73 #include "tree-iterator.h"
75 #include "case-cfn-macros.h"
77 #include "fold-const-call.h"
79 #include "tree-ssanames.h"
81 #include "selftest-rtl.h"
82 #include "print-rtl.h"
85 #include "symbol-summary.h"
87 #include "ipa-fnsummary.h"
88 #include "wide-int-bitmask.h"
89 #include "tree-vector-builder.h"
91 #include "dwarf2out.h"
92 #include "i386-builtins.h"
93 #include "i386-features.h"
95 const char * const xlogue_layout::STUB_BASE_NAMES
[XLOGUE_STUB_COUNT
] = {
104 const unsigned xlogue_layout::REG_ORDER
[xlogue_layout::MAX_REGS
] = {
105 /* The below offset values are where each register is stored for the layout
106 relative to incoming stack pointer. The value of each m_regs[].offset will
107 be relative to the incoming base pointer (rax or rsi) used by the stub.
110 Offset: realigned or aligned + 8
111 Register aligned aligned + 8 aligned w/HFP w/HFP */
112 XMM15_REG
, /* 0x10 0x18 0x10 0x18 */
113 XMM14_REG
, /* 0x20 0x28 0x20 0x28 */
114 XMM13_REG
, /* 0x30 0x38 0x30 0x38 */
115 XMM12_REG
, /* 0x40 0x48 0x40 0x48 */
116 XMM11_REG
, /* 0x50 0x58 0x50 0x58 */
117 XMM10_REG
, /* 0x60 0x68 0x60 0x68 */
118 XMM9_REG
, /* 0x70 0x78 0x70 0x78 */
119 XMM8_REG
, /* 0x80 0x88 0x80 0x88 */
120 XMM7_REG
, /* 0x90 0x98 0x90 0x98 */
121 XMM6_REG
, /* 0xa0 0xa8 0xa0 0xa8 */
122 SI_REG
, /* 0xa8 0xb0 0xa8 0xb0 */
123 DI_REG
, /* 0xb0 0xb8 0xb0 0xb8 */
124 BX_REG
, /* 0xb8 0xc0 0xb8 0xc0 */
125 BP_REG
, /* 0xc0 0xc8 N/A N/A */
126 R12_REG
, /* 0xc8 0xd0 0xc0 0xc8 */
127 R13_REG
, /* 0xd0 0xd8 0xc8 0xd0 */
128 R14_REG
, /* 0xd8 0xe0 0xd0 0xd8 */
129 R15_REG
, /* 0xe0 0xe8 0xd8 0xe0 */
132 /* Instantiate static const values. */
133 const HOST_WIDE_INT
xlogue_layout::STUB_INDEX_OFFSET
;
134 const unsigned xlogue_layout::MIN_REGS
;
135 const unsigned xlogue_layout::MAX_REGS
;
136 const unsigned xlogue_layout::MAX_EXTRA_REGS
;
137 const unsigned xlogue_layout::VARIANT_COUNT
;
138 const unsigned xlogue_layout::STUB_NAME_MAX_LEN
;
140 /* Initialize xlogue_layout::s_stub_names to zero. */
141 char xlogue_layout::s_stub_names
[2][XLOGUE_STUB_COUNT
][VARIANT_COUNT
]
144 /* Instantiates all xlogue_layout instances. */
145 const xlogue_layout
xlogue_layout::s_instances
[XLOGUE_SET_COUNT
] = {
146 xlogue_layout (0, false),
147 xlogue_layout (8, false),
148 xlogue_layout (0, true),
149 xlogue_layout (8, true)
152 /* Return an appropriate const instance of xlogue_layout based upon values
153 in cfun->machine and crtl. */
154 const class xlogue_layout
&
155 xlogue_layout::get_instance ()
157 enum xlogue_stub_sets stub_set
;
158 bool aligned_plus_8
= cfun
->machine
->call_ms2sysv_pad_in
;
160 if (stack_realign_fp
)
161 stub_set
= XLOGUE_SET_HFP_ALIGNED_OR_REALIGN
;
162 else if (frame_pointer_needed
)
163 stub_set
= aligned_plus_8
164 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
165 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN
;
167 stub_set
= aligned_plus_8
? XLOGUE_SET_ALIGNED_PLUS_8
: XLOGUE_SET_ALIGNED
;
169 return s_instances
[stub_set
];
172 /* Determine how many clobbered registers can be saved by the stub.
173 Returns the count of registers the stub will save and restore. */
175 xlogue_layout::count_stub_managed_regs ()
177 bool hfp
= frame_pointer_needed
|| stack_realign_fp
;
181 for (count
= i
= MIN_REGS
; i
< MAX_REGS
; ++i
)
183 regno
= REG_ORDER
[i
];
184 if (regno
== BP_REG
&& hfp
)
186 if (!ix86_save_reg (regno
, false, false))
193 /* Determine if register REGNO is a stub managed register given the
194 total COUNT of stub managed registers. */
196 xlogue_layout::is_stub_managed_reg (unsigned regno
, unsigned count
)
198 bool hfp
= frame_pointer_needed
|| stack_realign_fp
;
201 for (i
= 0; i
< count
; ++i
)
203 gcc_assert (i
< MAX_REGS
);
204 if (REG_ORDER
[i
] == BP_REG
&& hfp
)
206 else if (REG_ORDER
[i
] == regno
)
212 /* Constructor for xlogue_layout. */
213 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in
, bool hfp
)
214 : m_hfp (hfp
) , m_nregs (hfp
? 17 : 18),
215 m_stack_align_off_in (stack_align_off_in
)
217 HOST_WIDE_INT offset
= stack_align_off_in
;
220 for (i
= j
= 0; i
< MAX_REGS
; ++i
)
222 unsigned regno
= REG_ORDER
[i
];
224 if (regno
== BP_REG
&& hfp
)
226 if (SSE_REGNO_P (regno
))
229 /* Verify that SSE regs are always aligned. */
230 gcc_assert (!((stack_align_off_in
+ offset
) & 15));
235 m_regs
[j
].regno
= regno
;
236 m_regs
[j
++].offset
= offset
- STUB_INDEX_OFFSET
;
238 gcc_assert (j
== m_nregs
);
242 xlogue_layout::get_stub_name (enum xlogue_stub stub
,
243 unsigned n_extra_regs
)
245 const int have_avx
= TARGET_AVX
;
246 char *name
= s_stub_names
[!!have_avx
][stub
][n_extra_regs
];
251 int res
= snprintf (name
, STUB_NAME_MAX_LEN
, "__%s_%s_%u",
252 (have_avx
? "avx" : "sse"),
253 STUB_BASE_NAMES
[stub
],
254 MIN_REGS
+ n_extra_regs
);
255 gcc_checking_assert (res
< (int)STUB_NAME_MAX_LEN
);
261 /* Return rtx of a symbol ref for the entry point (based upon
262 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
264 xlogue_layout::get_stub_rtx (enum xlogue_stub stub
)
266 const unsigned n_extra_regs
= cfun
->machine
->call_ms2sysv_extra_regs
;
267 gcc_checking_assert (n_extra_regs
<= MAX_EXTRA_REGS
);
268 gcc_assert (stub
< XLOGUE_STUB_COUNT
);
269 gcc_assert (crtl
->stack_realign_finalized
);
271 return gen_rtx_SYMBOL_REF (Pmode
, get_stub_name (stub
, n_extra_regs
));
274 unsigned scalar_chain::max_id
= 0;
278 /* Initialize new chain. */
280 scalar_chain::scalar_chain (enum machine_mode smode_
, enum machine_mode vmode_
)
288 fprintf (dump_file
, "Created a new instruction chain #%d\n", chain_id
);
290 bitmap_obstack_initialize (NULL
);
291 insns
= BITMAP_ALLOC (NULL
);
292 defs
= BITMAP_ALLOC (NULL
);
293 defs_conv
= BITMAP_ALLOC (NULL
);
297 /* Free chain's data. */
299 scalar_chain::~scalar_chain ()
303 BITMAP_FREE (defs_conv
);
304 bitmap_obstack_release (NULL
);
307 /* Add instruction into chains' queue. */
310 scalar_chain::add_to_queue (unsigned insn_uid
)
312 if (bitmap_bit_p (insns
, insn_uid
)
313 || bitmap_bit_p (queue
, insn_uid
))
317 fprintf (dump_file
, " Adding insn %d into chain's #%d queue\n",
319 bitmap_set_bit (queue
, insn_uid
);
322 general_scalar_chain::general_scalar_chain (enum machine_mode smode_
,
323 enum machine_mode vmode_
)
324 : scalar_chain (smode_
, vmode_
)
326 insns_conv
= BITMAP_ALLOC (NULL
);
327 n_sse_to_integer
= 0;
328 n_integer_to_sse
= 0;
331 general_scalar_chain::~general_scalar_chain ()
333 BITMAP_FREE (insns_conv
);
336 /* For DImode conversion, mark register defined by DEF as requiring
340 general_scalar_chain::mark_dual_mode_def (df_ref def
)
342 gcc_assert (DF_REF_REG_DEF_P (def
));
344 /* Record the def/insn pair so we can later efficiently iterate over
345 the defs to convert on insns not in the chain. */
346 bool reg_new
= bitmap_set_bit (defs_conv
, DF_REF_REGNO (def
));
347 if (!bitmap_bit_p (insns
, DF_REF_INSN_UID (def
)))
349 if (!bitmap_set_bit (insns_conv
, DF_REF_INSN_UID (def
))
363 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
364 DF_REF_REGNO (def
), DF_REF_INSN_UID (def
), chain_id
);
367 /* For TImode conversion, it is unused. */
370 timode_scalar_chain::mark_dual_mode_def (df_ref
)
375 /* Check REF's chain to add new insns into a queue
376 and find registers requiring conversion. */
379 scalar_chain::analyze_register_chain (bitmap candidates
, df_ref ref
)
383 gcc_assert (bitmap_bit_p (insns
, DF_REF_INSN_UID (ref
))
384 || bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)));
385 add_to_queue (DF_REF_INSN_UID (ref
));
387 for (chain
= DF_REF_CHAIN (ref
); chain
; chain
= chain
->next
)
389 unsigned uid
= DF_REF_INSN_UID (chain
->ref
);
391 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain
->ref
)))
394 if (!DF_REF_REG_MEM_P (chain
->ref
))
396 if (bitmap_bit_p (insns
, uid
))
399 if (bitmap_bit_p (candidates
, uid
))
406 if (DF_REF_REG_DEF_P (chain
->ref
))
409 fprintf (dump_file
, " r%d def in insn %d isn't convertible\n",
410 DF_REF_REGNO (chain
->ref
), uid
);
411 mark_dual_mode_def (chain
->ref
);
416 fprintf (dump_file
, " r%d use in insn %d isn't convertible\n",
417 DF_REF_REGNO (chain
->ref
), uid
);
418 mark_dual_mode_def (ref
);
423 /* Add instruction into a chain. */
426 scalar_chain::add_insn (bitmap candidates
, unsigned int insn_uid
)
428 if (bitmap_bit_p (insns
, insn_uid
))
432 fprintf (dump_file
, " Adding insn %d to chain #%d\n", insn_uid
, chain_id
);
434 bitmap_set_bit (insns
, insn_uid
);
436 rtx_insn
*insn
= DF_INSN_UID_GET (insn_uid
)->insn
;
437 rtx def_set
= single_set (insn
);
438 if (def_set
&& REG_P (SET_DEST (def_set
))
439 && !HARD_REGISTER_P (SET_DEST (def_set
)))
440 bitmap_set_bit (defs
, REGNO (SET_DEST (def_set
)));
442 /* ??? The following is quadratic since analyze_register_chain
443 iterates over all refs to look for dual-mode regs. Instead this
444 should be done separately for all regs mentioned in the chain once. */
446 for (ref
= DF_INSN_UID_DEFS (insn_uid
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
447 if (!HARD_REGISTER_P (DF_REF_REG (ref
)))
448 analyze_register_chain (candidates
, ref
);
449 for (ref
= DF_INSN_UID_USES (insn_uid
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
450 if (!DF_REF_REG_MEM_P (ref
))
451 analyze_register_chain (candidates
, ref
);
454 /* Build new chain starting from insn INSN_UID recursively
455 adding all dependent uses and definitions. */
458 scalar_chain::build (bitmap candidates
, unsigned insn_uid
)
460 queue
= BITMAP_ALLOC (NULL
);
461 bitmap_set_bit (queue
, insn_uid
);
464 fprintf (dump_file
, "Building chain #%d...\n", chain_id
);
466 while (!bitmap_empty_p (queue
))
468 insn_uid
= bitmap_first_set_bit (queue
);
469 bitmap_clear_bit (queue
, insn_uid
);
470 bitmap_clear_bit (candidates
, insn_uid
);
471 add_insn (candidates
, insn_uid
);
476 fprintf (dump_file
, "Collected chain #%d...\n", chain_id
);
477 fprintf (dump_file
, " insns: ");
478 dump_bitmap (dump_file
, insns
);
479 if (!bitmap_empty_p (defs_conv
))
483 const char *comma
= "";
484 fprintf (dump_file
, " defs to convert: ");
485 EXECUTE_IF_SET_IN_BITMAP (defs_conv
, 0, id
, bi
)
487 fprintf (dump_file
, "%sr%d", comma
, id
);
490 fprintf (dump_file
, "\n");
497 /* Return a cost of building a vector costant
498 instead of using a scalar one. */
501 general_scalar_chain::vector_const_cost (rtx exp
)
503 gcc_assert (CONST_INT_P (exp
));
505 if (standard_sse_constant_p (exp
, vmode
))
506 return ix86_cost
->sse_op
;
507 /* We have separate costs for SImode and DImode, use SImode costs
508 for smaller modes. */
509 return ix86_cost
->sse_load
[smode
== DImode
? 1 : 0];
512 /* Compute a gain for chain conversion. */
515 general_scalar_chain::compute_convert_gain ()
523 fprintf (dump_file
, "Computing gain for chain #%d...\n", chain_id
);
525 /* SSE costs distinguish between SImode and DImode loads/stores, for
526 int costs factor in the number of GPRs involved. When supporting
527 smaller modes than SImode the int load/store costs need to be
529 unsigned sse_cost_idx
= smode
== DImode
? 1 : 0;
530 unsigned m
= smode
== DImode
? (TARGET_64BIT
? 1 : 2) : 1;
532 EXECUTE_IF_SET_IN_BITMAP (insns
, 0, insn_uid
, bi
)
534 rtx_insn
*insn
= DF_INSN_UID_GET (insn_uid
)->insn
;
535 rtx def_set
= single_set (insn
);
536 rtx src
= SET_SRC (def_set
);
537 rtx dst
= SET_DEST (def_set
);
540 if (REG_P (src
) && REG_P (dst
))
541 igain
+= 2 * m
- ix86_cost
->xmm_move
;
542 else if (REG_P (src
) && MEM_P (dst
))
544 += m
* ix86_cost
->int_store
[2] - ix86_cost
->sse_store
[sse_cost_idx
];
545 else if (MEM_P (src
) && REG_P (dst
))
546 igain
+= m
* ix86_cost
->int_load
[2] - ix86_cost
->sse_load
[sse_cost_idx
];
548 switch (GET_CODE (src
))
555 if (INTVAL (XEXP (src
, 1)) >= 32)
556 igain
+= ix86_cost
->add
;
558 igain
+= ix86_cost
->shift_const
;
561 igain
+= ix86_cost
->shift_const
- ix86_cost
->sse_op
;
563 if (CONST_INT_P (XEXP (src
, 0)))
564 igain
-= vector_const_cost (XEXP (src
, 0));
572 igain
+= m
* ix86_cost
->add
- ix86_cost
->sse_op
;
573 /* Additional gain for andnot for targets without BMI. */
574 if (GET_CODE (XEXP (src
, 0)) == NOT
576 igain
+= m
* ix86_cost
->add
;
578 if (CONST_INT_P (XEXP (src
, 0)))
579 igain
-= vector_const_cost (XEXP (src
, 0));
580 if (CONST_INT_P (XEXP (src
, 1)))
581 igain
-= vector_const_cost (XEXP (src
, 1));
586 igain
-= ix86_cost
->sse_op
+ COSTS_N_INSNS (1);
588 if (GET_CODE (XEXP (src
, 0)) != ABS
)
590 igain
+= m
* ix86_cost
->add
;
600 /* We do not have any conditional move cost, estimate it as a
601 reg-reg move. Comparisons are costed as adds. */
602 igain
+= m
* (COSTS_N_INSNS (2) + ix86_cost
->add
);
603 /* Integer SSE ops are all costed the same. */
604 igain
-= ix86_cost
->sse_op
;
608 /* Assume comparison cost is the same. */
614 if (optimize_insn_for_size_p ())
616 /* xor (2 bytes) vs. xorps (3 bytes). */
617 if (src
== const0_rtx
)
618 igain
-= COSTS_N_BYTES (1);
619 /* movdi_internal vs. movv2di_internal. */
620 /* => mov (5 bytes) vs. movaps (7 bytes). */
621 else if (x86_64_immediate_operand (src
, SImode
))
622 igain
-= COSTS_N_BYTES (2);
624 /* ??? Larger immediate constants are placed in the
625 constant pool, where the size benefit/impact of
626 STV conversion is affected by whether and how
627 often each constant pool entry is shared/reused.
628 The value below is empirically derived from the
629 CSiBE benchmark (and the optimal value may drift
631 igain
+= COSTS_N_BYTES (0);
635 /* DImode can be immediate for TARGET_64BIT
636 and SImode always. */
637 igain
+= m
* COSTS_N_INSNS (1);
638 igain
-= vector_const_cost (src
);
641 else if (MEM_P (dst
))
643 igain
+= (m
* ix86_cost
->int_store
[2]
644 - ix86_cost
->sse_store
[sse_cost_idx
]);
645 igain
-= vector_const_cost (src
);
653 if (igain
!= 0 && dump_file
)
655 fprintf (dump_file
, " Instruction gain %d for ", igain
);
656 dump_insn_slim (dump_file
, insn
);
662 fprintf (dump_file
, " Instruction conversion gain: %d\n", gain
);
664 /* Cost the integer to sse and sse to integer moves. */
665 cost
+= n_sse_to_integer
* ix86_cost
->sse_to_integer
;
666 /* ??? integer_to_sse but we only have that in the RA cost table.
667 Assume sse_to_integer/integer_to_sse are the same which they
668 are at the moment. */
669 cost
+= n_integer_to_sse
* ix86_cost
->sse_to_integer
;
672 fprintf (dump_file
, " Registers conversion cost: %d\n", cost
);
677 fprintf (dump_file
, " Total gain: %d\n", gain
);
682 /* Insert generated conversion instruction sequence INSNS
683 after instruction AFTER. New BB may be required in case
684 instruction has EH region attached. */
687 scalar_chain::emit_conversion_insns (rtx insns
, rtx_insn
*after
)
689 if (!control_flow_insn_p (after
))
691 emit_insn_after (insns
, after
);
695 basic_block bb
= BLOCK_FOR_INSN (after
);
696 edge e
= find_fallthru_edge (bb
->succs
);
699 basic_block new_bb
= split_edge (e
);
700 emit_insn_after (insns
, BB_HEAD (new_bb
));
705 /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
706 zeroing the upper parts. */
709 gen_gpr_to_xmm_move_src (enum machine_mode vmode
, rtx gpr
)
711 switch (GET_MODE_NUNITS (vmode
))
714 /* We are not using this case currently. */
717 return gen_rtx_VEC_CONCAT (vmode
, gpr
,
718 CONST0_RTX (GET_MODE_INNER (vmode
)));
720 return gen_rtx_VEC_MERGE (vmode
, gen_rtx_VEC_DUPLICATE (vmode
, gpr
),
721 CONST0_RTX (vmode
), GEN_INT (HOST_WIDE_INT_1U
));
725 /* Make vector copies for all register REGNO definitions
726 and replace its uses in a chain. */
729 general_scalar_chain::make_vector_copies (rtx_insn
*insn
, rtx reg
)
731 rtx vreg
= *defs_map
.get (reg
);
734 if (!TARGET_INTER_UNIT_MOVES_TO_VEC
)
736 rtx tmp
= assign_386_stack_local (smode
, SLOT_STV_TEMP
);
737 if (smode
== DImode
&& !TARGET_64BIT
)
739 emit_move_insn (adjust_address (tmp
, SImode
, 0),
740 gen_rtx_SUBREG (SImode
, reg
, 0));
741 emit_move_insn (adjust_address (tmp
, SImode
, 4),
742 gen_rtx_SUBREG (SImode
, reg
, 4));
745 emit_move_insn (copy_rtx (tmp
), reg
);
746 emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode
, vreg
, 0),
747 gen_gpr_to_xmm_move_src (vmode
, tmp
)));
749 else if (!TARGET_64BIT
&& smode
== DImode
)
753 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
754 CONST0_RTX (V4SImode
),
755 gen_rtx_SUBREG (SImode
, reg
, 0)));
756 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
757 gen_rtx_SUBREG (V4SImode
, vreg
, 0),
758 gen_rtx_SUBREG (SImode
, reg
, 4),
763 rtx tmp
= gen_reg_rtx (DImode
);
764 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
765 CONST0_RTX (V4SImode
),
766 gen_rtx_SUBREG (SImode
, reg
, 0)));
767 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode
, tmp
, 0),
768 CONST0_RTX (V4SImode
),
769 gen_rtx_SUBREG (SImode
, reg
, 4)));
770 emit_insn (gen_vec_interleave_lowv4si
771 (gen_rtx_SUBREG (V4SImode
, vreg
, 0),
772 gen_rtx_SUBREG (V4SImode
, vreg
, 0),
773 gen_rtx_SUBREG (V4SImode
, tmp
, 0)));
777 emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode
, vreg
, 0),
778 gen_gpr_to_xmm_move_src (vmode
, reg
)));
779 rtx_insn
*seq
= get_insns ();
781 emit_conversion_insns (seq
, insn
);
785 " Copied r%d to a vector register r%d for insn %d\n",
786 REGNO (reg
), REGNO (vreg
), INSN_UID (insn
));
789 /* Copy the definition SRC of INSN inside the chain to DST for
790 scalar uses outside of the chain. */
793 general_scalar_chain::convert_reg (rtx_insn
*insn
, rtx dst
, rtx src
)
796 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC
)
798 rtx tmp
= assign_386_stack_local (smode
, SLOT_STV_TEMP
);
799 emit_move_insn (tmp
, src
);
800 if (!TARGET_64BIT
&& smode
== DImode
)
802 emit_move_insn (gen_rtx_SUBREG (SImode
, dst
, 0),
803 adjust_address (tmp
, SImode
, 0));
804 emit_move_insn (gen_rtx_SUBREG (SImode
, dst
, 4),
805 adjust_address (tmp
, SImode
, 4));
808 emit_move_insn (dst
, copy_rtx (tmp
));
810 else if (!TARGET_64BIT
&& smode
== DImode
)
814 rtx tmp
= gen_rtx_PARALLEL (VOIDmode
,
815 gen_rtvec (1, const0_rtx
));
818 (gen_rtx_SUBREG (SImode
, dst
, 0),
819 gen_rtx_VEC_SELECT (SImode
,
820 gen_rtx_SUBREG (V4SImode
, src
, 0),
823 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (1, const1_rtx
));
826 (gen_rtx_SUBREG (SImode
, dst
, 4),
827 gen_rtx_VEC_SELECT (SImode
,
828 gen_rtx_SUBREG (V4SImode
, src
, 0),
833 rtx vcopy
= gen_reg_rtx (V2DImode
);
834 emit_move_insn (vcopy
, gen_rtx_SUBREG (V2DImode
, src
, 0));
835 emit_move_insn (gen_rtx_SUBREG (SImode
, dst
, 0),
836 gen_rtx_SUBREG (SImode
, vcopy
, 0));
837 emit_move_insn (vcopy
,
838 gen_rtx_LSHIFTRT (V2DImode
,
839 vcopy
, GEN_INT (32)));
840 emit_move_insn (gen_rtx_SUBREG (SImode
, dst
, 4),
841 gen_rtx_SUBREG (SImode
, vcopy
, 0));
845 emit_move_insn (dst
, src
);
847 rtx_insn
*seq
= get_insns ();
849 emit_conversion_insns (seq
, insn
);
853 " Copied r%d to a scalar register r%d for insn %d\n",
854 REGNO (src
), REGNO (dst
), INSN_UID (insn
));
857 /* Convert operand OP in INSN. We should handle
858 memory operands and uninitialized registers.
859 All other register uses are converted during
860 registers conversion. */
863 general_scalar_chain::convert_op (rtx
*op
, rtx_insn
*insn
)
865 *op
= copy_rtx_if_shared (*op
);
867 if (GET_CODE (*op
) == NOT
)
869 convert_op (&XEXP (*op
, 0), insn
);
870 PUT_MODE (*op
, vmode
);
872 else if (MEM_P (*op
))
874 rtx tmp
= gen_reg_rtx (GET_MODE (*op
));
877 if (!memory_operand (*op
, GET_MODE (*op
)))
879 rtx tmp2
= gen_reg_rtx (GET_MODE (*op
));
881 emit_insn_before (gen_rtx_SET (tmp2
, *op
), insn
);
885 emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode
, tmp
, 0),
886 gen_gpr_to_xmm_move_src (vmode
, *op
)),
888 *op
= gen_rtx_SUBREG (vmode
, tmp
, 0);
891 fprintf (dump_file
, " Preloading operand for insn %d into r%d\n",
892 INSN_UID (insn
), REGNO (tmp
));
894 else if (REG_P (*op
))
896 *op
= gen_rtx_SUBREG (vmode
, *op
, 0);
898 else if (CONST_INT_P (*op
))
901 rtx tmp
= gen_rtx_SUBREG (vmode
, gen_reg_rtx (smode
), 0);
903 /* Prefer all ones vector in case of -1. */
904 if (constm1_operand (*op
, GET_MODE (*op
)))
905 vec_cst
= CONSTM1_RTX (vmode
);
908 unsigned n
= GET_MODE_NUNITS (vmode
);
909 rtx
*v
= XALLOCAVEC (rtx
, n
);
911 for (unsigned i
= 1; i
< n
; ++i
)
913 vec_cst
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (n
, v
));
916 if (!standard_sse_constant_p (vec_cst
, vmode
))
919 vec_cst
= validize_mem (force_const_mem (vmode
, vec_cst
));
920 rtx_insn
*seq
= get_insns ();
922 emit_insn_before (seq
, insn
);
925 emit_insn_before (gen_move_insn (copy_rtx (tmp
), vec_cst
), insn
);
930 gcc_assert (SUBREG_P (*op
));
931 gcc_assert (GET_MODE (*op
) == vmode
);
935 /* Convert INSN to vector mode. */
938 general_scalar_chain::convert_insn (rtx_insn
*insn
)
940 /* Generate copies for out-of-chain uses of defs and adjust debug uses. */
941 for (df_ref ref
= DF_INSN_DEFS (insn
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
942 if (bitmap_bit_p (defs_conv
, DF_REF_REGNO (ref
)))
945 for (use
= DF_REF_CHAIN (ref
); use
; use
= use
->next
)
946 if (NONDEBUG_INSN_P (DF_REF_INSN (use
->ref
))
947 && (DF_REF_REG_MEM_P (use
->ref
)
948 || !bitmap_bit_p (insns
, DF_REF_INSN_UID (use
->ref
))))
951 convert_reg (insn
, DF_REF_REG (ref
),
952 *defs_map
.get (regno_reg_rtx
[DF_REF_REGNO (ref
)]));
953 else if (MAY_HAVE_DEBUG_BIND_INSNS
)
955 /* If we generated a scalar copy we can leave debug-insns
956 as-is, if not, we have to adjust them. */
957 auto_vec
<rtx_insn
*, 5> to_reset_debug_insns
;
958 for (use
= DF_REF_CHAIN (ref
); use
; use
= use
->next
)
959 if (DEBUG_INSN_P (DF_REF_INSN (use
->ref
)))
961 rtx_insn
*debug_insn
= DF_REF_INSN (use
->ref
);
962 /* If there's a reaching definition outside of the
963 chain we have to reset. */
965 for (def
= DF_REF_CHAIN (use
->ref
); def
; def
= def
->next
)
966 if (!bitmap_bit_p (insns
, DF_REF_INSN_UID (def
->ref
)))
969 to_reset_debug_insns
.safe_push (debug_insn
);
972 *DF_REF_REAL_LOC (use
->ref
)
973 = *defs_map
.get (regno_reg_rtx
[DF_REF_REGNO (ref
)]);
974 df_insn_rescan (debug_insn
);
977 /* Have to do the reset outside of the DF_CHAIN walk to not
979 while (!to_reset_debug_insns
.is_empty ())
981 rtx_insn
*debug_insn
= to_reset_debug_insns
.pop ();
982 INSN_VAR_LOCATION_LOC (debug_insn
) = gen_rtx_UNKNOWN_VAR_LOC ();
983 df_insn_rescan_debug_internal (debug_insn
);
988 /* Replace uses in this insn with the defs we use in the chain. */
989 for (df_ref ref
= DF_INSN_USES (insn
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
990 if (!DF_REF_REG_MEM_P (ref
))
991 if (rtx
*vreg
= defs_map
.get (regno_reg_rtx
[DF_REF_REGNO (ref
)]))
993 /* Also update a corresponding REG_DEAD note. */
994 rtx note
= find_reg_note (insn
, REG_DEAD
, DF_REF_REG (ref
));
996 XEXP (note
, 0) = *vreg
;
997 *DF_REF_REAL_LOC (ref
) = *vreg
;
1000 rtx def_set
= single_set (insn
);
1001 rtx src
= SET_SRC (def_set
);
1002 rtx dst
= SET_DEST (def_set
);
1005 if (MEM_P (dst
) && !REG_P (src
))
1007 /* There are no scalar integer instructions and therefore
1008 temporary register usage is required. */
1009 rtx tmp
= gen_reg_rtx (smode
);
1010 emit_conversion_insns (gen_move_insn (dst
, tmp
), insn
);
1011 dst
= gen_rtx_SUBREG (vmode
, tmp
, 0);
1013 else if (REG_P (dst
))
1015 /* Replace the definition with a SUBREG to the definition we
1016 use inside the chain. */
1017 rtx
*vdef
= defs_map
.get (dst
);
1020 dst
= gen_rtx_SUBREG (vmode
, dst
, 0);
1021 /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
1022 is a non-REG_P. So kill those off. */
1023 rtx note
= find_reg_equal_equiv_note (insn
);
1025 remove_note (insn
, note
);
1028 switch (GET_CODE (src
))
1039 convert_op (&XEXP (src
, 1), insn
);
1046 convert_op (&XEXP (src
, 0), insn
);
1047 PUT_MODE (src
, vmode
);
1051 src
= XEXP (src
, 0);
1053 if (GET_CODE (src
) == ABS
)
1055 src
= XEXP (src
, 0);
1056 convert_op (&src
, insn
);
1057 subreg
= gen_reg_rtx (vmode
);
1058 emit_insn_before (gen_rtx_SET (subreg
,
1059 gen_rtx_ABS (vmode
, src
)), insn
);
1063 convert_op (&src
, insn
);
1065 subreg
= gen_reg_rtx (vmode
);
1066 emit_insn_before (gen_move_insn (subreg
, CONST0_RTX (vmode
)), insn
);
1067 src
= gen_rtx_MINUS (vmode
, subreg
, src
);
1071 src
= XEXP (src
, 0);
1072 convert_op (&src
, insn
);
1073 subreg
= gen_reg_rtx (vmode
);
1074 emit_insn_before (gen_move_insn (subreg
, CONSTM1_RTX (vmode
)), insn
);
1075 src
= gen_rtx_XOR (vmode
, src
, subreg
);
1080 convert_op (&src
, insn
);
1085 convert_op (&src
, insn
);
1089 gcc_assert (GET_MODE (src
) == vmode
);
1093 src
= SUBREG_REG (XEXP (XEXP (src
, 0), 0));
1095 gcc_assert (REG_P (src
) && GET_MODE (src
) == DImode
);
1096 subreg
= gen_rtx_SUBREG (V2DImode
, src
, 0);
1097 emit_insn_before (gen_vec_interleave_lowv2di
1098 (copy_rtx_if_shared (subreg
),
1099 copy_rtx_if_shared (subreg
),
1100 copy_rtx_if_shared (subreg
)),
1102 dst
= gen_rtx_REG (CCmode
, FLAGS_REG
);
1103 src
= gen_rtx_UNSPEC (CCmode
, gen_rtvec (2, copy_rtx_if_shared (subreg
),
1104 copy_rtx_if_shared (subreg
)),
1109 convert_op (&src
, insn
);
1116 SET_SRC (def_set
) = src
;
1117 SET_DEST (def_set
) = dst
;
1119 /* Drop possible dead definitions. */
1120 PATTERN (insn
) = def_set
;
1122 INSN_CODE (insn
) = -1;
1123 int patt
= recog_memoized (insn
);
1125 fatal_insn_not_found (insn
);
1126 df_insn_rescan (insn
);
1129 /* Fix uses of converted REG in debug insns. */
1132 timode_scalar_chain::fix_debug_reg_uses (rtx reg
)
1134 if (!flag_var_tracking
)
1138 for (ref
= DF_REG_USE_CHAIN (REGNO (reg
)); ref
; ref
= next
)
1140 rtx_insn
*insn
= DF_REF_INSN (ref
);
1141 /* Make sure the next ref is for a different instruction,
1142 so that we're not affected by the rescan. */
1143 next
= DF_REF_NEXT_REG (ref
);
1144 while (next
&& DF_REF_INSN (next
) == insn
)
1145 next
= DF_REF_NEXT_REG (next
);
1147 if (DEBUG_INSN_P (insn
))
1149 /* It may be a debug insn with a TImode variable in
1151 bool changed
= false;
1152 for (; ref
!= next
; ref
= DF_REF_NEXT_REG (ref
))
1154 rtx
*loc
= DF_REF_LOC (ref
);
1155 if (REG_P (*loc
) && GET_MODE (*loc
) == V1TImode
)
1157 *loc
= gen_rtx_SUBREG (TImode
, *loc
, 0);
1162 df_insn_rescan (insn
);
1167 /* Convert INSN from TImode to V1T1mode. */
1170 timode_scalar_chain::convert_insn (rtx_insn
*insn
)
1172 rtx def_set
= single_set (insn
);
1173 rtx src
= SET_SRC (def_set
);
1174 rtx dst
= SET_DEST (def_set
);
1176 switch (GET_CODE (dst
))
1180 rtx tmp
= find_reg_equal_equiv_note (insn
);
1182 PUT_MODE (XEXP (tmp
, 0), V1TImode
);
1183 PUT_MODE (dst
, V1TImode
);
1184 fix_debug_reg_uses (dst
);
1188 PUT_MODE (dst
, V1TImode
);
1195 switch (GET_CODE (src
))
1198 PUT_MODE (src
, V1TImode
);
1199 /* Call fix_debug_reg_uses only if SRC is never defined. */
1200 if (!DF_REG_DEF_CHAIN (REGNO (src
)))
1201 fix_debug_reg_uses (src
);
1205 PUT_MODE (src
, V1TImode
);
1208 case CONST_WIDE_INT
:
1209 if (NONDEBUG_INSN_P (insn
))
1211 /* Since there are no instructions to store 128-bit constant,
1212 temporary register usage is required. */
1213 rtx tmp
= gen_reg_rtx (V1TImode
);
1215 src
= gen_rtx_CONST_VECTOR (V1TImode
, gen_rtvec (1, src
));
1216 src
= validize_mem (force_const_mem (V1TImode
, src
));
1217 rtx_insn
*seq
= get_insns ();
1220 emit_insn_before (seq
, insn
);
1221 emit_conversion_insns (gen_rtx_SET (dst
, tmp
), insn
);
1227 switch (standard_sse_constant_p (src
, TImode
))
1230 src
= CONST0_RTX (GET_MODE (dst
));
1233 src
= CONSTM1_RTX (GET_MODE (dst
));
1238 if (NONDEBUG_INSN_P (insn
))
1240 rtx tmp
= gen_reg_rtx (V1TImode
);
1241 /* Since there are no instructions to store standard SSE
1242 constant, temporary register usage is required. */
1243 emit_conversion_insns (gen_rtx_SET (dst
, tmp
), insn
);
1252 SET_SRC (def_set
) = src
;
1253 SET_DEST (def_set
) = dst
;
1255 /* Drop possible dead definitions. */
1256 PATTERN (insn
) = def_set
;
1258 INSN_CODE (insn
) = -1;
1259 recog_memoized (insn
);
1260 df_insn_rescan (insn
);
1263 /* Generate copies from defs used by the chain but not defined therein.
1264 Also populates defs_map which is used later by convert_insn. */
1267 general_scalar_chain::convert_registers ()
1271 EXECUTE_IF_SET_IN_BITMAP (defs_conv
, 0, id
, bi
)
1273 rtx chain_reg
= gen_reg_rtx (smode
);
1274 defs_map
.put (regno_reg_rtx
[id
], chain_reg
);
1276 EXECUTE_IF_SET_IN_BITMAP (insns_conv
, 0, id
, bi
)
1277 for (df_ref ref
= DF_INSN_UID_DEFS (id
); ref
; ref
= DF_REF_NEXT_LOC (ref
))
1278 if (bitmap_bit_p (defs_conv
, DF_REF_REGNO (ref
)))
1279 make_vector_copies (DF_REF_INSN (ref
), DF_REF_REAL_REG (ref
));
1282 /* Convert whole chain creating required register
1283 conversions and copies. */
1286 scalar_chain::convert ()
1290 int converted_insns
= 0;
1292 if (!dbg_cnt (stv_conversion
))
1296 fprintf (dump_file
, "Converting chain #%d...\n", chain_id
);
1298 convert_registers ();
1300 EXECUTE_IF_SET_IN_BITMAP (insns
, 0, id
, bi
)
1302 convert_insn (DF_INSN_UID_GET (id
)->insn
);
1306 return converted_insns
;
1309 /* Return the SET expression if INSN doesn't reference hard register.
1310 Return NULL if INSN uses or defines a hard register, excluding
1311 pseudo register pushes, hard register uses in a memory address,
1312 clobbers and flags definitions. */
1315 pseudo_reg_set (rtx_insn
*insn
)
1317 rtx set
= single_set (insn
);
1321 /* Check pseudo register push first. */
1322 machine_mode mode
= TARGET_64BIT
? TImode
: DImode
;
1323 if (REG_P (SET_SRC (set
))
1324 && !HARD_REGISTER_P (SET_SRC (set
))
1325 && push_operand (SET_DEST (set
), mode
))
1329 FOR_EACH_INSN_DEF (ref
, insn
)
1330 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref
))
1331 && !DF_REF_FLAGS_IS_SET (ref
, DF_REF_MUST_CLOBBER
)
1332 && DF_REF_REGNO (ref
) != FLAGS_REG
)
1335 FOR_EACH_INSN_USE (ref
, insn
)
1336 if (!DF_REF_REG_MEM_P (ref
) && HARD_REGISTER_P (DF_REF_REAL_REG (ref
)))
1342 /* Check if comparison INSN may be transformed
1343 into vector comparison. Currently we transform
1344 zero checks only which look like:
1346 (set (reg:CCZ 17 flags)
1347 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
1348 (subreg:SI (reg:DI x) 0))
1349 (const_int 0 [0]))) */
1352 convertible_comparison_p (rtx_insn
*insn
, enum machine_mode mode
)
1354 /* ??? Currently convertible for double-word DImode chain only. */
1355 if (TARGET_64BIT
|| mode
!= DImode
)
1361 rtx def_set
= single_set (insn
);
1363 gcc_assert (def_set
);
1365 rtx src
= SET_SRC (def_set
);
1366 rtx dst
= SET_DEST (def_set
);
1368 gcc_assert (GET_CODE (src
) == COMPARE
);
1370 if (GET_CODE (dst
) != REG
1371 || REGNO (dst
) != FLAGS_REG
1372 || GET_MODE (dst
) != CCZmode
)
1375 rtx op1
= XEXP (src
, 0);
1376 rtx op2
= XEXP (src
, 1);
1378 if (op2
!= CONST0_RTX (GET_MODE (op2
)))
1381 if (GET_CODE (op1
) != IOR
)
1384 op2
= XEXP (op1
, 1);
1385 op1
= XEXP (op1
, 0);
1389 || GET_MODE (op1
) != SImode
1390 || GET_MODE (op2
) != SImode
1391 || ((SUBREG_BYTE (op1
) != 0
1392 || SUBREG_BYTE (op2
) != GET_MODE_SIZE (SImode
))
1393 && (SUBREG_BYTE (op2
) != 0
1394 || SUBREG_BYTE (op1
) != GET_MODE_SIZE (SImode
))))
1397 op1
= SUBREG_REG (op1
);
1398 op2
= SUBREG_REG (op2
);
1402 || GET_MODE (op1
) != DImode
)
1408 /* The general version of scalar_to_vector_candidate_p. */
1411 general_scalar_to_vector_candidate_p (rtx_insn
*insn
, enum machine_mode mode
)
1413 rtx def_set
= pseudo_reg_set (insn
);
1418 rtx src
= SET_SRC (def_set
);
1419 rtx dst
= SET_DEST (def_set
);
1421 if (GET_CODE (src
) == COMPARE
)
1422 return convertible_comparison_p (insn
, mode
);
1424 /* We are interested in "mode" only. */
1425 if ((GET_MODE (src
) != mode
1426 && !CONST_INT_P (src
))
1427 || GET_MODE (dst
) != mode
)
1430 if (!REG_P (dst
) && !MEM_P (dst
))
1433 switch (GET_CODE (src
))
1436 if (!TARGET_AVX512VL
)
1442 if (!CONST_INT_P (XEXP (src
, 1))
1443 || !IN_RANGE (INTVAL (XEXP (src
, 1)), 0, GET_MODE_BITSIZE (mode
)-1))
1451 if ((mode
== DImode
&& !TARGET_AVX512VL
)
1452 || (mode
== SImode
&& !TARGET_SSE4_1
))
1461 if (!REG_P (XEXP (src
, 1))
1462 && !MEM_P (XEXP (src
, 1))
1463 && !CONST_INT_P (XEXP (src
, 1)))
1466 if (GET_MODE (XEXP (src
, 1)) != mode
1467 && !CONST_INT_P (XEXP (src
, 1)))
1470 /* Check for andnot case. */
1471 if (GET_CODE (src
) != AND
1472 || GET_CODE (XEXP (src
, 0)) != NOT
)
1475 src
= XEXP (src
, 0);
1482 /* Check for nabs case. */
1483 if (GET_CODE (XEXP (src
, 0)) != ABS
)
1486 src
= XEXP (src
, 0);
1490 if ((mode
== DImode
&& !TARGET_AVX512VL
)
1491 || (mode
== SImode
&& !TARGET_SSSE3
))
1506 if (!REG_P (XEXP (src
, 0))
1507 && !MEM_P (XEXP (src
, 0))
1508 && !CONST_INT_P (XEXP (src
, 0)))
1511 if (GET_MODE (XEXP (src
, 0)) != mode
1512 && !CONST_INT_P (XEXP (src
, 0)))
1518 /* The TImode version of scalar_to_vector_candidate_p. */
1521 timode_scalar_to_vector_candidate_p (rtx_insn
*insn
)
1523 rtx def_set
= pseudo_reg_set (insn
);
1528 rtx src
= SET_SRC (def_set
);
1529 rtx dst
= SET_DEST (def_set
);
1531 /* Only TImode load and store are allowed. */
1532 if (GET_MODE (dst
) != TImode
)
1537 /* Check for store. Memory must be aligned or unaligned store
1538 is optimal. Only support store from register, standard SSE
1539 constant or CONST_WIDE_INT generated from piecewise store.
1541 ??? Verify performance impact before enabling CONST_INT for
1543 if (misaligned_operand (dst
, TImode
)
1544 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL
)
1547 switch (GET_CODE (src
))
1553 case CONST_WIDE_INT
:
1557 return standard_sse_constant_p (src
, TImode
);
1560 else if (MEM_P (src
))
1562 /* Check for load. Memory must be aligned or unaligned load is
1565 && (!misaligned_operand (src
, TImode
)
1566 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
));
1572 /* For a register REGNO, scan instructions for its defs and uses.
1573 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1576 timode_check_non_convertible_regs (bitmap candidates
, bitmap regs
,
1579 for (df_ref def
= DF_REG_DEF_CHAIN (regno
);
1581 def
= DF_REF_NEXT_REG (def
))
1583 if (!bitmap_bit_p (candidates
, DF_REF_INSN_UID (def
)))
1587 "r%d has non convertible def in insn %d\n",
1588 regno
, DF_REF_INSN_UID (def
));
1590 bitmap_set_bit (regs
, regno
);
1595 for (df_ref ref
= DF_REG_USE_CHAIN (regno
);
1597 ref
= DF_REF_NEXT_REG (ref
))
1599 /* Debug instructions are skipped. */
1600 if (NONDEBUG_INSN_P (DF_REF_INSN (ref
))
1601 && !bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)))
1605 "r%d has non convertible use in insn %d\n",
1606 regno
, DF_REF_INSN_UID (ref
));
1608 bitmap_set_bit (regs
, regno
);
1614 /* The TImode version of remove_non_convertible_regs. */
1617 timode_remove_non_convertible_regs (bitmap candidates
)
1621 bitmap regs
= BITMAP_ALLOC (NULL
);
1623 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, id
, bi
)
1625 rtx def_set
= single_set (DF_INSN_UID_GET (id
)->insn
);
1626 rtx dest
= SET_DEST (def_set
);
1627 rtx src
= SET_SRC (def_set
);
1630 || bitmap_bit_p (regs
, REGNO (dest
))
1631 || HARD_REGISTER_P (dest
))
1633 || bitmap_bit_p (regs
, REGNO (src
))
1634 || HARD_REGISTER_P (src
)))
1638 timode_check_non_convertible_regs (candidates
, regs
,
1642 timode_check_non_convertible_regs (candidates
, regs
,
1646 EXECUTE_IF_SET_IN_BITMAP (regs
, 0, id
, bi
)
1648 for (df_ref def
= DF_REG_DEF_CHAIN (id
);
1650 def
= DF_REF_NEXT_REG (def
))
1651 if (bitmap_bit_p (candidates
, DF_REF_INSN_UID (def
)))
1654 fprintf (dump_file
, "Removing insn %d from candidates list\n",
1655 DF_REF_INSN_UID (def
));
1657 bitmap_clear_bit (candidates
, DF_REF_INSN_UID (def
));
1660 for (df_ref ref
= DF_REG_USE_CHAIN (id
);
1662 ref
= DF_REF_NEXT_REG (ref
))
1663 if (bitmap_bit_p (candidates
, DF_REF_INSN_UID (ref
)))
1666 fprintf (dump_file
, "Removing insn %d from candidates list\n",
1667 DF_REF_INSN_UID (ref
));
1669 bitmap_clear_bit (candidates
, DF_REF_INSN_UID (ref
));
1676 /* Main STV pass function. Find and convert scalar
1677 instructions into vector mode when profitable. */
1680 convert_scalars_to_vector (bool timode_p
)
1683 int converted_insns
= 0;
1685 bitmap_obstack_initialize (NULL
);
1686 const machine_mode cand_mode
[3] = { SImode
, DImode
, TImode
};
1687 const machine_mode cand_vmode
[3] = { V4SImode
, V2DImode
, V1TImode
};
1688 bitmap_head candidates
[3]; /* { SImode, DImode, TImode } */
1689 for (unsigned i
= 0; i
< 3; ++i
)
1690 bitmap_initialize (&candidates
[i
], &bitmap_default_obstack
);
1692 calculate_dominance_info (CDI_DOMINATORS
);
1693 df_set_flags (DF_DEFER_INSN_RESCAN
| DF_RD_PRUNE_DEAD_DEFS
);
1694 df_chain_add_problem (DF_DU_CHAIN
| DF_UD_CHAIN
);
1697 /* Find all instructions we want to convert into vector mode. */
1699 fprintf (dump_file
, "Searching for mode conversion candidates...\n");
1701 FOR_EACH_BB_FN (bb
, cfun
)
1704 FOR_BB_INSNS (bb
, insn
)
1706 && timode_scalar_to_vector_candidate_p (insn
))
1709 fprintf (dump_file
, " insn %d is marked as a TImode candidate\n",
1712 bitmap_set_bit (&candidates
[2], INSN_UID (insn
));
1716 /* Check {SI,DI}mode. */
1717 for (unsigned i
= 0; i
<= 1; ++i
)
1718 if (general_scalar_to_vector_candidate_p (insn
, cand_mode
[i
]))
1721 fprintf (dump_file
, " insn %d is marked as a %s candidate\n",
1722 INSN_UID (insn
), i
== 0 ? "SImode" : "DImode");
1724 bitmap_set_bit (&candidates
[i
], INSN_UID (insn
));
1731 timode_remove_non_convertible_regs (&candidates
[2]);
1733 for (unsigned i
= 0; i
<= 2; ++i
)
1734 if (!bitmap_empty_p (&candidates
[i
]))
1736 else if (i
== 2 && dump_file
)
1737 fprintf (dump_file
, "There are no candidates for optimization.\n");
1739 for (unsigned i
= 0; i
<= 2; ++i
)
1740 while (!bitmap_empty_p (&candidates
[i
]))
1742 unsigned uid
= bitmap_first_set_bit (&candidates
[i
]);
1743 scalar_chain
*chain
;
1745 if (cand_mode
[i
] == TImode
)
1746 chain
= new timode_scalar_chain
;
1748 chain
= new general_scalar_chain (cand_mode
[i
], cand_vmode
[i
]);
1750 /* Find instructions chain we want to convert to vector mode.
1751 Check all uses and definitions to estimate all required
1753 chain
->build (&candidates
[i
], uid
);
1755 if (chain
->compute_convert_gain () > 0)
1756 converted_insns
+= chain
->convert ();
1759 fprintf (dump_file
, "Chain #%d conversion is not profitable\n",
1766 fprintf (dump_file
, "Total insns converted: %d\n", converted_insns
);
1768 for (unsigned i
= 0; i
<= 2; ++i
)
1769 bitmap_release (&candidates
[i
]);
1770 bitmap_obstack_release (NULL
);
1771 df_process_deferred_rescans ();
1773 /* Conversion means we may have 128bit register spills/fills
1774 which require aligned stack. */
1775 if (converted_insns
)
1777 if (crtl
->stack_alignment_needed
< 128)
1778 crtl
->stack_alignment_needed
= 128;
1779 if (crtl
->stack_alignment_estimated
< 128)
1780 crtl
->stack_alignment_estimated
= 128;
1782 crtl
->stack_realign_needed
1783 = INCOMING_STACK_BOUNDARY
< crtl
->stack_alignment_estimated
;
1784 crtl
->stack_realign_tried
= crtl
->stack_realign_needed
;
1786 crtl
->stack_realign_processed
= true;
1788 if (!crtl
->drap_reg
)
1790 rtx drap_rtx
= targetm
.calls
.get_drap_rtx ();
1792 /* stack_realign_drap and drap_rtx must match. */
1793 gcc_assert ((stack_realign_drap
!= 0) == (drap_rtx
!= NULL
));
1795 /* Do nothing if NULL is returned,
1796 which means DRAP is not needed. */
1797 if (drap_rtx
!= NULL
)
1799 crtl
->args
.internal_arg_pointer
= drap_rtx
;
1801 /* Call fixup_tail_calls to clean up
1802 REG_EQUIV note if DRAP is needed. */
1803 fixup_tail_calls ();
1807 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
1809 for (tree parm
= DECL_ARGUMENTS (current_function_decl
);
1810 parm
; parm
= DECL_CHAIN (parm
))
1812 if (TYPE_MODE (TREE_TYPE (parm
)) != TImode
)
1814 if (DECL_RTL_SET_P (parm
)
1815 && GET_MODE (DECL_RTL (parm
)) == V1TImode
)
1817 rtx r
= DECL_RTL (parm
);
1819 SET_DECL_RTL (parm
, gen_rtx_SUBREG (TImode
, r
, 0));
1821 if (DECL_INCOMING_RTL (parm
)
1822 && GET_MODE (DECL_INCOMING_RTL (parm
)) == V1TImode
)
1824 rtx r
= DECL_INCOMING_RTL (parm
);
1826 DECL_INCOMING_RTL (parm
) = gen_rtx_SUBREG (TImode
, r
, 0);
1835 rest_of_handle_insert_vzeroupper (void)
1837 /* vzeroupper instructions are inserted immediately after reload to
1838 account for possible spills from 256bit or 512bit registers. The pass
1839 reuses mode switching infrastructure by re-running mode insertion
1840 pass, so disable entities that have already been processed. */
1841 for (int i
= 0; i
< MAX_386_ENTITIES
; i
++)
1842 ix86_optimize_mode_switching
[i
] = 0;
1844 ix86_optimize_mode_switching
[AVX_U128
] = 1;
1846 /* Call optimize_mode_switching. */
1847 g
->get_passes ()->execute_pass_mode_switching ();
1855 const pass_data pass_data_insert_vzeroupper
=
1857 RTL_PASS
, /* type */
1858 "vzeroupper", /* name */
1859 OPTGROUP_NONE
, /* optinfo_flags */
1860 TV_MACH_DEP
, /* tv_id */
1861 0, /* properties_required */
1862 0, /* properties_provided */
1863 0, /* properties_destroyed */
1864 0, /* todo_flags_start */
1865 TODO_df_finish
, /* todo_flags_finish */
1868 class pass_insert_vzeroupper
: public rtl_opt_pass
1871 pass_insert_vzeroupper(gcc::context
*ctxt
)
1872 : rtl_opt_pass(pass_data_insert_vzeroupper
, ctxt
)
1875 /* opt_pass methods: */
1876 virtual bool gate (function
*)
1878 return TARGET_AVX
&& TARGET_VZEROUPPER
1879 && flag_expensive_optimizations
&& !optimize_size
;
1882 virtual unsigned int execute (function
*)
1884 return rest_of_handle_insert_vzeroupper ();
1887 }; // class pass_insert_vzeroupper
1889 const pass_data pass_data_stv
=
1891 RTL_PASS
, /* type */
1893 OPTGROUP_NONE
, /* optinfo_flags */
1894 TV_MACH_DEP
, /* tv_id */
1895 0, /* properties_required */
1896 0, /* properties_provided */
1897 0, /* properties_destroyed */
1898 0, /* todo_flags_start */
1899 TODO_df_finish
, /* todo_flags_finish */
1902 class pass_stv
: public rtl_opt_pass
1905 pass_stv (gcc::context
*ctxt
)
1906 : rtl_opt_pass (pass_data_stv
, ctxt
),
1910 /* opt_pass methods: */
1911 virtual bool gate (function
*)
1913 return ((!timode_p
|| TARGET_64BIT
)
1914 && TARGET_STV
&& TARGET_SSE2
&& optimize
> 1);
1917 virtual unsigned int execute (function
*)
1919 return convert_scalars_to_vector (timode_p
);
1924 return new pass_stv (m_ctxt
);
1927 void set_pass_param (unsigned int n
, bool param
)
1929 gcc_assert (n
== 0);
1935 }; // class pass_stv
1940 make_pass_insert_vzeroupper (gcc::context
*ctxt
)
1942 return new pass_insert_vzeroupper (ctxt
);
1946 make_pass_stv (gcc::context
*ctxt
)
1948 return new pass_stv (ctxt
);
1951 /* Inserting ENDBR and pseudo patchable-area instructions. */
1954 rest_of_insert_endbr_and_patchable_area (bool need_endbr
,
1955 unsigned int patchable_area_size
)
1959 rtx_insn
*endbr_insn
= NULL
;
1964 /* Currently emit EB if it's a tracking function, i.e. 'nocf_check'
1965 is absent among function attributes. Later an optimization will
1966 be introduced to make analysis if an address of a static function
1967 is taken. A static function whose address is not taken will get
1968 a nocf_check attribute. This will allow to reduce the number of
1970 if (!lookup_attribute ("nocf_check",
1971 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
1972 && (!flag_manual_endbr
1973 || lookup_attribute ("cf_check",
1974 DECL_ATTRIBUTES (cfun
->decl
)))
1975 && (!cgraph_node::get (cfun
->decl
)->only_called_directly_p ()
1976 || ix86_cmodel
== CM_LARGE
1977 || ix86_cmodel
== CM_LARGE_PIC
1978 || flag_force_indirect_call
1979 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
1980 && DECL_DLLIMPORT_P (cfun
->decl
))))
1982 if (crtl
->profile
&& flag_fentry
)
1984 /* Queue ENDBR insertion to x86_function_profiler.
1985 NB: Any patchable-area insn will be inserted after
1987 cfun
->machine
->insn_queued_at_entrance
= TYPE_ENDBR
;
1991 endbr
= gen_nop_endbr ();
1992 bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
1993 rtx_insn
*insn
= BB_HEAD (bb
);
1994 endbr_insn
= emit_insn_before (endbr
, insn
);
1999 if (patchable_area_size
)
2001 if (crtl
->profile
&& flag_fentry
)
2003 /* Queue patchable-area insertion to x86_function_profiler.
2004 NB: If there is a queued ENDBR, x86_function_profiler
2005 will also handle patchable-area. */
2006 if (!cfun
->machine
->insn_queued_at_entrance
)
2007 cfun
->machine
->insn_queued_at_entrance
= TYPE_PATCHABLE_AREA
;
2012 = gen_patchable_area (GEN_INT (patchable_area_size
),
2013 GEN_INT (crtl
->patch_area_entry
== 0));
2015 emit_insn_after (patchable_area
, endbr_insn
);
2018 bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
2019 insn
= BB_HEAD (bb
);
2020 emit_insn_before (patchable_area
, insn
);
2029 FOR_EACH_BB_FN (bb
, cfun
)
2031 for (insn
= BB_HEAD (bb
); insn
!= NEXT_INSN (BB_END (bb
));
2032 insn
= NEXT_INSN (insn
))
2036 need_endbr
= find_reg_note (insn
, REG_SETJMP
, NULL
) != NULL
;
2037 if (!need_endbr
&& !SIBLING_CALL_P (insn
))
2039 rtx call
= get_call_rtx_from (insn
);
2040 rtx fnaddr
= XEXP (call
, 0);
2041 tree fndecl
= NULL_TREE
;
2043 /* Also generate ENDBRANCH for non-tail call which
2044 may return via indirect branch. */
2045 if (GET_CODE (XEXP (fnaddr
, 0)) == SYMBOL_REF
)
2046 fndecl
= SYMBOL_REF_DECL (XEXP (fnaddr
, 0));
2047 if (fndecl
== NULL_TREE
)
2048 fndecl
= MEM_EXPR (fnaddr
);
2050 && TREE_CODE (TREE_TYPE (fndecl
)) != FUNCTION_TYPE
2051 && TREE_CODE (TREE_TYPE (fndecl
)) != METHOD_TYPE
)
2053 if (fndecl
&& TYPE_ARG_TYPES (TREE_TYPE (fndecl
)))
2055 tree fntype
= TREE_TYPE (fndecl
);
2056 if (lookup_attribute ("indirect_return",
2057 TYPE_ATTRIBUTES (fntype
)))
2063 /* Generate ENDBRANCH after CALL, which can return more than
2064 twice, setjmp-like functions. */
2066 endbr
= gen_nop_endbr ();
2067 emit_insn_after_setloc (endbr
, insn
, INSN_LOCATION (insn
));
2071 if (JUMP_P (insn
) && flag_cet_switch
)
2073 rtx target
= JUMP_LABEL (insn
);
2074 if (target
== NULL_RTX
|| ANY_RETURN_P (target
))
2077 /* Check the jump is a switch table. */
2078 rtx_insn
*label
= as_a
<rtx_insn
*> (target
);
2079 rtx_insn
*table
= next_insn (label
);
2080 if (table
== NULL_RTX
|| !JUMP_TABLE_DATA_P (table
))
2083 /* For the indirect jump find out all places it jumps and insert
2084 ENDBRANCH there. It should be done under a special flag to
2085 control ENDBRANCH generation for switch stmts. */
2088 basic_block dest_blk
;
2090 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2095 insn
= BB_HEAD (dest_blk
);
2096 gcc_assert (LABEL_P (insn
));
2097 endbr
= gen_nop_endbr ();
2098 emit_insn_after (endbr
, insn
);
2103 if (LABEL_P (insn
) && LABEL_PRESERVE_P (insn
))
2105 endbr
= gen_nop_endbr ();
2106 emit_insn_after (endbr
, insn
);
2117 const pass_data pass_data_insert_endbr_and_patchable_area
=
2119 RTL_PASS
, /* type. */
2120 "endbr_and_patchable_area", /* name. */
2121 OPTGROUP_NONE
, /* optinfo_flags. */
2122 TV_MACH_DEP
, /* tv_id. */
2123 0, /* properties_required. */
2124 0, /* properties_provided. */
2125 0, /* properties_destroyed. */
2126 0, /* todo_flags_start. */
2127 0, /* todo_flags_finish. */
2130 class pass_insert_endbr_and_patchable_area
: public rtl_opt_pass
2133 pass_insert_endbr_and_patchable_area (gcc::context
*ctxt
)
2134 : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area
, ctxt
)
2137 /* opt_pass methods: */
2138 virtual bool gate (function
*)
2140 need_endbr
= (flag_cf_protection
& CF_BRANCH
) != 0;
2141 patchable_area_size
= crtl
->patch_area_size
- crtl
->patch_area_entry
;
2142 return need_endbr
|| patchable_area_size
;
2145 virtual unsigned int execute (function
*)
2147 timevar_push (TV_MACH_DEP
);
2148 rest_of_insert_endbr_and_patchable_area (need_endbr
,
2149 patchable_area_size
);
2150 timevar_pop (TV_MACH_DEP
);
2156 unsigned int patchable_area_size
;
2157 }; // class pass_insert_endbr_and_patchable_area
2162 make_pass_insert_endbr_and_patchable_area (gcc::context
*ctxt
)
2164 return new pass_insert_endbr_and_patchable_area (ctxt
);
2167 /* At entry of the nearest common dominator for basic blocks with
2168 conversions/rcp/sqrt/rsqrt/round, generate a single
2169 vxorps %xmmN, %xmmN, %xmmN
2171 vcvtss2sd op, %xmmN, %xmmX
2172 vcvtsd2ss op, %xmmN, %xmmX
2173 vcvtsi2ss op, %xmmN, %xmmX
2174 vcvtsi2sd op, %xmmN, %xmmX
2176 NB: We want to generate only a single vxorps to cover the whole
2177 function. The LCM algorithm isn't appropriate here since it may
2178 place a vxorps inside the loop. */
2181 remove_partial_avx_dependency (void)
2183 timevar_push (TV_MACH_DEP
);
2185 bitmap_obstack_initialize (NULL
);
2186 bitmap convert_bbs
= BITMAP_ALLOC (NULL
);
2189 rtx_insn
*insn
, *set_insn
;
2191 rtx v4sf_const0
= NULL_RTX
;
2193 auto_vec
<rtx_insn
*> control_flow_insns
;
2195 /* We create invalid RTL initially so defer rescans. */
2196 df_set_flags (DF_DEFER_INSN_RESCAN
);
2198 FOR_EACH_BB_FN (bb
, cfun
)
2200 FOR_BB_INSNS (bb
, insn
)
2202 if (!NONDEBUG_INSN_P (insn
))
2205 set
= single_set (insn
);
2209 if (get_attr_avx_partial_xmm_update (insn
)
2210 != AVX_PARTIAL_XMM_UPDATE_TRUE
)
2213 /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
2214 SI -> SF, SI -> DF, DI -> SF, DI -> DF, sqrt, rsqrt, rcp,
2215 round, to vec_dup and vec_merge with subreg. */
2216 rtx src
= SET_SRC (set
);
2217 rtx dest
= SET_DEST (set
);
2218 machine_mode dest_mode
= GET_MODE (dest
);
2219 bool convert_p
= false;
2220 switch (GET_CODE (src
))
2224 case FLOAT_TRUNCATE
:
2225 case UNSIGNED_FLOAT
:
2232 /* Only hanlde conversion here. */
2233 machine_mode src_mode
2234 = convert_p
? GET_MODE (XEXP (src
, 0)) : VOIDmode
;
2239 if (TARGET_USE_VECTOR_FP_CONVERTS
2240 || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY
)
2245 if (TARGET_USE_VECTOR_CONVERTS
2246 || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY
)
2250 gcc_assert (!convert_p
);
2257 v4sf_const0
= gen_reg_rtx (V4SFmode
);
2260 machine_mode dest_vecmode
;
2264 dest_vecmode
= V8HFmode
;
2265 zero
= gen_rtx_SUBREG (V8HFmode
, v4sf_const0
, 0);
2268 dest_vecmode
= V4SFmode
;
2272 dest_vecmode
= V2DFmode
;
2273 zero
= gen_rtx_SUBREG (V2DFmode
, v4sf_const0
, 0);
2279 /* Change source to vector mode. */
2280 src
= gen_rtx_VEC_DUPLICATE (dest_vecmode
, src
);
2281 src
= gen_rtx_VEC_MERGE (dest_vecmode
, src
, zero
,
2282 GEN_INT (HOST_WIDE_INT_1U
));
2283 /* Change destination to vector mode. */
2284 rtx vec
= gen_reg_rtx (dest_vecmode
);
2285 /* Generate an XMM vector SET. */
2286 set
= gen_rtx_SET (vec
, src
);
2287 set_insn
= emit_insn_before (set
, insn
);
2288 df_insn_rescan (set_insn
);
2290 if (cfun
->can_throw_non_call_exceptions
)
2292 /* Handle REG_EH_REGION note. */
2293 rtx note
= find_reg_note (insn
, REG_EH_REGION
, NULL_RTX
);
2296 control_flow_insns
.safe_push (set_insn
);
2297 add_reg_note (set_insn
, REG_EH_REGION
, XEXP (note
, 0));
2301 src
= gen_rtx_SUBREG (dest_mode
, vec
, 0);
2302 set
= gen_rtx_SET (dest
, src
);
2304 /* Drop possible dead definitions. */
2305 PATTERN (insn
) = set
;
2307 INSN_CODE (insn
) = -1;
2308 recog_memoized (insn
);
2309 df_insn_rescan (insn
);
2310 bitmap_set_bit (convert_bbs
, bb
->index
);
2316 /* (Re-)discover loops so that bb->loop_father can be used in the
2318 calculate_dominance_info (CDI_DOMINATORS
);
2319 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
2321 /* Generate a vxorps at entry of the nearest dominator for basic
2322 blocks with conversions, which is in the fake loop that
2323 contains the whole function, so that there is only a single
2324 vxorps in the whole function. */
2325 bb
= nearest_common_dominator_for_set (CDI_DOMINATORS
,
2327 while (bb
->loop_father
->latch
2328 != EXIT_BLOCK_PTR_FOR_FN (cfun
))
2329 bb
= get_immediate_dominator (CDI_DOMINATORS
,
2330 bb
->loop_father
->header
);
2332 set
= gen_rtx_SET (v4sf_const0
, CONST0_RTX (V4SFmode
));
2334 insn
= BB_HEAD (bb
);
2335 while (insn
&& !NONDEBUG_INSN_P (insn
))
2337 if (insn
== BB_END (bb
))
2342 insn
= NEXT_INSN (insn
);
2344 if (insn
== BB_HEAD (bb
))
2345 set_insn
= emit_insn_before (set
, insn
);
2347 set_insn
= emit_insn_after (set
,
2348 insn
? PREV_INSN (insn
) : BB_END (bb
));
2349 df_insn_rescan (set_insn
);
2350 loop_optimizer_finalize ();
2352 if (!control_flow_insns
.is_empty ())
2354 free_dominance_info (CDI_DOMINATORS
);
2357 FOR_EACH_VEC_ELT (control_flow_insns
, i
, insn
)
2358 if (control_flow_insn_p (insn
))
2360 /* Split the block after insn. There will be a fallthru
2361 edge, which is OK so we keep it. We have to create
2362 the exception edges ourselves. */
2363 bb
= BLOCK_FOR_INSN (insn
);
2364 split_block (bb
, insn
);
2365 rtl_make_eh_edge (NULL
, bb
, BB_END (bb
));
2370 df_process_deferred_rescans ();
2371 df_clear_flags (DF_DEFER_INSN_RESCAN
);
2372 bitmap_obstack_release (NULL
);
2373 BITMAP_FREE (convert_bbs
);
2375 timevar_pop (TV_MACH_DEP
);
2381 const pass_data pass_data_remove_partial_avx_dependency
=
2383 RTL_PASS
, /* type */
2385 OPTGROUP_NONE
, /* optinfo_flags */
2386 TV_MACH_DEP
, /* tv_id */
2387 0, /* properties_required */
2388 0, /* properties_provided */
2389 0, /* properties_destroyed */
2390 0, /* todo_flags_start */
2391 0, /* todo_flags_finish */
2394 class pass_remove_partial_avx_dependency
: public rtl_opt_pass
2397 pass_remove_partial_avx_dependency (gcc::context
*ctxt
)
2398 : rtl_opt_pass (pass_data_remove_partial_avx_dependency
, ctxt
)
2401 /* opt_pass methods: */
2402 virtual bool gate (function
*)
2405 && TARGET_SSE_PARTIAL_REG_DEPENDENCY
2408 && optimize_function_for_speed_p (cfun
));
2411 virtual unsigned int execute (function
*)
2413 return remove_partial_avx_dependency ();
2415 }; // class pass_rpad
2420 make_pass_remove_partial_avx_dependency (gcc::context
*ctxt
)
2422 return new pass_remove_partial_avx_dependency (ctxt
);
2425 /* This compares the priority of target features in function DECL1
2426 and DECL2. It returns positive value if DECL1 is higher priority,
2427 negative value if DECL2 is higher priority and 0 if they are the
2431 ix86_compare_version_priority (tree decl1
, tree decl2
)
2433 unsigned int priority1
= get_builtin_code_for_version (decl1
, NULL
);
2434 unsigned int priority2
= get_builtin_code_for_version (decl2
, NULL
);
2436 return (int)priority1
- (int)priority2
;
2439 /* V1 and V2 point to function versions with different priorities
2440 based on the target ISA. This function compares their priorities. */
2443 feature_compare (const void *v1
, const void *v2
)
2445 typedef struct _function_version_info
2448 tree predicate_chain
;
2449 unsigned int dispatch_priority
;
2450 } function_version_info
;
2452 const function_version_info c1
= *(const function_version_info
*)v1
;
2453 const function_version_info c2
= *(const function_version_info
*)v2
;
2454 return (c2
.dispatch_priority
- c1
.dispatch_priority
);
2457 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
2458 to return a pointer to VERSION_DECL if the outcome of the expression
2459 formed by PREDICATE_CHAIN is true. This function will be called during
2460 version dispatch to decide which function version to execute. It returns
2461 the basic block at the end, to which more conditions can be added. */
2464 add_condition_to_bb (tree function_decl
, tree version_decl
,
2465 tree predicate_chain
, basic_block new_bb
)
2467 gimple
*return_stmt
;
2468 tree convert_expr
, result_var
;
2469 gimple
*convert_stmt
;
2470 gimple
*call_cond_stmt
;
2471 gimple
*if_else_stmt
;
2473 basic_block bb1
, bb2
, bb3
;
2476 tree cond_var
, and_expr_var
= NULL_TREE
;
2479 tree predicate_decl
, predicate_arg
;
2481 push_cfun (DECL_STRUCT_FUNCTION (function_decl
));
2483 gcc_assert (new_bb
!= NULL
);
2484 gseq
= bb_seq (new_bb
);
2487 convert_expr
= build1 (CONVERT_EXPR
, ptr_type_node
,
2488 build_fold_addr_expr (version_decl
));
2489 result_var
= create_tmp_var (ptr_type_node
);
2490 convert_stmt
= gimple_build_assign (result_var
, convert_expr
);
2491 return_stmt
= gimple_build_return (result_var
);
2493 if (predicate_chain
== NULL_TREE
)
2495 gimple_seq_add_stmt (&gseq
, convert_stmt
);
2496 gimple_seq_add_stmt (&gseq
, return_stmt
);
2497 set_bb_seq (new_bb
, gseq
);
2498 gimple_set_bb (convert_stmt
, new_bb
);
2499 gimple_set_bb (return_stmt
, new_bb
);
2504 while (predicate_chain
!= NULL
)
2506 cond_var
= create_tmp_var (integer_type_node
);
2507 predicate_decl
= TREE_PURPOSE (predicate_chain
);
2508 predicate_arg
= TREE_VALUE (predicate_chain
);
2509 call_cond_stmt
= gimple_build_call (predicate_decl
, 1, predicate_arg
);
2510 gimple_call_set_lhs (call_cond_stmt
, cond_var
);
2512 gimple_set_block (call_cond_stmt
, DECL_INITIAL (function_decl
));
2513 gimple_set_bb (call_cond_stmt
, new_bb
);
2514 gimple_seq_add_stmt (&gseq
, call_cond_stmt
);
2516 predicate_chain
= TREE_CHAIN (predicate_chain
);
2518 if (and_expr_var
== NULL
)
2519 and_expr_var
= cond_var
;
2522 gimple
*assign_stmt
;
2523 /* Use MIN_EXPR to check if any integer is zero?.
2524 and_expr_var = min_expr <cond_var, and_expr_var> */
2525 assign_stmt
= gimple_build_assign (and_expr_var
,
2526 build2 (MIN_EXPR
, integer_type_node
,
2527 cond_var
, and_expr_var
));
2529 gimple_set_block (assign_stmt
, DECL_INITIAL (function_decl
));
2530 gimple_set_bb (assign_stmt
, new_bb
);
2531 gimple_seq_add_stmt (&gseq
, assign_stmt
);
2535 if_else_stmt
= gimple_build_cond (GT_EXPR
, and_expr_var
,
2537 NULL_TREE
, NULL_TREE
);
2538 gimple_set_block (if_else_stmt
, DECL_INITIAL (function_decl
));
2539 gimple_set_bb (if_else_stmt
, new_bb
);
2540 gimple_seq_add_stmt (&gseq
, if_else_stmt
);
2542 gimple_seq_add_stmt (&gseq
, convert_stmt
);
2543 gimple_seq_add_stmt (&gseq
, return_stmt
);
2544 set_bb_seq (new_bb
, gseq
);
2547 e12
= split_block (bb1
, if_else_stmt
);
2549 e12
->flags
&= ~EDGE_FALLTHRU
;
2550 e12
->flags
|= EDGE_TRUE_VALUE
;
2552 e23
= split_block (bb2
, return_stmt
);
2554 gimple_set_bb (convert_stmt
, bb2
);
2555 gimple_set_bb (return_stmt
, bb2
);
2558 make_edge (bb1
, bb3
, EDGE_FALSE_VALUE
);
2561 make_edge (bb2
, EXIT_BLOCK_PTR_FOR_FN (cfun
), 0);
2568 /* This function generates the dispatch function for
2569 multi-versioned functions. DISPATCH_DECL is the function which will
2570 contain the dispatch logic. FNDECLS are the function choices for
2571 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
2572 in DISPATCH_DECL in which the dispatch code is generated. */
2575 dispatch_function_versions (tree dispatch_decl
,
2577 basic_block
*empty_bb
)
2580 gimple
*ifunc_cpu_init_stmt
;
2585 unsigned int num_versions
= 0;
2586 unsigned int actual_versions
= 0;
2589 struct _function_version_info
2592 tree predicate_chain
;
2593 unsigned int dispatch_priority
;
2594 }*function_version_info
;
2596 gcc_assert (dispatch_decl
!= NULL
2597 && fndecls_p
!= NULL
2598 && empty_bb
!= NULL
);
2600 /*fndecls_p is actually a vector. */
2601 fndecls
= static_cast<vec
<tree
> *> (fndecls_p
);
2603 /* At least one more version other than the default. */
2604 num_versions
= fndecls
->length ();
2605 gcc_assert (num_versions
>= 2);
2607 function_version_info
= (struct _function_version_info
*)
2608 XNEWVEC (struct _function_version_info
, (num_versions
- 1));
2610 /* The first version in the vector is the default decl. */
2611 default_decl
= (*fndecls
)[0];
2613 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl
));
2615 gseq
= bb_seq (*empty_bb
);
2616 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
2617 constructors, so explicity call __builtin_cpu_init here. */
2619 = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT
), vNULL
);
2620 gimple_seq_add_stmt (&gseq
, ifunc_cpu_init_stmt
);
2621 gimple_set_bb (ifunc_cpu_init_stmt
, *empty_bb
);
2622 set_bb_seq (*empty_bb
, gseq
);
2627 for (ix
= 1; fndecls
->iterate (ix
, &ele
); ++ix
)
2629 tree version_decl
= ele
;
2630 tree predicate_chain
= NULL_TREE
;
2631 unsigned int priority
;
2632 /* Get attribute string, parse it and find the right predicate decl.
2633 The predicate function could be a lengthy combination of many
2634 features, like arch-type and various isa-variants. */
2635 priority
= get_builtin_code_for_version (version_decl
,
2638 if (predicate_chain
== NULL_TREE
)
2641 function_version_info
[actual_versions
].version_decl
= version_decl
;
2642 function_version_info
[actual_versions
].predicate_chain
2644 function_version_info
[actual_versions
].dispatch_priority
= priority
;
2648 /* Sort the versions according to descending order of dispatch priority. The
2649 priority is based on the ISA. This is not a perfect solution. There
2650 could still be ambiguity. If more than one function version is suitable
2651 to execute, which one should be dispatched? In future, allow the user
2652 to specify a dispatch priority next to the version. */
2653 qsort (function_version_info
, actual_versions
,
2654 sizeof (struct _function_version_info
), feature_compare
);
2656 for (i
= 0; i
< actual_versions
; ++i
)
2657 *empty_bb
= add_condition_to_bb (dispatch_decl
,
2658 function_version_info
[i
].version_decl
,
2659 function_version_info
[i
].predicate_chain
,
2662 /* dispatch default version at the end. */
2663 *empty_bb
= add_condition_to_bb (dispatch_decl
, default_decl
,
2666 free (function_version_info
);
2670 /* This function changes the assembler name for functions that are
2671 versions. If DECL is a function version and has a "target"
2672 attribute, it appends the attribute string to its assembler name. */
2675 ix86_mangle_function_version_assembler_name (tree decl
, tree id
)
2678 const char *orig_name
, *version_string
;
2679 char *attr_str
, *assembler_name
;
2681 if (DECL_DECLARED_INLINE_P (decl
)
2682 && lookup_attribute ("gnu_inline",
2683 DECL_ATTRIBUTES (decl
)))
2684 error_at (DECL_SOURCE_LOCATION (decl
),
2685 "function versions cannot be marked as %<gnu_inline%>,"
2686 " bodies have to be generated");
2688 if (DECL_VIRTUAL_P (decl
)
2689 || DECL_VINDEX (decl
))
2690 sorry ("virtual function multiversioning not supported");
2692 version_attr
= lookup_attribute ("target", DECL_ATTRIBUTES (decl
));
2694 /* target attribute string cannot be NULL. */
2695 gcc_assert (version_attr
!= NULL_TREE
);
2697 orig_name
= IDENTIFIER_POINTER (id
);
2699 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr
)));
2701 if (strcmp (version_string
, "default") == 0)
2704 attr_str
= sorted_attr_string (TREE_VALUE (version_attr
));
2705 assembler_name
= XNEWVEC (char, strlen (orig_name
) + strlen (attr_str
) + 2);
2707 sprintf (assembler_name
, "%s.%s", orig_name
, attr_str
);
2709 /* Allow assembler name to be modified if already set. */
2710 if (DECL_ASSEMBLER_NAME_SET_P (decl
))
2711 SET_DECL_RTL (decl
, NULL
);
2713 tree ret
= get_identifier (assembler_name
);
2714 XDELETEVEC (attr_str
);
2715 XDELETEVEC (assembler_name
);
2720 ix86_mangle_decl_assembler_name (tree decl
, tree id
)
2722 /* For function version, add the target suffix to the assembler name. */
2723 if (TREE_CODE (decl
) == FUNCTION_DECL
2724 && DECL_FUNCTION_VERSIONED (decl
))
2725 id
= ix86_mangle_function_version_assembler_name (decl
, id
);
2726 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
2727 id
= SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl
, id
);
2733 /* Make a dispatcher declaration for the multi-versioned function DECL.
2734 Calls to DECL function will be replaced with calls to the dispatcher
2735 by the front-end. Returns the decl of the dispatcher function. */
2738 ix86_get_function_versions_dispatcher (void *decl
)
2740 tree fn
= (tree
) decl
;
2741 struct cgraph_node
*node
= NULL
;
2742 struct cgraph_node
*default_node
= NULL
;
2743 struct cgraph_function_version_info
*node_v
= NULL
;
2744 struct cgraph_function_version_info
*first_v
= NULL
;
2746 tree dispatch_decl
= NULL
;
2748 struct cgraph_function_version_info
*default_version_info
= NULL
;
2750 gcc_assert (fn
!= NULL
&& DECL_FUNCTION_VERSIONED (fn
));
2752 node
= cgraph_node::get (fn
);
2753 gcc_assert (node
!= NULL
);
2755 node_v
= node
->function_version ();
2756 gcc_assert (node_v
!= NULL
);
2758 if (node_v
->dispatcher_resolver
!= NULL
)
2759 return node_v
->dispatcher_resolver
;
2761 /* Find the default version and make it the first node. */
2763 /* Go to the beginning of the chain. */
2764 while (first_v
->prev
!= NULL
)
2765 first_v
= first_v
->prev
;
2766 default_version_info
= first_v
;
2767 while (default_version_info
!= NULL
)
2769 if (is_function_default_version
2770 (default_version_info
->this_node
->decl
))
2772 default_version_info
= default_version_info
->next
;
2775 /* If there is no default node, just return NULL. */
2776 if (default_version_info
== NULL
)
2779 /* Make default info the first node. */
2780 if (first_v
!= default_version_info
)
2782 default_version_info
->prev
->next
= default_version_info
->next
;
2783 if (default_version_info
->next
)
2784 default_version_info
->next
->prev
= default_version_info
->prev
;
2785 first_v
->prev
= default_version_info
;
2786 default_version_info
->next
= first_v
;
2787 default_version_info
->prev
= NULL
;
2790 default_node
= default_version_info
->this_node
;
2792 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
2793 if (targetm
.has_ifunc_p ())
2795 struct cgraph_function_version_info
*it_v
= NULL
;
2796 struct cgraph_node
*dispatcher_node
= NULL
;
2797 struct cgraph_function_version_info
*dispatcher_version_info
= NULL
;
2799 /* Right now, the dispatching is done via ifunc. */
2800 dispatch_decl
= make_dispatcher_decl (default_node
->decl
);
2802 dispatcher_node
= cgraph_node::get_create (dispatch_decl
);
2803 gcc_assert (dispatcher_node
!= NULL
);
2804 dispatcher_node
->dispatcher_function
= 1;
2805 dispatcher_version_info
2806 = dispatcher_node
->insert_new_function_version ();
2807 dispatcher_version_info
->next
= default_version_info
;
2808 dispatcher_node
->definition
= 1;
2810 /* Set the dispatcher for all the versions. */
2811 it_v
= default_version_info
;
2812 while (it_v
!= NULL
)
2814 it_v
->dispatcher_resolver
= dispatch_decl
;
2821 error_at (DECL_SOURCE_LOCATION (default_node
->decl
),
2822 "multiversioning needs %<ifunc%> which is not supported "
2826 return dispatch_decl
;
2829 /* Make the resolver function decl to dispatch the versions of
2830 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is
2831 ifunc alias that will point to the created resolver. Create an
2832 empty basic block in the resolver and store the pointer in
2833 EMPTY_BB. Return the decl of the resolver function. */
2836 make_resolver_func (const tree default_decl
,
2837 const tree ifunc_alias_decl
,
2838 basic_block
*empty_bb
)
2842 /* Create resolver function name based on default_decl. */
2843 tree decl_name
= clone_function_name (default_decl
, "resolver");
2844 const char *resolver_name
= IDENTIFIER_POINTER (decl_name
);
2846 /* The resolver function should return a (void *). */
2847 type
= build_function_type_list (ptr_type_node
, NULL_TREE
);
2849 decl
= build_fn_decl (resolver_name
, type
);
2850 SET_DECL_ASSEMBLER_NAME (decl
, decl_name
);
2852 DECL_NAME (decl
) = decl_name
;
2853 TREE_USED (decl
) = 1;
2854 DECL_ARTIFICIAL (decl
) = 1;
2855 DECL_IGNORED_P (decl
) = 1;
2856 TREE_PUBLIC (decl
) = 0;
2857 DECL_UNINLINABLE (decl
) = 1;
2859 /* Resolver is not external, body is generated. */
2860 DECL_EXTERNAL (decl
) = 0;
2861 DECL_EXTERNAL (ifunc_alias_decl
) = 0;
2863 DECL_CONTEXT (decl
) = NULL_TREE
;
2864 DECL_INITIAL (decl
) = make_node (BLOCK
);
2865 DECL_STATIC_CONSTRUCTOR (decl
) = 0;
2867 if (DECL_COMDAT_GROUP (default_decl
)
2868 || TREE_PUBLIC (default_decl
))
2870 /* In this case, each translation unit with a call to this
2871 versioned function will put out a resolver. Ensure it
2872 is comdat to keep just one copy. */
2873 DECL_COMDAT (decl
) = 1;
2874 make_decl_one_only (decl
, DECL_ASSEMBLER_NAME (decl
));
2877 TREE_PUBLIC (ifunc_alias_decl
) = 0;
2879 /* Build result decl and add to function_decl. */
2880 t
= build_decl (UNKNOWN_LOCATION
, RESULT_DECL
, NULL_TREE
, ptr_type_node
);
2881 DECL_CONTEXT (t
) = decl
;
2882 DECL_ARTIFICIAL (t
) = 1;
2883 DECL_IGNORED_P (t
) = 1;
2884 DECL_RESULT (decl
) = t
;
2886 gimplify_function_tree (decl
);
2887 push_cfun (DECL_STRUCT_FUNCTION (decl
));
2888 *empty_bb
= init_lowered_empty_function (decl
, false,
2889 profile_count::uninitialized ());
2891 cgraph_node::add_new_function (decl
, true);
2892 symtab
->call_cgraph_insertion_hooks (cgraph_node::get_create (decl
));
2896 gcc_assert (ifunc_alias_decl
!= NULL
);
2897 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
2898 DECL_ATTRIBUTES (ifunc_alias_decl
)
2899 = make_attribute ("ifunc", resolver_name
,
2900 DECL_ATTRIBUTES (ifunc_alias_decl
));
2902 /* Create the alias for dispatch to resolver here. */
2903 cgraph_node::create_same_body_alias (ifunc_alias_decl
, decl
);
2907 /* Generate the dispatching code body to dispatch multi-versioned function
2908 DECL. The target hook is called to process the "target" attributes and
2909 provide the code to dispatch the right function at run-time. NODE points
2910 to the dispatcher decl whose body will be created. */
2913 ix86_generate_version_dispatcher_body (void *node_p
)
2916 basic_block empty_bb
;
2917 tree default_ver_decl
;
2918 struct cgraph_node
*versn
;
2919 struct cgraph_node
*node
;
2921 struct cgraph_function_version_info
*node_version_info
= NULL
;
2922 struct cgraph_function_version_info
*versn_info
= NULL
;
2924 node
= (cgraph_node
*)node_p
;
2926 node_version_info
= node
->function_version ();
2927 gcc_assert (node
->dispatcher_function
2928 && node_version_info
!= NULL
);
2930 if (node_version_info
->dispatcher_resolver
)
2931 return node_version_info
->dispatcher_resolver
;
2933 /* The first version in the chain corresponds to the default version. */
2934 default_ver_decl
= node_version_info
->next
->this_node
->decl
;
2936 /* node is going to be an alias, so remove the finalized bit. */
2937 node
->definition
= false;
2939 resolver_decl
= make_resolver_func (default_ver_decl
,
2940 node
->decl
, &empty_bb
);
2942 node_version_info
->dispatcher_resolver
= resolver_decl
;
2944 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl
));
2946 auto_vec
<tree
, 2> fn_ver_vec
;
2948 for (versn_info
= node_version_info
->next
; versn_info
;
2949 versn_info
= versn_info
->next
)
2951 versn
= versn_info
->this_node
;
2952 /* Check for virtual functions here again, as by this time it should
2953 have been determined if this function needs a vtable index or
2954 not. This happens for methods in derived classes that override
2955 virtual methods in base classes but are not explicitly marked as
2957 if (DECL_VINDEX (versn
->decl
))
2958 sorry ("virtual function multiversioning not supported");
2960 fn_ver_vec
.safe_push (versn
->decl
);
2963 dispatch_function_versions (resolver_decl
, &fn_ver_vec
, &empty_bb
);
2964 cgraph_edge::rebuild_edges ();
2966 return resolver_decl
;