gcc/config/i386/i386-features.c
1 /* Copyright (C) 1988-2022 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with GCC; see the file COPYING3. If not see
17 <http://www.gnu.org/licenses/>. */
18
19 #define IN_TARGET_CODE 1
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "memmodel.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "cfgloop.h"
31 #include "df.h"
32 #include "tm_p.h"
33 #include "stringpool.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "cgraph.h"
40 #include "diagnostic.h"
41 #include "cfgbuild.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "attribs.h"
45 #include "calls.h"
46 #include "stor-layout.h"
47 #include "varasm.h"
48 #include "output.h"
49 #include "insn-attr.h"
50 #include "flags.h"
51 #include "except.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "cfgrtl.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "gimplify.h"
59 #include "dwarf2.h"
60 #include "tm-constrs.h"
61 #include "cselib.h"
62 #include "sched-int.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "pass_manager.h"
67 #include "target-globals.h"
68 #include "gimple-iterator.h"
69 #include "tree-vectorizer.h"
70 #include "shrink-wrap.h"
71 #include "builtins.h"
72 #include "rtl-iter.h"
73 #include "tree-iterator.h"
74 #include "dbgcnt.h"
75 #include "case-cfn-macros.h"
76 #include "dojump.h"
77 #include "fold-const-call.h"
78 #include "tree-vrp.h"
79 #include "tree-ssanames.h"
80 #include "selftest.h"
81 #include "selftest-rtl.h"
82 #include "print-rtl.h"
83 #include "intl.h"
84 #include "ifcvt.h"
85 #include "symbol-summary.h"
86 #include "ipa-prop.h"
87 #include "ipa-fnsummary.h"
88 #include "wide-int-bitmask.h"
89 #include "tree-vector-builder.h"
90 #include "debug.h"
91 #include "dwarf2out.h"
92 #include "i386-builtins.h"
93 #include "i386-features.h"
94
95 const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
96 "savms64",
97 "resms64",
98 "resms64x",
99 "savms64f",
100 "resms64f",
101 "resms64fx"
102 };
103
104 const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
105 /* The below offset values are where each register is stored for the layout
106 relative to the incoming stack pointer. The value of each m_regs[].offset will
107 be relative to the incoming base pointer (rax or rsi) used by the stub.
108
109 s_instances: 0 1 2 3
110 Offset: realigned or aligned + 8
111 Register aligned aligned + 8 aligned w/HFP w/HFP */
112 XMM15_REG, /* 0x10 0x18 0x10 0x18 */
113 XMM14_REG, /* 0x20 0x28 0x20 0x28 */
114 XMM13_REG, /* 0x30 0x38 0x30 0x38 */
115 XMM12_REG, /* 0x40 0x48 0x40 0x48 */
116 XMM11_REG, /* 0x50 0x58 0x50 0x58 */
117 XMM10_REG, /* 0x60 0x68 0x60 0x68 */
118 XMM9_REG, /* 0x70 0x78 0x70 0x78 */
119 XMM8_REG, /* 0x80 0x88 0x80 0x88 */
120 XMM7_REG, /* 0x90 0x98 0x90 0x98 */
121 XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */
122 SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */
123 DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */
124 BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */
125 BP_REG, /* 0xc0 0xc8 N/A N/A */
126 R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */
127 R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */
128 R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */
129 R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */
130 };
131
132 /* Instantiate static const values. */
133 const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
134 const unsigned xlogue_layout::MIN_REGS;
135 const unsigned xlogue_layout::MAX_REGS;
136 const unsigned xlogue_layout::MAX_EXTRA_REGS;
137 const unsigned xlogue_layout::VARIANT_COUNT;
138 const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
139
140 /* Initialize xlogue_layout::s_stub_names to zero. */
141 char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
142 [STUB_NAME_MAX_LEN];
143
144 /* Instantiates all xlogue_layout instances. */
145 const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
146 xlogue_layout (0, false),
147 xlogue_layout (8, false),
148 xlogue_layout (0, true),
149 xlogue_layout (8, true)
150 };
151
152 /* Return an appropriate const instance of xlogue_layout based upon values
153 in cfun->machine and crtl. */
154 const class xlogue_layout &
155 xlogue_layout::get_instance ()
156 {
157 enum xlogue_stub_sets stub_set;
158 bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
159
160 if (stack_realign_fp)
161 stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
162 else if (frame_pointer_needed)
163 stub_set = aligned_plus_8
164 ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
165 : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
166 else
167 stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
168
169 return s_instances[stub_set];
170 }
171
172 /* Determine how many clobbered registers can be saved by the stub.
173 Returns the count of registers the stub will save and restore. */
174 unsigned
175 xlogue_layout::count_stub_managed_regs ()
176 {
177 bool hfp = frame_pointer_needed || stack_realign_fp;
178 unsigned i, count;
179 unsigned regno;
180
181 for (count = i = MIN_REGS; i < MAX_REGS; ++i)
182 {
183 regno = REG_ORDER[i];
184 if (regno == BP_REG && hfp)
185 continue;
186 if (!ix86_save_reg (regno, false, false))
187 break;
188 ++count;
189 }
190 return count;
191 }
192
193 /* Determine if register REGNO is a stub managed register given the
194 total COUNT of stub managed registers. */
195 bool
196 xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
197 {
198 bool hfp = frame_pointer_needed || stack_realign_fp;
199 unsigned i;
200
201 for (i = 0; i < count; ++i)
202 {
203 gcc_assert (i < MAX_REGS);
204 if (REG_ORDER[i] == BP_REG && hfp)
205 ++count;
206 else if (REG_ORDER[i] == regno)
207 return true;
208 }
209 return false;
210 }
211
212 /* Constructor for xlogue_layout. */
213 xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
214 : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
215 m_stack_align_off_in (stack_align_off_in)
216 {
217 HOST_WIDE_INT offset = stack_align_off_in;
218 unsigned i, j;
219
220 for (i = j = 0; i < MAX_REGS; ++i)
221 {
222 unsigned regno = REG_ORDER[i];
223
224 if (regno == BP_REG && hfp)
225 continue;
226 if (SSE_REGNO_P (regno))
227 {
228 offset += 16;
229 /* Verify that SSE regs are always aligned. */
230 gcc_assert (!((stack_align_off_in + offset) & 15));
231 }
232 else
233 offset += 8;
234
235 m_regs[j].regno = regno;
236 m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
237 }
238 gcc_assert (j == m_nregs);
239 }
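/* Worked example (a sketch, not an additional specification): for the
   instance constructed with stack_align_off_in == 0 and hfp == false, the
   loop above visits XMM15 first, advances OFFSET from 0 to 16 and records
   m_regs[0].offset = 16 - STUB_INDEX_OFFSET.  Each further SSE register
   adds 16 bytes and each GPR adds 8, reproducing the first offset column
   documented next to REG_ORDER above.  */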
240
241 const char *
242 xlogue_layout::get_stub_name (enum xlogue_stub stub,
243 unsigned n_extra_regs)
244 {
245 const int have_avx = TARGET_AVX;
246 char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
247
248 /* Lazy init */
249 if (!*name)
250 {
251 int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
252 (have_avx ? "avx" : "sse"),
253 STUB_BASE_NAMES[stub],
254 MIN_REGS + n_extra_regs);
255 gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
256 }
257
258 return name;
259 }
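/* Illustration only (MIN_REGS is defined in i386-features.h; the value
   assumed here is the 12 always-saved registers XMM6-XMM15, RSI and RDI,
   and XLOGUE_STUB_SAVMS64 is assumed to index "savms64"): on an AVX
   target, requesting that stub with n_extra_regs == 2 would yield the
   name "__avx_savms64_14", and the non-AVX variant "__sse_savms64_14".  */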
260
261 /* Return rtx of a symbol ref for the entry point (based upon
262 cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */
263 rtx
264 xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
265 {
266 const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
267 gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
268 gcc_assert (stub < XLOGUE_STUB_COUNT);
269 gcc_assert (crtl->stack_realign_finalized);
270
271 return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
272 }
273
274 unsigned scalar_chain::max_id = 0;
275
276 namespace {
277
278 /* Initialize new chain. */
279
280 scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
281 {
282 smode = smode_;
283 vmode = vmode_;
284
285 chain_id = ++max_id;
286
287 if (dump_file)
288 fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
289
290 bitmap_obstack_initialize (NULL);
291 insns = BITMAP_ALLOC (NULL);
292 defs = BITMAP_ALLOC (NULL);
293 defs_conv = BITMAP_ALLOC (NULL);
294 queue = NULL;
295 }
296
297 /* Free chain's data. */
298
299 scalar_chain::~scalar_chain ()
300 {
301 BITMAP_FREE (insns);
302 BITMAP_FREE (defs);
303 BITMAP_FREE (defs_conv);
304 bitmap_obstack_release (NULL);
305 }
306
307 /* Add an instruction into the chain's queue. */
308
309 void
310 scalar_chain::add_to_queue (unsigned insn_uid)
311 {
312 if (bitmap_bit_p (insns, insn_uid)
313 || bitmap_bit_p (queue, insn_uid))
314 return;
315
316 if (dump_file)
317 fprintf (dump_file, " Adding insn %d into chain's #%d queue\n",
318 insn_uid, chain_id);
319 bitmap_set_bit (queue, insn_uid);
320 }
321
322 general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
323 enum machine_mode vmode_)
324 : scalar_chain (smode_, vmode_)
325 {
326 insns_conv = BITMAP_ALLOC (NULL);
327 n_sse_to_integer = 0;
328 n_integer_to_sse = 0;
329 }
330
331 general_scalar_chain::~general_scalar_chain ()
332 {
333 BITMAP_FREE (insns_conv);
334 }
335
336 /* For DImode conversion, mark register defined by DEF as requiring
337 conversion. */
338
339 void
340 general_scalar_chain::mark_dual_mode_def (df_ref def)
341 {
342 gcc_assert (DF_REF_REG_DEF_P (def));
343
344 /* Record the def/insn pair so we can later efficiently iterate over
345 the defs to convert on insns not in the chain. */
346 bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
347 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
348 {
349 if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
350 && !reg_new)
351 return;
352 n_integer_to_sse++;
353 }
354 else
355 {
356 if (!reg_new)
357 return;
358 n_sse_to_integer++;
359 }
360
361 if (dump_file)
362 fprintf (dump_file,
363 " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
364 DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
365 }
366
367 /* This method is unused for TImode conversion. */
368
369 void
370 timode_scalar_chain::mark_dual_mode_def (df_ref)
371 {
372 gcc_unreachable ();
373 }
374
375 /* Check REF's chain to add new insns into a queue
376 and find registers requiring conversion. */
377
378 void
379 scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
380 {
381 df_link *chain;
382
383 gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
384 || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
385 add_to_queue (DF_REF_INSN_UID (ref));
386
387 for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
388 {
389 unsigned uid = DF_REF_INSN_UID (chain->ref);
390
391 if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
392 continue;
393
394 if (!DF_REF_REG_MEM_P (chain->ref))
395 {
396 if (bitmap_bit_p (insns, uid))
397 continue;
398
399 if (bitmap_bit_p (candidates, uid))
400 {
401 add_to_queue (uid);
402 continue;
403 }
404 }
405
406 if (DF_REF_REG_DEF_P (chain->ref))
407 {
408 if (dump_file)
409 fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
410 DF_REF_REGNO (chain->ref), uid);
411 mark_dual_mode_def (chain->ref);
412 }
413 else
414 {
415 if (dump_file)
416 fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
417 DF_REF_REGNO (chain->ref), uid);
418 mark_dual_mode_def (ref);
419 }
420 }
421 }
422
423 /* Add instruction into a chain. */
424
425 void
426 scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
427 {
428 if (bitmap_bit_p (insns, insn_uid))
429 return;
430
431 if (dump_file)
432 fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);
433
434 bitmap_set_bit (insns, insn_uid);
435
436 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
437 rtx def_set = single_set (insn);
438 if (def_set && REG_P (SET_DEST (def_set))
439 && !HARD_REGISTER_P (SET_DEST (def_set)))
440 bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
441
442 /* ??? The following is quadratic since analyze_register_chain
443 iterates over all refs to look for dual-mode regs. Instead this
444 should be done separately for all regs mentioned in the chain once. */
445 df_ref ref;
446 for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
447 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
448 analyze_register_chain (candidates, ref);
449 for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
450 if (!DF_REF_REG_MEM_P (ref))
451 analyze_register_chain (candidates, ref);
452 }
453
454 /* Build new chain starting from insn INSN_UID recursively
455 adding all dependent uses and definitions. */
456
457 void
458 scalar_chain::build (bitmap candidates, unsigned insn_uid)
459 {
460 queue = BITMAP_ALLOC (NULL);
461 bitmap_set_bit (queue, insn_uid);
462
463 if (dump_file)
464 fprintf (dump_file, "Building chain #%d...\n", chain_id);
465
466 while (!bitmap_empty_p (queue))
467 {
468 insn_uid = bitmap_first_set_bit (queue);
469 bitmap_clear_bit (queue, insn_uid);
470 bitmap_clear_bit (candidates, insn_uid);
471 add_insn (candidates, insn_uid);
472 }
473
474 if (dump_file)
475 {
476 fprintf (dump_file, "Collected chain #%d...\n", chain_id);
477 fprintf (dump_file, " insns: ");
478 dump_bitmap (dump_file, insns);
479 if (!bitmap_empty_p (defs_conv))
480 {
481 bitmap_iterator bi;
482 unsigned id;
483 const char *comma = "";
484 fprintf (dump_file, " defs to convert: ");
485 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
486 {
487 fprintf (dump_file, "%sr%d", comma, id);
488 comma = ", ";
489 }
490 fprintf (dump_file, "\n");
491 }
492 }
493
494 BITMAP_FREE (queue);
495 }
496
497 /* Return the cost of building a vector constant
498 instead of using a scalar one. */
499
500 int
501 general_scalar_chain::vector_const_cost (rtx exp)
502 {
503 gcc_assert (CONST_INT_P (exp));
504
505 if (standard_sse_constant_p (exp, vmode))
506 return ix86_cost->sse_op;
507 /* We have separate costs for SImode and DImode, use SImode costs
508 for smaller modes. */
509 return ix86_cost->sse_load[smode == DImode ? 1 : 0];
510 }
511
512 /* Compute a gain for chain conversion. */
513
514 int
515 general_scalar_chain::compute_convert_gain ()
516 {
517 bitmap_iterator bi;
518 unsigned insn_uid;
519 int gain = 0;
520 int cost = 0;
521
522 if (dump_file)
523 fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
524
525 /* SSE costs distinguish between SImode and DImode loads/stores, for
526 int costs factor in the number of GPRs involved. When supporting
527 smaller modes than SImode the int load/store costs need to be
528 adjusted as well. */
529 unsigned sse_cost_idx = smode == DImode ? 1 : 0;
530 unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
531
532 EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
533 {
534 rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
535 rtx def_set = single_set (insn);
536 rtx src = SET_SRC (def_set);
537 rtx dst = SET_DEST (def_set);
538 int igain = 0;
539
540 if (REG_P (src) && REG_P (dst))
541 igain += 2 * m - ix86_cost->xmm_move;
542 else if (REG_P (src) && MEM_P (dst))
543 igain
544 += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
545 else if (MEM_P (src) && REG_P (dst))
546 igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
547 else
548 switch (GET_CODE (src))
549 {
550 case ASHIFT:
551 case ASHIFTRT:
552 case LSHIFTRT:
553 if (m == 2)
554 {
555 if (INTVAL (XEXP (src, 1)) >= 32)
556 igain += ix86_cost->add;
557 else
558 igain += ix86_cost->shift_const;
559 }
560
561 igain += ix86_cost->shift_const - ix86_cost->sse_op;
562
563 if (CONST_INT_P (XEXP (src, 0)))
564 igain -= vector_const_cost (XEXP (src, 0));
565 break;
566
567 case AND:
568 case IOR:
569 case XOR:
570 case PLUS:
571 case MINUS:
572 igain += m * ix86_cost->add - ix86_cost->sse_op;
573 /* Additional gain for andnot for targets without BMI. */
574 if (GET_CODE (XEXP (src, 0)) == NOT
575 && !TARGET_BMI)
576 igain += m * ix86_cost->add;
577
578 if (CONST_INT_P (XEXP (src, 0)))
579 igain -= vector_const_cost (XEXP (src, 0));
580 if (CONST_INT_P (XEXP (src, 1)))
581 igain -= vector_const_cost (XEXP (src, 1));
582 break;
583
584 case NEG:
585 case NOT:
586 igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);
587
588 if (GET_CODE (XEXP (src, 0)) != ABS)
589 {
590 igain += m * ix86_cost->add;
591 break;
592 }
593 /* FALLTHRU */
594
595 case ABS:
596 case SMAX:
597 case SMIN:
598 case UMAX:
599 case UMIN:
600 /* We do not have any conditional move cost, estimate it as a
601 reg-reg move. Comparisons are costed as adds. */
602 igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
603 /* Integer SSE ops are all costed the same. */
604 igain -= ix86_cost->sse_op;
605 break;
606
607 case COMPARE:
608 /* Assume comparison cost is the same. */
609 break;
610
611 case CONST_INT:
612 if (REG_P (dst))
613 {
614 if (optimize_insn_for_size_p ())
615 {
616 /* xor (2 bytes) vs. xorps (3 bytes). */
617 if (src == const0_rtx)
618 igain -= COSTS_N_BYTES (1);
619 /* movdi_internal vs. movv2di_internal. */
620 /* => mov (5 bytes) vs. movaps (7 bytes). */
621 else if (x86_64_immediate_operand (src, SImode))
622 igain -= COSTS_N_BYTES (2);
623 else
624 /* ??? Larger immediate constants are placed in the
625 constant pool, where the size benefit/impact of
626 STV conversion is affected by whether and how
627 often each constant pool entry is shared/reused.
628 The value below is empirically derived from the
629 CSiBE benchmark (and the optimal value may drift
630 over time). */
631 igain += COSTS_N_BYTES (0);
632 }
633 else
634 {
635 /* DImode can be immediate for TARGET_64BIT
636 and SImode always. */
637 igain += m * COSTS_N_INSNS (1);
638 igain -= vector_const_cost (src);
639 }
640 }
641 else if (MEM_P (dst))
642 {
643 igain += (m * ix86_cost->int_store[2]
644 - ix86_cost->sse_store[sse_cost_idx]);
645 igain -= vector_const_cost (src);
646 }
647 break;
648
649 default:
650 gcc_unreachable ();
651 }
652
653 if (igain != 0 && dump_file)
654 {
655 fprintf (dump_file, " Instruction gain %d for ", igain);
656 dump_insn_slim (dump_file, insn);
657 }
658 gain += igain;
659 }
660
661 if (dump_file)
662 fprintf (dump_file, " Instruction conversion gain: %d\n", gain);
663
664 /* Cost the integer to sse and sse to integer moves. */
665 cost += n_sse_to_integer * ix86_cost->sse_to_integer;
666 /* ??? integer_to_sse but we only have that in the RA cost table.
667 Assume sse_to_integer/integer_to_sse are the same which they
668 are at the moment. */
669 cost += n_integer_to_sse * ix86_cost->sse_to_integer;
670
671 if (dump_file)
672 fprintf (dump_file, " Registers conversion cost: %d\n", cost);
673
674 gain -= cost;
675
676 if (dump_file)
677 fprintf (dump_file, " Total gain: %d\n", gain);
678
679 return gain;
680 }
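/* Worked example of the bookkeeping above (illustrative, not tied to any
   real cost table): for a DImode chain on !TARGET_64BIT we have m == 2,
   so a register-register addition contributes
   2 * ix86_cost->add - ix86_cost->sse_op to GAIN, while every register
   needing a GPR<->XMM copy adds ix86_cost->sse_to_integer to COST.  The
   caller converts the chain only when the resulting GAIN is positive.  */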
681
682 /* Insert generated conversion instruction sequence INSNS
683 after instruction AFTER. A new BB may be required in case the
684 instruction has an EH region attached. */
685
686 void
687 scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
688 {
689 if (!control_flow_insn_p (after))
690 {
691 emit_insn_after (insns, after);
692 return;
693 }
694
695 basic_block bb = BLOCK_FOR_INSN (after);
696 edge e = find_fallthru_edge (bb->succs);
697 gcc_assert (e);
698
699 basic_block new_bb = split_edge (e);
700 emit_insn_after (insns, BB_HEAD (new_bb));
701 }
702
703 } // anon namespace
704
705 /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
706 zeroing the upper parts. */
707
708 static rtx
709 gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
710 {
711 switch (GET_MODE_NUNITS (vmode))
712 {
713 case 1:
714 /* We are not using this case currently. */
715 gcc_unreachable ();
716 case 2:
717 return gen_rtx_VEC_CONCAT (vmode, gpr,
718 CONST0_RTX (GET_MODE_INNER (vmode)));
719 default:
720 return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
721 CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
722 }
723 }
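/* The two shapes generated above, sketched as RTL (illustrative only):

     V2DImode:  (vec_concat:V2DI (reg:DI gpr) (const_int 0))
     V4SImode:  (vec_merge:V4SI (vec_duplicate:V4SI (reg:SI gpr))
                                (const_vector:V4SI [0 0 0 0])
                                (const_int 1))

   i.e. the scalar value lands in element 0 and all other elements are
   zeroed.  */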
724
725 /* Make a vector copy of register REG defined by INSN and record it for
726 replacing REG's uses in the chain. */
727
728 void
729 general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
730 {
731 rtx vreg = *defs_map.get (reg);
732
733 start_sequence ();
734 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
735 {
736 rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
737 if (smode == DImode && !TARGET_64BIT)
738 {
739 emit_move_insn (adjust_address (tmp, SImode, 0),
740 gen_rtx_SUBREG (SImode, reg, 0));
741 emit_move_insn (adjust_address (tmp, SImode, 4),
742 gen_rtx_SUBREG (SImode, reg, 4));
743 }
744 else
745 emit_move_insn (copy_rtx (tmp), reg);
746 emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
747 gen_gpr_to_xmm_move_src (vmode, tmp)));
748 }
749 else if (!TARGET_64BIT && smode == DImode)
750 {
751 if (TARGET_SSE4_1)
752 {
753 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
754 CONST0_RTX (V4SImode),
755 gen_rtx_SUBREG (SImode, reg, 0)));
756 emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
757 gen_rtx_SUBREG (V4SImode, vreg, 0),
758 gen_rtx_SUBREG (SImode, reg, 4),
759 GEN_INT (2)));
760 }
761 else
762 {
763 rtx tmp = gen_reg_rtx (DImode);
764 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
765 CONST0_RTX (V4SImode),
766 gen_rtx_SUBREG (SImode, reg, 0)));
767 emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
768 CONST0_RTX (V4SImode),
769 gen_rtx_SUBREG (SImode, reg, 4)));
770 emit_insn (gen_vec_interleave_lowv4si
771 (gen_rtx_SUBREG (V4SImode, vreg, 0),
772 gen_rtx_SUBREG (V4SImode, vreg, 0),
773 gen_rtx_SUBREG (V4SImode, tmp, 0)));
774 }
775 }
776 else
777 emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
778 gen_gpr_to_xmm_move_src (vmode, reg)));
779 rtx_insn *seq = get_insns ();
780 end_sequence ();
781 emit_conversion_insns (seq, insn);
782
783 if (dump_file)
784 fprintf (dump_file,
785 " Copied r%d to a vector register r%d for insn %d\n",
786 REGNO (reg), REGNO (vreg), INSN_UID (insn));
787 }
788
789 /* Copy the definition SRC of INSN inside the chain to DST for
790 scalar uses outside of the chain. */
791
792 void
793 general_scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
794 {
795 start_sequence ();
796 if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
797 {
798 rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
799 emit_move_insn (tmp, src);
800 if (!TARGET_64BIT && smode == DImode)
801 {
802 emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
803 adjust_address (tmp, SImode, 0));
804 emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
805 adjust_address (tmp, SImode, 4));
806 }
807 else
808 emit_move_insn (dst, copy_rtx (tmp));
809 }
810 else if (!TARGET_64BIT && smode == DImode)
811 {
812 if (TARGET_SSE4_1)
813 {
814 rtx tmp = gen_rtx_PARALLEL (VOIDmode,
815 gen_rtvec (1, const0_rtx));
816 emit_insn
817 (gen_rtx_SET
818 (gen_rtx_SUBREG (SImode, dst, 0),
819 gen_rtx_VEC_SELECT (SImode,
820 gen_rtx_SUBREG (V4SImode, src, 0),
821 tmp)));
822
823 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
824 emit_insn
825 (gen_rtx_SET
826 (gen_rtx_SUBREG (SImode, dst, 4),
827 gen_rtx_VEC_SELECT (SImode,
828 gen_rtx_SUBREG (V4SImode, src, 0),
829 tmp)));
830 }
831 else
832 {
833 rtx vcopy = gen_reg_rtx (V2DImode);
834 emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
835 emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
836 gen_rtx_SUBREG (SImode, vcopy, 0));
837 emit_move_insn (vcopy,
838 gen_rtx_LSHIFTRT (V2DImode,
839 vcopy, GEN_INT (32)));
840 emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
841 gen_rtx_SUBREG (SImode, vcopy, 0));
842 }
843 }
844 else
845 emit_move_insn (dst, src);
846
847 rtx_insn *seq = get_insns ();
848 end_sequence ();
849 emit_conversion_insns (seq, insn);
850
851 if (dump_file)
852 fprintf (dump_file,
853 " Copied r%d to a scalar register r%d for insn %d\n",
854 REGNO (src), REGNO (dst), INSN_UID (insn));
855 }
856
857 /* Convert operand OP in INSN. We should handle
858 memory operands and uninitialized registers.
859 All other register uses are converted during
860 register conversion. */
861
862 void
863 general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
864 {
865 *op = copy_rtx_if_shared (*op);
866
867 if (GET_CODE (*op) == NOT)
868 {
869 convert_op (&XEXP (*op, 0), insn);
870 PUT_MODE (*op, vmode);
871 }
872 else if (MEM_P (*op))
873 {
874 rtx tmp = gen_reg_rtx (GET_MODE (*op));
875
876 /* Handle movabs. */
877 if (!memory_operand (*op, GET_MODE (*op)))
878 {
879 rtx tmp2 = gen_reg_rtx (GET_MODE (*op));
880
881 emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
882 *op = tmp2;
883 }
884
885 emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
886 gen_gpr_to_xmm_move_src (vmode, *op)),
887 insn);
888 *op = gen_rtx_SUBREG (vmode, tmp, 0);
889
890 if (dump_file)
891 fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
892 INSN_UID (insn), REGNO (tmp));
893 }
894 else if (REG_P (*op))
895 {
896 *op = gen_rtx_SUBREG (vmode, *op, 0);
897 }
898 else if (CONST_INT_P (*op))
899 {
900 rtx vec_cst;
901 rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
902
903 /* Prefer all ones vector in case of -1. */
904 if (constm1_operand (*op, GET_MODE (*op)))
905 vec_cst = CONSTM1_RTX (vmode);
906 else
907 {
908 unsigned n = GET_MODE_NUNITS (vmode);
909 rtx *v = XALLOCAVEC (rtx, n);
910 v[0] = *op;
911 for (unsigned i = 1; i < n; ++i)
912 v[i] = const0_rtx;
913 vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
914 }
915
916 if (!standard_sse_constant_p (vec_cst, vmode))
917 {
918 start_sequence ();
919 vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
920 rtx_insn *seq = get_insns ();
921 end_sequence ();
922 emit_insn_before (seq, insn);
923 }
924
925 emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
926 *op = tmp;
927 }
928 else
929 {
930 gcc_assert (SUBREG_P (*op));
931 gcc_assert (GET_MODE (*op) == vmode);
932 }
933 }
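/* Example of the CONST_INT path above (a sketch): converting the operand
   (const_int 5) in a DImode chain with vmode == V2DImode builds
   (const_vector:V2DI [5 0]); since that is not a standard SSE constant it
   is forced into the constant pool and loaded before INSN into a vector-mode
   view of a fresh pseudo, which then replaces the operand.  */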
934
935 /* Convert INSN to vector mode. */
936
937 void
938 general_scalar_chain::convert_insn (rtx_insn *insn)
939 {
940 /* Generate copies for out-of-chain uses of defs and adjust debug uses. */
941 for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
942 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
943 {
944 df_link *use;
945 for (use = DF_REF_CHAIN (ref); use; use = use->next)
946 if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
947 && (DF_REF_REG_MEM_P (use->ref)
948 || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
949 break;
950 if (use)
951 convert_reg (insn, DF_REF_REG (ref),
952 *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
953 else if (MAY_HAVE_DEBUG_BIND_INSNS)
954 {
955 /* If we generated a scalar copy we can leave debug insns
956 as-is; if not, we have to adjust them. */
957 auto_vec<rtx_insn *, 5> to_reset_debug_insns;
958 for (use = DF_REF_CHAIN (ref); use; use = use->next)
959 if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
960 {
961 rtx_insn *debug_insn = DF_REF_INSN (use->ref);
962 /* If there's a reaching definition outside of the
963 chain we have to reset. */
964 df_link *def;
965 for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
966 if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
967 break;
968 if (def)
969 to_reset_debug_insns.safe_push (debug_insn);
970 else
971 {
972 *DF_REF_REAL_LOC (use->ref)
973 = *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
974 df_insn_rescan (debug_insn);
975 }
976 }
977 /* Have to do the reset outside of the DF_CHAIN walk to not
978 disrupt it. */
979 while (!to_reset_debug_insns.is_empty ())
980 {
981 rtx_insn *debug_insn = to_reset_debug_insns.pop ();
982 INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
983 df_insn_rescan_debug_internal (debug_insn);
984 }
985 }
986 }
987
988 /* Replace uses in this insn with the defs we use in the chain. */
989 for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
990 if (!DF_REF_REG_MEM_P (ref))
991 if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
992 {
993 /* Also update a corresponding REG_DEAD note. */
994 rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
995 if (note)
996 XEXP (note, 0) = *vreg;
997 *DF_REF_REAL_LOC (ref) = *vreg;
998 }
999
1000 rtx def_set = single_set (insn);
1001 rtx src = SET_SRC (def_set);
1002 rtx dst = SET_DEST (def_set);
1003 rtx subreg;
1004
1005 if (MEM_P (dst) && !REG_P (src))
1006 {
1007 /* There are no scalar integer instructions and therefore
1008 temporary register usage is required. */
1009 rtx tmp = gen_reg_rtx (smode);
1010 emit_conversion_insns (gen_move_insn (dst, tmp), insn);
1011 dst = gen_rtx_SUBREG (vmode, tmp, 0);
1012 }
1013 else if (REG_P (dst))
1014 {
1015 /* Replace the definition with a SUBREG to the definition we
1016 use inside the chain. */
1017 rtx *vdef = defs_map.get (dst);
1018 if (vdef)
1019 dst = *vdef;
1020 dst = gen_rtx_SUBREG (vmode, dst, 0);
1021 /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
1022 is a non-REG_P. So kill those off. */
1023 rtx note = find_reg_equal_equiv_note (insn);
1024 if (note)
1025 remove_note (insn, note);
1026 }
1027
1028 switch (GET_CODE (src))
1029 {
1030 case PLUS:
1031 case MINUS:
1032 case IOR:
1033 case XOR:
1034 case AND:
1035 case SMAX:
1036 case SMIN:
1037 case UMAX:
1038 case UMIN:
1039 convert_op (&XEXP (src, 1), insn);
1040 /* FALLTHRU */
1041
1042 case ABS:
1043 case ASHIFT:
1044 case ASHIFTRT:
1045 case LSHIFTRT:
1046 convert_op (&XEXP (src, 0), insn);
1047 PUT_MODE (src, vmode);
1048 break;
1049
1050 case NEG:
1051 src = XEXP (src, 0);
1052
1053 if (GET_CODE (src) == ABS)
1054 {
1055 src = XEXP (src, 0);
1056 convert_op (&src, insn);
1057 subreg = gen_reg_rtx (vmode);
1058 emit_insn_before (gen_rtx_SET (subreg,
1059 gen_rtx_ABS (vmode, src)), insn);
1060 src = subreg;
1061 }
1062 else
1063 convert_op (&src, insn);
1064
1065 subreg = gen_reg_rtx (vmode);
1066 emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
1067 src = gen_rtx_MINUS (vmode, subreg, src);
1068 break;
1069
1070 case NOT:
1071 src = XEXP (src, 0);
1072 convert_op (&src, insn);
1073 subreg = gen_reg_rtx (vmode);
1074 emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
1075 src = gen_rtx_XOR (vmode, src, subreg);
1076 break;
1077
1078 case MEM:
1079 if (!REG_P (dst))
1080 convert_op (&src, insn);
1081 break;
1082
1083 case REG:
1084 if (!MEM_P (dst))
1085 convert_op (&src, insn);
1086 break;
1087
1088 case SUBREG:
1089 gcc_assert (GET_MODE (src) == vmode);
1090 break;
1091
1092 case COMPARE:
1093 src = SUBREG_REG (XEXP (XEXP (src, 0), 0));
1094
1095 gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
1096 subreg = gen_rtx_SUBREG (V2DImode, src, 0);
1097 emit_insn_before (gen_vec_interleave_lowv2di
1098 (copy_rtx_if_shared (subreg),
1099 copy_rtx_if_shared (subreg),
1100 copy_rtx_if_shared (subreg)),
1101 insn);
1102 dst = gen_rtx_REG (CCmode, FLAGS_REG);
1103 src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
1104 copy_rtx_if_shared (subreg)),
1105 UNSPEC_PTEST);
1106 break;
1107
1108 case CONST_INT:
1109 convert_op (&src, insn);
1110 break;
1111
1112 default:
1113 gcc_unreachable ();
1114 }
1115
1116 SET_SRC (def_set) = src;
1117 SET_DEST (def_set) = dst;
1118
1119 /* Drop possible dead definitions. */
1120 PATTERN (insn) = def_set;
1121
1122 INSN_CODE (insn) = -1;
1123 int patt = recog_memoized (insn);
1124 if (patt == -1)
1125 fatal_insn_not_found (insn);
1126 df_insn_rescan (insn);
1127 }
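/* Before/after sketch of a typical conversion performed above
   (register numbers are illustrative):

     before:  (set (reg:DI 100) (plus:DI (reg:DI 101) (reg:DI 102)))
     after:   (set (subreg:V2DI (reg:DI 100) 0)
                   (plus:V2DI (subreg:V2DI (reg:DI 101) 0)
                              (subreg:V2DI (reg:DI 102) 0)))

   the operands are rewritten by convert_op and the operation itself simply
   has its mode changed to VMODE.  */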
1128
1129 /* Fix uses of converted REG in debug insns. */
1130
1131 void
1132 timode_scalar_chain::fix_debug_reg_uses (rtx reg)
1133 {
1134 if (!flag_var_tracking)
1135 return;
1136
1137 df_ref ref, next;
1138 for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
1139 {
1140 rtx_insn *insn = DF_REF_INSN (ref);
1141 /* Make sure the next ref is for a different instruction,
1142 so that we're not affected by the rescan. */
1143 next = DF_REF_NEXT_REG (ref);
1144 while (next && DF_REF_INSN (next) == insn)
1145 next = DF_REF_NEXT_REG (next);
1146
1147 if (DEBUG_INSN_P (insn))
1148 {
1149 /* It may be a debug insn with a TImode variable in
1150 a register. */
1151 bool changed = false;
1152 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
1153 {
1154 rtx *loc = DF_REF_LOC (ref);
1155 if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
1156 {
1157 *loc = gen_rtx_SUBREG (TImode, *loc, 0);
1158 changed = true;
1159 }
1160 }
1161 if (changed)
1162 df_insn_rescan (insn);
1163 }
1164 }
1165 }
1166
1167 /* Convert INSN from TImode to V1TImode. */
1168
1169 void
1170 timode_scalar_chain::convert_insn (rtx_insn *insn)
1171 {
1172 rtx def_set = single_set (insn);
1173 rtx src = SET_SRC (def_set);
1174 rtx dst = SET_DEST (def_set);
1175
1176 switch (GET_CODE (dst))
1177 {
1178 case REG:
1179 {
1180 rtx tmp = find_reg_equal_equiv_note (insn);
1181 if (tmp)
1182 PUT_MODE (XEXP (tmp, 0), V1TImode);
1183 PUT_MODE (dst, V1TImode);
1184 fix_debug_reg_uses (dst);
1185 }
1186 break;
1187 case MEM:
1188 PUT_MODE (dst, V1TImode);
1189 break;
1190
1191 default:
1192 gcc_unreachable ();
1193 }
1194
1195 switch (GET_CODE (src))
1196 {
1197 case REG:
1198 PUT_MODE (src, V1TImode);
1199 /* Call fix_debug_reg_uses only if SRC is never defined. */
1200 if (!DF_REG_DEF_CHAIN (REGNO (src)))
1201 fix_debug_reg_uses (src);
1202 break;
1203
1204 case MEM:
1205 PUT_MODE (src, V1TImode);
1206 break;
1207
1208 case CONST_WIDE_INT:
1209 if (NONDEBUG_INSN_P (insn))
1210 {
1211 /* Since there is no instruction to store a 128-bit constant,
1212 a temporary register is required. */
1213 rtx tmp = gen_reg_rtx (V1TImode);
1214 start_sequence ();
1215 src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
1216 src = validize_mem (force_const_mem (V1TImode, src));
1217 rtx_insn *seq = get_insns ();
1218 end_sequence ();
1219 if (seq)
1220 emit_insn_before (seq, insn);
1221 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
1222 dst = tmp;
1223 }
1224 break;
1225
1226 case CONST_INT:
1227 switch (standard_sse_constant_p (src, TImode))
1228 {
1229 case 1:
1230 src = CONST0_RTX (GET_MODE (dst));
1231 break;
1232 case 2:
1233 src = CONSTM1_RTX (GET_MODE (dst));
1234 break;
1235 default:
1236 gcc_unreachable ();
1237 }
1238 if (NONDEBUG_INSN_P (insn))
1239 {
1240 rtx tmp = gen_reg_rtx (V1TImode);
1241 /* Since there is no instruction to store a standard SSE
1242 constant, a temporary register is required. */
1243 emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
1244 dst = tmp;
1245 }
1246 break;
1247
1248 default:
1249 gcc_unreachable ();
1250 }
1251
1252 SET_SRC (def_set) = src;
1253 SET_DEST (def_set) = dst;
1254
1255 /* Drop possible dead definitions. */
1256 PATTERN (insn) = def_set;
1257
1258 INSN_CODE (insn) = -1;
1259 recog_memoized (insn);
1260 df_insn_rescan (insn);
1261 }
1262
1263 /* Generate copies from defs used by the chain but not defined therein.
1264 Also populate defs_map, which is used later by convert_insn. */
1265
1266 void
1267 general_scalar_chain::convert_registers ()
1268 {
1269 bitmap_iterator bi;
1270 unsigned id;
1271 EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
1272 {
1273 rtx chain_reg = gen_reg_rtx (smode);
1274 defs_map.put (regno_reg_rtx[id], chain_reg);
1275 }
1276 EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
1277 for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
1278 if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
1279 make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
1280 }
1281
1282 /* Convert the whole chain, creating the required register
1283 conversions and copies. */
1284
1285 int
1286 scalar_chain::convert ()
1287 {
1288 bitmap_iterator bi;
1289 unsigned id;
1290 int converted_insns = 0;
1291
1292 if (!dbg_cnt (stv_conversion))
1293 return 0;
1294
1295 if (dump_file)
1296 fprintf (dump_file, "Converting chain #%d...\n", chain_id);
1297
1298 convert_registers ();
1299
1300 EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
1301 {
1302 convert_insn (DF_INSN_UID_GET (id)->insn);
1303 converted_insns++;
1304 }
1305
1306 return converted_insns;
1307 }
1308
1309 /* Return the SET expression if INSN doesn't reference a hard register.
1310 Return NULL if INSN uses or defines a hard register, excluding
1311 pseudo register pushes, hard register uses in a memory address,
1312 clobbers and flags definitions. */
1313
1314 static rtx
1315 pseudo_reg_set (rtx_insn *insn)
1316 {
1317 rtx set = single_set (insn);
1318 if (!set)
1319 return NULL;
1320
1321 /* Check pseudo register push first. */
1322 machine_mode mode = TARGET_64BIT ? TImode : DImode;
1323 if (REG_P (SET_SRC (set))
1324 && !HARD_REGISTER_P (SET_SRC (set))
1325 && push_operand (SET_DEST (set), mode))
1326 return set;
1327
1328 df_ref ref;
1329 FOR_EACH_INSN_DEF (ref, insn)
1330 if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
1331 && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
1332 && DF_REF_REGNO (ref) != FLAGS_REG)
1333 return NULL;
1334
1335 FOR_EACH_INSN_USE (ref, insn)
1336 if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
1337 return NULL;
1338
1339 return set;
1340 }
1341
1342 /* Check if comparison INSN may be transformed
1343 into a vector comparison. Currently we only transform
1344 zero checks that look like:
1345
1346 (set (reg:CCZ 17 flags)
1347 (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
1348 (subreg:SI (reg:DI x) 0))
1349 (const_int 0 [0]))) */
1350
1351 static bool
1352 convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
1353 {
1354 /* ??? Currently convertible for double-word DImode chain only. */
1355 if (TARGET_64BIT || mode != DImode)
1356 return false;
1357
1358 if (!TARGET_SSE4_1)
1359 return false;
1360
1361 rtx def_set = single_set (insn);
1362
1363 gcc_assert (def_set);
1364
1365 rtx src = SET_SRC (def_set);
1366 rtx dst = SET_DEST (def_set);
1367
1368 gcc_assert (GET_CODE (src) == COMPARE);
1369
1370 if (GET_CODE (dst) != REG
1371 || REGNO (dst) != FLAGS_REG
1372 || GET_MODE (dst) != CCZmode)
1373 return false;
1374
1375 rtx op1 = XEXP (src, 0);
1376 rtx op2 = XEXP (src, 1);
1377
1378 if (op2 != CONST0_RTX (GET_MODE (op2)))
1379 return false;
1380
1381 if (GET_CODE (op1) != IOR)
1382 return false;
1383
1384 op2 = XEXP (op1, 1);
1385 op1 = XEXP (op1, 0);
1386
1387 if (!SUBREG_P (op1)
1388 || !SUBREG_P (op2)
1389 || GET_MODE (op1) != SImode
1390 || GET_MODE (op2) != SImode
1391 || ((SUBREG_BYTE (op1) != 0
1392 || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
1393 && (SUBREG_BYTE (op2) != 0
1394 || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
1395 return false;
1396
1397 op1 = SUBREG_REG (op1);
1398 op2 = SUBREG_REG (op2);
1399
1400 if (op1 != op2
1401 || !REG_P (op1)
1402 || GET_MODE (op1) != DImode)
1403 return false;
1404
1405 return true;
1406 }
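/* Hedged source-level illustration: on a !TARGET_64BIT target a test such
   as "if (x == 0)" for a 64-bit "long long x" is typically expanded into
   (compare:CCZ (ior:SI (subreg:SI x 4) (subreg:SI x 0)) (const_int 0)),
   which is exactly the zero-check shape accepted above and later rewritten
   into an UNSPEC_PTEST by convert_insn.  */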
1407
1408 /* The general version of scalar_to_vector_candidate_p. */
1409
1410 static bool
1411 general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
1412 {
1413 rtx def_set = pseudo_reg_set (insn);
1414
1415 if (!def_set)
1416 return false;
1417
1418 rtx src = SET_SRC (def_set);
1419 rtx dst = SET_DEST (def_set);
1420
1421 if (GET_CODE (src) == COMPARE)
1422 return convertible_comparison_p (insn, mode);
1423
1424 /* We are interested in "mode" only. */
1425 if ((GET_MODE (src) != mode
1426 && !CONST_INT_P (src))
1427 || GET_MODE (dst) != mode)
1428 return false;
1429
1430 if (!REG_P (dst) && !MEM_P (dst))
1431 return false;
1432
1433 switch (GET_CODE (src))
1434 {
1435 case ASHIFTRT:
1436 if (!TARGET_AVX512VL)
1437 return false;
1438 /* FALLTHRU */
1439
1440 case ASHIFT:
1441 case LSHIFTRT:
1442 if (!CONST_INT_P (XEXP (src, 1))
1443 || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
1444 return false;
1445 break;
1446
1447 case SMAX:
1448 case SMIN:
1449 case UMAX:
1450 case UMIN:
1451 if ((mode == DImode && !TARGET_AVX512VL)
1452 || (mode == SImode && !TARGET_SSE4_1))
1453 return false;
1454 /* Fallthru. */
1455
1456 case AND:
1457 case IOR:
1458 case XOR:
1459 case PLUS:
1460 case MINUS:
1461 if (!REG_P (XEXP (src, 1))
1462 && !MEM_P (XEXP (src, 1))
1463 && !CONST_INT_P (XEXP (src, 1)))
1464 return false;
1465
1466 if (GET_MODE (XEXP (src, 1)) != mode
1467 && !CONST_INT_P (XEXP (src, 1)))
1468 return false;
1469
1470 /* Check for andnot case. */
1471 if (GET_CODE (src) != AND
1472 || GET_CODE (XEXP (src, 0)) != NOT)
1473 break;
1474
1475 src = XEXP (src, 0);
1476 /* FALLTHRU */
1477
1478 case NOT:
1479 break;
1480
1481 case NEG:
1482 /* Check for nabs case. */
1483 if (GET_CODE (XEXP (src, 0)) != ABS)
1484 break;
1485
1486 src = XEXP (src, 0);
1487 /* FALLTHRU */
1488
1489 case ABS:
1490 if ((mode == DImode && !TARGET_AVX512VL)
1491 || (mode == SImode && !TARGET_SSSE3))
1492 return false;
1493 break;
1494
1495 case REG:
1496 return true;
1497
1498 case MEM:
1499 case CONST_INT:
1500 return REG_P (dst);
1501
1502 default:
1503 return false;
1504 }
1505
1506 if (!REG_P (XEXP (src, 0))
1507 && !MEM_P (XEXP (src, 0))
1508 && !CONST_INT_P (XEXP (src, 0)))
1509 return false;
1510
1511 if (GET_MODE (XEXP (src, 0)) != mode
1512 && !CONST_INT_P (XEXP (src, 0)))
1513 return false;
1514
1515 return true;
1516 }
1517
1518 /* The TImode version of scalar_to_vector_candidate_p. */
1519
1520 static bool
1521 timode_scalar_to_vector_candidate_p (rtx_insn *insn)
1522 {
1523 rtx def_set = pseudo_reg_set (insn);
1524
1525 if (!def_set)
1526 return false;
1527
1528 rtx src = SET_SRC (def_set);
1529 rtx dst = SET_DEST (def_set);
1530
1531 /* Only TImode load and store are allowed. */
1532 if (GET_MODE (dst) != TImode)
1533 return false;
1534
1535 if (MEM_P (dst))
1536 {
1537 /* Check for a store. Memory must be aligned, or unaligned stores
1538 must be optimal. Only support stores from a register, a standard
1539 SSE constant, or a CONST_WIDE_INT generated from a piecewise store.
1540
1541 ??? Verify performance impact before enabling CONST_INT for
1542 __int128 store. */
1543 if (misaligned_operand (dst, TImode)
1544 && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
1545 return false;
1546
1547 switch (GET_CODE (src))
1548 {
1549 default:
1550 return false;
1551
1552 case REG:
1553 case CONST_WIDE_INT:
1554 return true;
1555
1556 case CONST_INT:
1557 return standard_sse_constant_p (src, TImode);
1558 }
1559 }
1560 else if (MEM_P (src))
1561 {
1562 /* Check for a load. Memory must be aligned, or unaligned loads
1563 must be optimal. */
1564 return (REG_P (dst)
1565 && (!misaligned_operand (src, TImode)
1566 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
1567 }
1568
1569 return false;
1570 }
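/* Hedged example: a 16-byte copy through an __int128 temporary, e.g.
   "__int128 t = *p; *q = t;", produces a TImode load and a TImode store
   that both satisfy the checks above (given suitably aligned memory, or a
   target where unaligned vector accesses are optimal), and the chain is
   then rewritten to V1TImode moves by timode_scalar_chain::convert_insn.  */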
1571
1572 /* For a register REGNO, scan instructions for its defs and uses.
1573 Put REGNO in REGS if a def or use isn't in CANDIDATES. */
1574
1575 static void
1576 timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
1577 unsigned int regno)
1578 {
1579 for (df_ref def = DF_REG_DEF_CHAIN (regno);
1580 def;
1581 def = DF_REF_NEXT_REG (def))
1582 {
1583 if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1584 {
1585 if (dump_file)
1586 fprintf (dump_file,
1587 "r%d has non convertible def in insn %d\n",
1588 regno, DF_REF_INSN_UID (def));
1589
1590 bitmap_set_bit (regs, regno);
1591 break;
1592 }
1593 }
1594
1595 for (df_ref ref = DF_REG_USE_CHAIN (regno);
1596 ref;
1597 ref = DF_REF_NEXT_REG (ref))
1598 {
1599 /* Debug instructions are skipped. */
1600 if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
1601 && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1602 {
1603 if (dump_file)
1604 fprintf (dump_file,
1605 "r%d has non convertible use in insn %d\n",
1606 regno, DF_REF_INSN_UID (ref));
1607
1608 bitmap_set_bit (regs, regno);
1609 break;
1610 }
1611 }
1612 }
1613
1614 /* The TImode version of remove_non_convertible_regs. */
1615
1616 static void
1617 timode_remove_non_convertible_regs (bitmap candidates)
1618 {
1619 bitmap_iterator bi;
1620 unsigned id;
1621 bitmap regs = BITMAP_ALLOC (NULL);
1622
1623 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
1624 {
1625 rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
1626 rtx dest = SET_DEST (def_set);
1627 rtx src = SET_SRC (def_set);
1628
1629 if ((!REG_P (dest)
1630 || bitmap_bit_p (regs, REGNO (dest))
1631 || HARD_REGISTER_P (dest))
1632 && (!REG_P (src)
1633 || bitmap_bit_p (regs, REGNO (src))
1634 || HARD_REGISTER_P (src)))
1635 continue;
1636
1637 if (REG_P (dest))
1638 timode_check_non_convertible_regs (candidates, regs,
1639 REGNO (dest));
1640
1641 if (REG_P (src))
1642 timode_check_non_convertible_regs (candidates, regs,
1643 REGNO (src));
1644 }
1645
1646 EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
1647 {
1648 for (df_ref def = DF_REG_DEF_CHAIN (id);
1649 def;
1650 def = DF_REF_NEXT_REG (def))
1651 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
1652 {
1653 if (dump_file)
1654 fprintf (dump_file, "Removing insn %d from candidates list\n",
1655 DF_REF_INSN_UID (def));
1656
1657 bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
1658 }
1659
1660 for (df_ref ref = DF_REG_USE_CHAIN (id);
1661 ref;
1662 ref = DF_REF_NEXT_REG (ref))
1663 if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
1664 {
1665 if (dump_file)
1666 fprintf (dump_file, "Removing insn %d from candidates list\n",
1667 DF_REF_INSN_UID (ref));
1668
1669 bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
1670 }
1671 }
1672
1673 BITMAP_FREE (regs);
1674 }
1675
1676 /* Main STV pass function. Find and convert scalar
1677 instructions into vector mode when profitable. */
1678
1679 static unsigned int
1680 convert_scalars_to_vector (bool timode_p)
1681 {
1682 basic_block bb;
1683 int converted_insns = 0;
1684
1685 bitmap_obstack_initialize (NULL);
1686 const machine_mode cand_mode[3] = { SImode, DImode, TImode };
1687 const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
1688 bitmap_head candidates[3]; /* { SImode, DImode, TImode } */
1689 for (unsigned i = 0; i < 3; ++i)
1690 bitmap_initialize (&candidates[i], &bitmap_default_obstack);
1691
1692 calculate_dominance_info (CDI_DOMINATORS);
1693 df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
1694 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
1695 df_analyze ();
1696
1697 /* Find all instructions we want to convert into vector mode. */
1698 if (dump_file)
1699 fprintf (dump_file, "Searching for mode conversion candidates...\n");
1700
1701 FOR_EACH_BB_FN (bb, cfun)
1702 {
1703 rtx_insn *insn;
1704 FOR_BB_INSNS (bb, insn)
1705 if (timode_p
1706 && timode_scalar_to_vector_candidate_p (insn))
1707 {
1708 if (dump_file)
1709 fprintf (dump_file, " insn %d is marked as a TImode candidate\n",
1710 INSN_UID (insn));
1711
1712 bitmap_set_bit (&candidates[2], INSN_UID (insn));
1713 }
1714 else if (!timode_p)
1715 {
1716 /* Check {SI,DI}mode. */
1717 for (unsigned i = 0; i <= 1; ++i)
1718 if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
1719 {
1720 if (dump_file)
1721 fprintf (dump_file, " insn %d is marked as a %s candidate\n",
1722 INSN_UID (insn), i == 0 ? "SImode" : "DImode");
1723
1724 bitmap_set_bit (&candidates[i], INSN_UID (insn));
1725 break;
1726 }
1727 }
1728 }
1729
1730 if (timode_p)
1731 timode_remove_non_convertible_regs (&candidates[2]);
1732
1733 for (unsigned i = 0; i <= 2; ++i)
1734 if (!bitmap_empty_p (&candidates[i]))
1735 break;
1736 else if (i == 2 && dump_file)
1737 fprintf (dump_file, "There are no candidates for optimization.\n");
1738
1739 for (unsigned i = 0; i <= 2; ++i)
1740 while (!bitmap_empty_p (&candidates[i]))
1741 {
1742 unsigned uid = bitmap_first_set_bit (&candidates[i]);
1743 scalar_chain *chain;
1744
1745 if (cand_mode[i] == TImode)
1746 chain = new timode_scalar_chain;
1747 else
1748 chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);
1749
1750 /* Find the instruction chain we want to convert to vector mode.
1751 Check all uses and definitions to estimate all required
1752 conversions. */
1753 chain->build (&candidates[i], uid);
1754
1755 if (chain->compute_convert_gain () > 0)
1756 converted_insns += chain->convert ();
1757 else
1758 if (dump_file)
1759 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
1760 chain->chain_id);
1761
1762 delete chain;
1763 }
1764
1765 if (dump_file)
1766 fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
1767
1768 for (unsigned i = 0; i <= 2; ++i)
1769 bitmap_release (&candidates[i]);
1770 bitmap_obstack_release (NULL);
1771 df_process_deferred_rescans ();
1772
1773 /* Conversion means we may have 128bit register spills/fills
1774 which require an aligned stack. */
1775 if (converted_insns)
1776 {
1777 if (crtl->stack_alignment_needed < 128)
1778 crtl->stack_alignment_needed = 128;
1779 if (crtl->stack_alignment_estimated < 128)
1780 crtl->stack_alignment_estimated = 128;
1781
1782 crtl->stack_realign_needed
1783 = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
1784 crtl->stack_realign_tried = crtl->stack_realign_needed;
1785
1786 crtl->stack_realign_processed = true;
1787
1788 if (!crtl->drap_reg)
1789 {
1790 rtx drap_rtx = targetm.calls.get_drap_rtx ();
1791
1792 /* stack_realign_drap and drap_rtx must match. */
1793 gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));
1794
1795 /* Do nothing if NULL is returned,
1796 which means DRAP is not needed. */
1797 if (drap_rtx != NULL)
1798 {
1799 crtl->args.internal_arg_pointer = drap_rtx;
1800
1801 /* Call fixup_tail_calls to clean up
1802 REG_EQUIV note if DRAP is needed. */
1803 fixup_tail_calls ();
1804 }
1805 }
1806
1807 /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */
1808 if (TARGET_64BIT)
1809 for (tree parm = DECL_ARGUMENTS (current_function_decl);
1810 parm; parm = DECL_CHAIN (parm))
1811 {
1812 if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
1813 continue;
1814 if (DECL_RTL_SET_P (parm)
1815 && GET_MODE (DECL_RTL (parm)) == V1TImode)
1816 {
1817 rtx r = DECL_RTL (parm);
1818 if (REG_P (r))
1819 SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
1820 }
1821 if (DECL_INCOMING_RTL (parm)
1822 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
1823 {
1824 rtx r = DECL_INCOMING_RTL (parm);
1825 if (REG_P (r))
1826 DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
1827 }
1828 }
1829 }
1830
1831 return 0;
1832 }
1833
1834 static unsigned int
1835 rest_of_handle_insert_vzeroupper (void)
1836 {
1837 /* vzeroupper instructions are inserted immediately after reload to
1838 account for possible spills from 256bit or 512bit registers. The pass
1839 reuses the mode switching infrastructure by re-running the mode
1840 insertion pass, so disable entities that have already been processed. */
1841 for (int i = 0; i < MAX_386_ENTITIES; i++)
1842 ix86_optimize_mode_switching[i] = 0;
1843
1844 ix86_optimize_mode_switching[AVX_U128] = 1;
1845
1846 /* Call optimize_mode_switching. */
1847 g->get_passes ()->execute_pass_mode_switching ();
1848
1849 df_analyze ();
1850 return 0;
1851 }
1852
1853 namespace {
1854
1855 const pass_data pass_data_insert_vzeroupper =
1856 {
1857 RTL_PASS, /* type */
1858 "vzeroupper", /* name */
1859 OPTGROUP_NONE, /* optinfo_flags */
1860 TV_MACH_DEP, /* tv_id */
1861 0, /* properties_required */
1862 0, /* properties_provided */
1863 0, /* properties_destroyed */
1864 0, /* todo_flags_start */
1865 TODO_df_finish, /* todo_flags_finish */
1866 };
1867
1868 class pass_insert_vzeroupper : public rtl_opt_pass
1869 {
1870 public:
1871 pass_insert_vzeroupper(gcc::context *ctxt)
1872 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
1873 {}
1874
1875 /* opt_pass methods: */
1876 virtual bool gate (function *)
1877 {
1878 return TARGET_AVX && TARGET_VZEROUPPER
1879 && flag_expensive_optimizations && !optimize_size;
1880 }
1881
1882 virtual unsigned int execute (function *)
1883 {
1884 return rest_of_handle_insert_vzeroupper ();
1885 }
1886
1887 }; // class pass_insert_vzeroupper
1888
1889 const pass_data pass_data_stv =
1890 {
1891 RTL_PASS, /* type */
1892 "stv", /* name */
1893 OPTGROUP_NONE, /* optinfo_flags */
1894 TV_MACH_DEP, /* tv_id */
1895 0, /* properties_required */
1896 0, /* properties_provided */
1897 0, /* properties_destroyed */
1898 0, /* todo_flags_start */
1899 TODO_df_finish, /* todo_flags_finish */
1900 };
1901
1902 class pass_stv : public rtl_opt_pass
1903 {
1904 public:
1905 pass_stv (gcc::context *ctxt)
1906 : rtl_opt_pass (pass_data_stv, ctxt),
1907 timode_p (false)
1908 {}
1909
1910 /* opt_pass methods: */
1911 virtual bool gate (function *)
1912 {
1913 return ((!timode_p || TARGET_64BIT)
1914 && TARGET_STV && TARGET_SSE2 && optimize > 1);
1915 }
1916
1917 virtual unsigned int execute (function *)
1918 {
1919 return convert_scalars_to_vector (timode_p);
1920 }
1921
1922 opt_pass *clone ()
1923 {
1924 return new pass_stv (m_ctxt);
1925 }
1926
1927 void set_pass_param (unsigned int n, bool param)
1928 {
1929 gcc_assert (n == 0);
1930 timode_p = param;
1931 }
1932
1933 private:
1934 bool timode_p;
1935 }; // class pass_stv
1936
1937 } // anon namespace
1938
1939 rtl_opt_pass *
1940 make_pass_insert_vzeroupper (gcc::context *ctxt)
1941 {
1942 return new pass_insert_vzeroupper (ctxt);
1943 }
1944
1945 rtl_opt_pass *
1946 make_pass_stv (gcc::context *ctxt)
1947 {
1948 return new pass_stv (ctxt);
1949 }
1950
1951 /* Insert ENDBR and pseudo patchable-area instructions. */
1952
1953 static void
1954 rest_of_insert_endbr_and_patchable_area (bool need_endbr,
1955 unsigned int patchable_area_size)
1956 {
1957 rtx endbr;
1958 rtx_insn *insn;
1959 rtx_insn *endbr_insn = NULL;
1960 basic_block bb;
1961
1962 if (need_endbr)
1963 {
1964 /* Currently emit ENDBR if it's a tracking function, i.e. 'nocf_check'
1965 is absent among the function attributes. Later an optimization will
1966 be introduced that analyzes whether the address of a static function
1967 is taken. A static function whose address is not taken will get a
1968 nocf_check attribute. This will allow the number of ENDBR
1969 instructions to be reduced. */
1970 if (!lookup_attribute ("nocf_check",
1971 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
1972 && (!flag_manual_endbr
1973 || lookup_attribute ("cf_check",
1974 DECL_ATTRIBUTES (cfun->decl)))
1975 && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
1976 || ix86_cmodel == CM_LARGE
1977 || ix86_cmodel == CM_LARGE_PIC
1978 || flag_force_indirect_call
1979 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
1980 && DECL_DLLIMPORT_P (cfun->decl))))
1981 {
1982 if (crtl->profile && flag_fentry)
1983 {
1984 /* Queue ENDBR insertion to x86_function_profiler.
1985 NB: Any patchable-area insn will be inserted after
1986 ENDBR. */
1987 cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
1988 }
1989 else
1990 {
1991 endbr = gen_nop_endbr ();
1992 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
1993 rtx_insn *insn = BB_HEAD (bb);
1994 endbr_insn = emit_insn_before (endbr, insn);
1995 }
1996 }
1997 }
1998
1999 if (patchable_area_size)
2000 {
2001 if (crtl->profile && flag_fentry)
2002 {
2003 /* Queue patchable-area insertion to x86_function_profiler.
2004 NB: If there is a queued ENDBR, x86_function_profiler
2005 will also handle patchable-area. */
2006 if (!cfun->machine->insn_queued_at_entrance)
2007 cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
2008 }
2009 else
2010 {
2011 rtx patchable_area
2012 = gen_patchable_area (GEN_INT (patchable_area_size),
2013 GEN_INT (crtl->patch_area_entry == 0));
2014 if (endbr_insn)
2015 emit_insn_after (patchable_area, endbr_insn);
2016 else
2017 {
2018 bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
2019 insn = BB_HEAD (bb);
2020 emit_insn_before (patchable_area, insn);
2021 }
2022 }
2023 }
2024
2025 if (!need_endbr)
2026 return;
2027
2028 bb = 0;
2029 FOR_EACH_BB_FN (bb, cfun)
2030 {
2031 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
2032 insn = NEXT_INSN (insn))
2033 {
2034 if (CALL_P (insn))
2035 {
2036 need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
2037 if (!need_endbr && !SIBLING_CALL_P (insn))
2038 {
2039 rtx call = get_call_rtx_from (insn);
2040 rtx fnaddr = XEXP (call, 0);
2041 tree fndecl = NULL_TREE;
2042
2043 /* Also generate ENDBRANCH for a non-tail call which
2044 may return via an indirect branch. */
2045 if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
2046 fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
2047 if (fndecl == NULL_TREE)
2048 fndecl = MEM_EXPR (fnaddr);
2049 if (fndecl
2050 && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
2051 && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
2052 fndecl = NULL_TREE;
2053 if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
2054 {
2055 tree fntype = TREE_TYPE (fndecl);
2056 if (lookup_attribute ("indirect_return",
2057 TYPE_ATTRIBUTES (fntype)))
2058 need_endbr = true;
2059 }
2060 }
2061 if (!need_endbr)
2062 continue;
2063 /* Generate ENDBRANCH after a CALL which can return more than
2064 once, i.e. setjmp-like functions. */
2065
2066 endbr = gen_nop_endbr ();
2067 emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
2068 continue;
2069 }
2070
2071 if (JUMP_P (insn) && flag_cet_switch)
2072 {
2073 rtx target = JUMP_LABEL (insn);
2074 if (target == NULL_RTX || ANY_RETURN_P (target))
2075 continue;
2076
2077 /* Check that the jump is a switch-table jump. */
2078 rtx_insn *label = as_a<rtx_insn *> (target);
2079 rtx_insn *table = next_insn (label);
2080 if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
2081 continue;
2082
2083 /* For the indirect jump, find all the places it can jump to and
2084 insert an ENDBRANCH there. This is done under a special flag
2085 that controls ENDBRANCH generation for switch stmts. */
2086 edge_iterator ei;
2087 edge e;
2088 basic_block dest_blk;
2089
2090 FOR_EACH_EDGE (e, ei, bb->succs)
2091 {
2092 rtx_insn *insn;
2093
2094 dest_blk = e->dest;
2095 insn = BB_HEAD (dest_blk);
2096 gcc_assert (LABEL_P (insn));
2097 endbr = gen_nop_endbr ();
2098 emit_insn_after (endbr, insn);
2099 }
2100 continue;
2101 }
2102
2103 if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
2104 {
2105 endbr = gen_nop_endbr ();
2106 emit_insn_after (endbr, insn);
2107 continue;
2108 }
2109 }
2110 }
2111
2112 return;
2113 }
2114
2115 namespace {
2116
2117 const pass_data pass_data_insert_endbr_and_patchable_area =
2118 {
2119 RTL_PASS, /* type. */
2120 "endbr_and_patchable_area", /* name. */
2121 OPTGROUP_NONE, /* optinfo_flags. */
2122 TV_MACH_DEP, /* tv_id. */
2123 0, /* properties_required. */
2124 0, /* properties_provided. */
2125 0, /* properties_destroyed. */
2126 0, /* todo_flags_start. */
2127 0, /* todo_flags_finish. */
2128 };
2129
2130 class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
2131 {
2132 public:
2133 pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
2134 : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
2135 {}
2136
2137 /* opt_pass methods: */
2138 virtual bool gate (function *)
2139 {
2140 need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
2141 patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
2142 return need_endbr || patchable_area_size;
2143 }
2144
2145 virtual unsigned int execute (function *)
2146 {
2147 timevar_push (TV_MACH_DEP);
2148 rest_of_insert_endbr_and_patchable_area (need_endbr,
2149 patchable_area_size);
2150 timevar_pop (TV_MACH_DEP);
2151 return 0;
2152 }
2153
2154 private:
2155 bool need_endbr;
2156 unsigned int patchable_area_size;
2157 }; // class pass_insert_endbr_and_patchable_area
2158
2159 } // anon namespace
2160
2161 rtl_opt_pass *
2162 make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
2163 {
2164 return new pass_insert_endbr_and_patchable_area (ctxt);
2165 }
2166
2167 /* At entry of the nearest common dominator for basic blocks with
2168 conversions/rcp/sqrt/rsqrt/round, generate a single
2169 vxorps %xmmN, %xmmN, %xmmN
2170 for all
2171 vcvtss2sd op, %xmmN, %xmmX
2172 vcvtsd2ss op, %xmmN, %xmmX
2173 vcvtsi2ss op, %xmmN, %xmmX
2174 vcvtsi2sd op, %xmmN, %xmmX
2175
2176 NB: We want to generate only a single vxorps to cover the whole
2177 function. The LCM algorithm isn't appropriate here since it may
2178 place a vxorps inside the loop. */
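/* For illustration (hypothetical code, not from the GCC sources): in

	double f (int x) { return x; }

   the scalar conversion

	vcvtsi2sd %edi, %xmm0, %xmm0

   only writes the low element of %xmm0 and therefore carries a false
   dependency on whatever last wrote %xmm0.  After this pass the
   conversion instead reads a pseudo that a single hoisted

	vxorps %xmmN, %xmmN, %xmmN

   has zeroed, which breaks that dependency.  */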
2179
2180 static unsigned int
2181 remove_partial_avx_dependency (void)
2182 {
2183 timevar_push (TV_MACH_DEP);
2184
2185 bitmap_obstack_initialize (NULL);
2186 bitmap convert_bbs = BITMAP_ALLOC (NULL);
2187
2188 basic_block bb;
2189 rtx_insn *insn, *set_insn;
2190 rtx set;
2191 rtx v4sf_const0 = NULL_RTX;
2192
2193 auto_vec<rtx_insn *> control_flow_insns;
2194
2195 /* We create invalid RTL initially so defer rescans. */
2196 df_set_flags (DF_DEFER_INSN_RESCAN);
2197
2198 FOR_EACH_BB_FN (bb, cfun)
2199 {
2200 FOR_BB_INSNS (bb, insn)
2201 {
2202 if (!NONDEBUG_INSN_P (insn))
2203 continue;
2204
2205 set = single_set (insn);
2206 if (!set)
2207 continue;
2208
2209 if (get_attr_avx_partial_xmm_update (insn)
2210 != AVX_PARTIAL_XMM_UPDATE_TRUE)
2211 continue;
2212
2213 /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
2214 SI -> SF, SI -> DF, DI -> SF, DI -> DF, sqrt, rsqrt, rcp,
2215 round, to vec_dup and vec_merge with subreg. */
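/* A rough sketch of the rewrite done below (register numbers and
   modes are illustrative):

	(set (reg:DF d) (float:DF (reg:SI s)))

   becomes

	(set (reg:V2DF tmp)
	     (vec_merge:V2DF
	       (vec_duplicate:V2DF (float:DF (reg:SI s)))
	       (subreg:V2DF (reg:V4SF zero) 0)
	       (const_int 1)))
	(set (reg:DF d) (subreg:DF (reg:V2DF tmp) 0))  */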
2216 rtx src = SET_SRC (set);
2217 rtx dest = SET_DEST (set);
2218 machine_mode dest_mode = GET_MODE (dest);
2219 bool convert_p = false;
2220 switch (GET_CODE (src))
2221 {
2222 case FLOAT:
2223 case FLOAT_EXTEND:
2224 case FLOAT_TRUNCATE:
2225 case UNSIGNED_FLOAT:
2226 convert_p = true;
2227 break;
2228 default:
2229 break;
2230 }
2231
2232 /* Only handle conversions here. */
2233 machine_mode src_mode
2234 = convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode;
2235 switch (src_mode)
2236 {
2237 case E_SFmode:
2238 case E_DFmode:
2239 if (TARGET_USE_VECTOR_FP_CONVERTS
2240 || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY)
2241 continue;
2242 break;
2243 case E_SImode:
2244 case E_DImode:
2245 if (TARGET_USE_VECTOR_CONVERTS
2246 || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY)
2247 continue;
2248 break;
2249 case E_VOIDmode:
2250 gcc_assert (!convert_p);
2251 break;
2252 default:
2253 gcc_unreachable ();
2254 }
2255
2256 if (!v4sf_const0)
2257 v4sf_const0 = gen_reg_rtx (V4SFmode);
2258
2259 rtx zero;
2260 machine_mode dest_vecmode;
2261 switch (dest_mode)
2262 {
2263 case E_HFmode:
2264 dest_vecmode = V8HFmode;
2265 zero = gen_rtx_SUBREG (V8HFmode, v4sf_const0, 0);
2266 break;
2267 case E_SFmode:
2268 dest_vecmode = V4SFmode;
2269 zero = v4sf_const0;
2270 break;
2271 case E_DFmode:
2272 dest_vecmode = V2DFmode;
2273 zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
2274 break;
2275 default:
2276 gcc_unreachable ();
2277 }
2278
2279 /* Change source to vector mode. */
2280 src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
2281 src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
2282 GEN_INT (HOST_WIDE_INT_1U));
2283 /* Change destination to vector mode. */
2284 rtx vec = gen_reg_rtx (dest_vecmode);
2285 /* Generate an XMM vector SET. */
2286 set = gen_rtx_SET (vec, src);
2287 set_insn = emit_insn_before (set, insn);
2288 df_insn_rescan (set_insn);
2289
2290 if (cfun->can_throw_non_call_exceptions)
2291 {
2292 /* Handle REG_EH_REGION note. */
2293 rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
2294 if (note)
2295 {
2296 control_flow_insns.safe_push (set_insn);
2297 add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
2298 }
2299 }
2300
2301 src = gen_rtx_SUBREG (dest_mode, vec, 0);
2302 set = gen_rtx_SET (dest, src);
2303
2304 /* Drop possible dead definitions. */
2305 PATTERN (insn) = set;
2306
2307 INSN_CODE (insn) = -1;
2308 recog_memoized (insn);
2309 df_insn_rescan (insn);
2310 bitmap_set_bit (convert_bbs, bb->index);
2311 }
2312 }
2313
2314 if (v4sf_const0)
2315 {
2316 /* (Re-)discover loops so that bb->loop_father can be used in the
2317 analysis below. */
2318 calculate_dominance_info (CDI_DOMINATORS);
2319 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2320
2321 /* Generate a vxorps at the entry of the nearest common dominator
2322 of the basic blocks with conversions, hoisted into the fake loop
2323 that contains the whole function, so that there is only a single
2324 vxorps in the whole function. */
2325 bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
2326 convert_bbs);
2327 while (bb->loop_father->latch
2328 != EXIT_BLOCK_PTR_FOR_FN (cfun))
2329 bb = get_immediate_dominator (CDI_DOMINATORS,
2330 bb->loop_father->header);
2331
2332 set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));
2333
2334 insn = BB_HEAD (bb);
2335 while (insn && !NONDEBUG_INSN_P (insn))
2336 {
2337 if (insn == BB_END (bb))
2338 {
2339 insn = NULL;
2340 break;
2341 }
2342 insn = NEXT_INSN (insn);
2343 }
2344 if (insn == BB_HEAD (bb))
2345 set_insn = emit_insn_before (set, insn);
2346 else
2347 set_insn = emit_insn_after (set,
2348 insn ? PREV_INSN (insn) : BB_END (bb));
2349 df_insn_rescan (set_insn);
2350 loop_optimizer_finalize ();
2351
2352 if (!control_flow_insns.is_empty ())
2353 {
2354 free_dominance_info (CDI_DOMINATORS);
2355
2356 unsigned int i;
2357 FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
2358 if (control_flow_insn_p (insn))
2359 {
2360 /* Split the block after insn. There will be a fallthru
2361 edge, which is OK so we keep it. We have to create
2362 the exception edges ourselves. */
2363 bb = BLOCK_FOR_INSN (insn);
2364 split_block (bb, insn);
2365 rtl_make_eh_edge (NULL, bb, BB_END (bb));
2366 }
2367 }
2368 }
2369
2370 df_process_deferred_rescans ();
2371 df_clear_flags (DF_DEFER_INSN_RESCAN);
2372 bitmap_obstack_release (NULL);
2373 BITMAP_FREE (convert_bbs);
2374
2375 timevar_pop (TV_MACH_DEP);
2376 return 0;
2377 }
2378
2379 namespace {
2380
2381 const pass_data pass_data_remove_partial_avx_dependency =
2382 {
2383 RTL_PASS, /* type */
2384 "rpad", /* name */
2385 OPTGROUP_NONE, /* optinfo_flags */
2386 TV_MACH_DEP, /* tv_id */
2387 0, /* properties_required */
2388 0, /* properties_provided */
2389 0, /* properties_destroyed */
2390 0, /* todo_flags_start */
2391 0, /* todo_flags_finish */
2392 };
2393
2394 class pass_remove_partial_avx_dependency : public rtl_opt_pass
2395 {
2396 public:
2397 pass_remove_partial_avx_dependency (gcc::context *ctxt)
2398 : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
2399 {}
2400
2401 /* opt_pass methods: */
2402 virtual bool gate (function *)
2403 {
2404 return (TARGET_AVX
2405 && TARGET_SSE_PARTIAL_REG_DEPENDENCY
2406 && TARGET_SSE_MATH
2407 && optimize
2408 && optimize_function_for_speed_p (cfun));
2409 }
2410
2411 virtual unsigned int execute (function *)
2412 {
2413 return remove_partial_avx_dependency ();
2414 }
2415 }; // class pass_remove_partial_avx_dependency
2416
2417 } // anon namespace
2418
2419 rtl_opt_pass *
2420 make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
2421 {
2422 return new pass_remove_partial_avx_dependency (ctxt);
2423 }
2424
2425 /* This compares the priority of target features in function DECL1
2426 and DECL2. It returns a positive value if DECL1 has higher priority,
2427 a negative value if DECL2 has higher priority, and 0 if they are the
2428 same. */
2429
2430 int
2431 ix86_compare_version_priority (tree decl1, tree decl2)
2432 {
2433 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
2434 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
2435
2436 return (int)priority1 - (int)priority2;
2437 }
2438
2439 /* V1 and V2 point to function versions with different priorities
2440 based on the target ISA. This function compares their priorities. */
2441
2442 static int
2443 feature_compare (const void *v1, const void *v2)
2444 {
2445 typedef struct _function_version_info
2446 {
2447 tree version_decl;
2448 tree predicate_chain;
2449 unsigned int dispatch_priority;
2450 } function_version_info;
2451
2452 const function_version_info c1 = *(const function_version_info *)v1;
2453 const function_version_info c2 = *(const function_version_info *)v2;
2454 return (c2.dispatch_priority - c1.dispatch_priority);
2455 }
2456
2457 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
2458 to return a pointer to VERSION_DECL if the outcome of the expression
2459 formed by PREDICATE_CHAIN is true. This function will be called during
2460 version dispatch to decide which function version to execute. It returns
2461 the basic block at the end, to which more conditions can be added. */
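/* A hypothetical sketch of the GIMPLE added for one version (all names
   are illustrative only, not the actual predicate decls):

	cond_1 = __cpu_predicate_1 (arg_1);
	cond_2 = __cpu_predicate_2 (arg_2);
	cond_1 = MIN_EXPR <cond_2, cond_1>;
	if (cond_1 > 0)
	  {
	    result = (void *) &foo.some_version;
	    return result;
	  }

   with control otherwise falling through to the block that tests the
   next version.  */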
2462
2463 static basic_block
2464 add_condition_to_bb (tree function_decl, tree version_decl,
2465 tree predicate_chain, basic_block new_bb)
2466 {
2467 gimple *return_stmt;
2468 tree convert_expr, result_var;
2469 gimple *convert_stmt;
2470 gimple *call_cond_stmt;
2471 gimple *if_else_stmt;
2472
2473 basic_block bb1, bb2, bb3;
2474 edge e12, e23;
2475
2476 tree cond_var, and_expr_var = NULL_TREE;
2477 gimple_seq gseq;
2478
2479 tree predicate_decl, predicate_arg;
2480
2481 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
2482
2483 gcc_assert (new_bb != NULL);
2484 gseq = bb_seq (new_bb);
2485
2486
2487 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
2488 build_fold_addr_expr (version_decl));
2489 result_var = create_tmp_var (ptr_type_node);
2490 convert_stmt = gimple_build_assign (result_var, convert_expr);
2491 return_stmt = gimple_build_return (result_var);
2492
2493 if (predicate_chain == NULL_TREE)
2494 {
2495 gimple_seq_add_stmt (&gseq, convert_stmt);
2496 gimple_seq_add_stmt (&gseq, return_stmt);
2497 set_bb_seq (new_bb, gseq);
2498 gimple_set_bb (convert_stmt, new_bb);
2499 gimple_set_bb (return_stmt, new_bb);
2500 pop_cfun ();
2501 return new_bb;
2502 }
2503
2504 while (predicate_chain != NULL)
2505 {
2506 cond_var = create_tmp_var (integer_type_node);
2507 predicate_decl = TREE_PURPOSE (predicate_chain);
2508 predicate_arg = TREE_VALUE (predicate_chain);
2509 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
2510 gimple_call_set_lhs (call_cond_stmt, cond_var);
2511
2512 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
2513 gimple_set_bb (call_cond_stmt, new_bb);
2514 gimple_seq_add_stmt (&gseq, call_cond_stmt);
2515
2516 predicate_chain = TREE_CHAIN (predicate_chain);
2517
2518 if (and_expr_var == NULL)
2519 and_expr_var = cond_var;
2520 else
2521 {
2522 gimple *assign_stmt;
2523 /* Use MIN_EXPR to check whether any predicate result is zero:
2524 and_expr_var = MIN_EXPR <cond_var, and_expr_var>. */
2525 assign_stmt = gimple_build_assign (and_expr_var,
2526 build2 (MIN_EXPR, integer_type_node,
2527 cond_var, and_expr_var));
2528
2529 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
2530 gimple_set_bb (assign_stmt, new_bb);
2531 gimple_seq_add_stmt (&gseq, assign_stmt);
2532 }
2533 }
2534
2535 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
2536 integer_zero_node,
2537 NULL_TREE, NULL_TREE);
2538 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
2539 gimple_set_bb (if_else_stmt, new_bb);
2540 gimple_seq_add_stmt (&gseq, if_else_stmt);
2541
2542 gimple_seq_add_stmt (&gseq, convert_stmt);
2543 gimple_seq_add_stmt (&gseq, return_stmt);
2544 set_bb_seq (new_bb, gseq);
2545
2546 bb1 = new_bb;
2547 e12 = split_block (bb1, if_else_stmt);
2548 bb2 = e12->dest;
2549 e12->flags &= ~EDGE_FALLTHRU;
2550 e12->flags |= EDGE_TRUE_VALUE;
2551
2552 e23 = split_block (bb2, return_stmt);
2553
2554 gimple_set_bb (convert_stmt, bb2);
2555 gimple_set_bb (return_stmt, bb2);
2556
2557 bb3 = e23->dest;
2558 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2559
2560 remove_edge (e23);
2561 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
2562
2563 pop_cfun ();
2564
2565 return bb3;
2566 }
2567
2568 /* This function generates the dispatch function for
2569 multi-versioned functions. DISPATCH_DECL is the function which will
2570 contain the dispatch logic. FNDECLS holds the function choices for
2571 dispatch and is passed as a vector. EMPTY_BB is the basic block pointer
2572 in DISPATCH_DECL in which the dispatch code is generated. */
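/* For reference, a hypothetical multi-versioned function at the source
   level (C++ function multiversioning):

	__attribute__ ((target ("default"))) int foo () { return 0; }
	__attribute__ ((target ("avx2"))) int foo () { return 1; }
	__attribute__ ((target ("arch=atom"))) int foo () { return 2; }

   The dispatch code built here tests the higher-priority versions
   first and falls back to the "default" version last.  */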
2573
2574 static int
2575 dispatch_function_versions (tree dispatch_decl,
2576 void *fndecls_p,
2577 basic_block *empty_bb)
2578 {
2579 tree default_decl;
2580 gimple *ifunc_cpu_init_stmt;
2581 gimple_seq gseq;
2582 int ix;
2583 tree ele;
2584 vec<tree> *fndecls;
2585 unsigned int num_versions = 0;
2586 unsigned int actual_versions = 0;
2587 unsigned int i;
2588
2589 struct _function_version_info
2590 {
2591 tree version_decl;
2592 tree predicate_chain;
2593 unsigned int dispatch_priority;
2594 } *function_version_info;
2595
2596 gcc_assert (dispatch_decl != NULL
2597 && fndecls_p != NULL
2598 && empty_bb != NULL);
2599
2600 /* fndecls_p is actually a vector. */
2601 fndecls = static_cast<vec<tree> *> (fndecls_p);
2602
2603 /* There must be at least one version other than the default. */
2604 num_versions = fndecls->length ();
2605 gcc_assert (num_versions >= 2);
2606
2607 function_version_info = (struct _function_version_info *)
2608 XNEWVEC (struct _function_version_info, (num_versions - 1));
2609
2610 /* The first version in the vector is the default decl. */
2611 default_decl = (*fndecls)[0];
2612
2613 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
2614
2615 gseq = bb_seq (*empty_bb);
2616 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
2617 constructors, so explicitly call __builtin_cpu_init here. */
2618 ifunc_cpu_init_stmt
2619 = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
2620 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
2621 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
2622 set_bb_seq (*empty_bb, gseq);
2623
2624 pop_cfun ();
2625
2626
2627 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
2628 {
2629 tree version_decl = ele;
2630 tree predicate_chain = NULL_TREE;
2631 unsigned int priority;
2632 /* Get attribute string, parse it and find the right predicate decl.
2633 The predicate function could be a lengthy combination of many
2634 features, like arch-type and various isa-variants. */
2635 priority = get_builtin_code_for_version (version_decl,
2636 &predicate_chain);
2637
2638 if (predicate_chain == NULL_TREE)
2639 continue;
2640
2641 function_version_info [actual_versions].version_decl = version_decl;
2642 function_version_info [actual_versions].predicate_chain
2643 = predicate_chain;
2644 function_version_info [actual_versions].dispatch_priority = priority;
2645 actual_versions++;
2646 }
2647
2648 /* Sort the versions according to descending order of dispatch priority. The
2649 priority is based on the ISA. This is not a perfect solution. There
2650 could still be ambiguity. If more than one function version is suitable
2651 to execute, which one should be dispatched? In the future, the user
2652 could be allowed to specify a dispatch priority next to the version.
2653 qsort (function_version_info, actual_versions,
2654 sizeof (struct _function_version_info), feature_compare);
2655
2656 for (i = 0; i < actual_versions; ++i)
2657 *empty_bb = add_condition_to_bb (dispatch_decl,
2658 function_version_info[i].version_decl,
2659 function_version_info[i].predicate_chain,
2660 *empty_bb);
2661
2662 /* Dispatch the default version at the end. */
2663 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
2664 NULL, *empty_bb);
2665
2666 free (function_version_info);
2667 return 0;
2668 }
2669
2670 /* This function changes the assembler name for functions that are
2671 versions. If DECL is a function version and has a "target"
2672 attribute, it appends the attribute string to its assembler name. */
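/* For example (illustrative only), a C++ version of foo declared with
   __attribute__ ((target ("avx2"))) would get an assembler name like
   "_Z3foov.avx2", while the "default" version keeps its original
   assembler name.  */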
2673
2674 static tree
2675 ix86_mangle_function_version_assembler_name (tree decl, tree id)
2676 {
2677 tree version_attr;
2678 const char *orig_name, *version_string;
2679 char *attr_str, *assembler_name;
2680
2681 if (DECL_DECLARED_INLINE_P (decl)
2682 && lookup_attribute ("gnu_inline",
2683 DECL_ATTRIBUTES (decl)))
2684 error_at (DECL_SOURCE_LOCATION (decl),
2685 "function versions cannot be marked as %<gnu_inline%>,"
2686 " bodies have to be generated");
2687
2688 if (DECL_VIRTUAL_P (decl)
2689 || DECL_VINDEX (decl))
2690 sorry ("virtual function multiversioning not supported");
2691
2692 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
2693
2694 /* target attribute string cannot be NULL. */
2695 gcc_assert (version_attr != NULL_TREE);
2696
2697 orig_name = IDENTIFIER_POINTER (id);
2698 version_string
2699 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
2700
2701 if (strcmp (version_string, "default") == 0)
2702 return id;
2703
2704 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
2705 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
2706
2707 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
2708
2709 /* Allow assembler name to be modified if already set. */
2710 if (DECL_ASSEMBLER_NAME_SET_P (decl))
2711 SET_DECL_RTL (decl, NULL);
2712
2713 tree ret = get_identifier (assembler_name);
2714 XDELETEVEC (attr_str);
2715 XDELETEVEC (assembler_name);
2716 return ret;
2717 }
2718
2719 tree
2720 ix86_mangle_decl_assembler_name (tree decl, tree id)
2721 {
2722 /* For function version, add the target suffix to the assembler name. */
2723 if (TREE_CODE (decl) == FUNCTION_DECL
2724 && DECL_FUNCTION_VERSIONED (decl))
2725 id = ix86_mangle_function_version_assembler_name (decl, id);
2726 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
2727 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
2728 #endif
2729
2730 return id;
2731 }
2732
2733 /* Make a dispatcher declaration for the multi-versioned function DECL.
2734 Calls to DECL function will be replaced with calls to the dispatcher
2735 by the front-end. Returns the decl of the dispatcher function. */
2736
2737 tree
2738 ix86_get_function_versions_dispatcher (void *decl)
2739 {
2740 tree fn = (tree) decl;
2741 struct cgraph_node *node = NULL;
2742 struct cgraph_node *default_node = NULL;
2743 struct cgraph_function_version_info *node_v = NULL;
2744 struct cgraph_function_version_info *first_v = NULL;
2745
2746 tree dispatch_decl = NULL;
2747
2748 struct cgraph_function_version_info *default_version_info = NULL;
2749
2750 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
2751
2752 node = cgraph_node::get (fn);
2753 gcc_assert (node != NULL);
2754
2755 node_v = node->function_version ();
2756 gcc_assert (node_v != NULL);
2757
2758 if (node_v->dispatcher_resolver != NULL)
2759 return node_v->dispatcher_resolver;
2760
2761 /* Find the default version and make it the first node. */
2762 first_v = node_v;
2763 /* Go to the beginning of the chain. */
2764 while (first_v->prev != NULL)
2765 first_v = first_v->prev;
2766 default_version_info = first_v;
2767 while (default_version_info != NULL)
2768 {
2769 if (is_function_default_version
2770 (default_version_info->this_node->decl))
2771 break;
2772 default_version_info = default_version_info->next;
2773 }
2774
2775 /* If there is no default node, just return NULL. */
2776 if (default_version_info == NULL)
2777 return NULL;
2778
2779 /* Make default info the first node. */
2780 if (first_v != default_version_info)
2781 {
2782 default_version_info->prev->next = default_version_info->next;
2783 if (default_version_info->next)
2784 default_version_info->next->prev = default_version_info->prev;
2785 first_v->prev = default_version_info;
2786 default_version_info->next = first_v;
2787 default_version_info->prev = NULL;
2788 }
2789
2790 default_node = default_version_info->this_node;
2791
2792 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
2793 if (targetm.has_ifunc_p ())
2794 {
2795 struct cgraph_function_version_info *it_v = NULL;
2796 struct cgraph_node *dispatcher_node = NULL;
2797 struct cgraph_function_version_info *dispatcher_version_info = NULL;
2798
2799 /* Right now, the dispatching is done via ifunc. */
2800 dispatch_decl = make_dispatcher_decl (default_node->decl);
2801
2802 dispatcher_node = cgraph_node::get_create (dispatch_decl);
2803 gcc_assert (dispatcher_node != NULL);
2804 dispatcher_node->dispatcher_function = 1;
2805 dispatcher_version_info
2806 = dispatcher_node->insert_new_function_version ();
2807 dispatcher_version_info->next = default_version_info;
2808 dispatcher_node->definition = 1;
2809
2810 /* Set the dispatcher for all the versions. */
2811 it_v = default_version_info;
2812 while (it_v != NULL)
2813 {
2814 it_v->dispatcher_resolver = dispatch_decl;
2815 it_v = it_v->next;
2816 }
2817 }
2818 else
2819 #endif
2820 {
2821 error_at (DECL_SOURCE_LOCATION (default_node->decl),
2822 "multiversioning needs %<ifunc%> which is not supported "
2823 "on this target");
2824 }
2825
2826 return dispatch_decl;
2827 }
2828
2829 /* Make the resolver function decl to dispatch the versions of
2830 a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is the
2831 ifunc alias that will point to the created resolver. Create an
2832 empty basic block in the resolver and store the pointer in
2833 EMPTY_BB. Return the decl of the resolver function. */
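/* A rough sketch of the result (names are hypothetical): the ifunc
   alias "foo" gets attribute ifunc ("foo.resolver"), and the decl
   built here is roughly

	void *foo.resolver (void)
	{
	  __builtin_cpu_init ();
	  if (<predicates for an avx2 version>)
	    return &foo.avx2;
	  ...
	  return &<default version>;
	}

   where the body is filled in later by dispatch_function_versions.  */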
2834
2835 static tree
2836 make_resolver_func (const tree default_decl,
2837 const tree ifunc_alias_decl,
2838 basic_block *empty_bb)
2839 {
2840 tree decl, type, t;
2841
2842 /* Create resolver function name based on default_decl. */
2843 tree decl_name = clone_function_name (default_decl, "resolver");
2844 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
2845
2846 /* The resolver function should return a (void *). */
2847 type = build_function_type_list (ptr_type_node, NULL_TREE);
2848
2849 decl = build_fn_decl (resolver_name, type);
2850 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
2851
2852 DECL_NAME (decl) = decl_name;
2853 TREE_USED (decl) = 1;
2854 DECL_ARTIFICIAL (decl) = 1;
2855 DECL_IGNORED_P (decl) = 1;
2856 TREE_PUBLIC (decl) = 0;
2857 DECL_UNINLINABLE (decl) = 1;
2858
2859 /* Resolver is not external, body is generated. */
2860 DECL_EXTERNAL (decl) = 0;
2861 DECL_EXTERNAL (ifunc_alias_decl) = 0;
2862
2863 DECL_CONTEXT (decl) = NULL_TREE;
2864 DECL_INITIAL (decl) = make_node (BLOCK);
2865 DECL_STATIC_CONSTRUCTOR (decl) = 0;
2866
2867 if (DECL_COMDAT_GROUP (default_decl)
2868 || TREE_PUBLIC (default_decl))
2869 {
2870 /* In this case, each translation unit with a call to this
2871 versioned function will emit a resolver. Ensure it
2872 is comdat to keep just one copy. */
2873 DECL_COMDAT (decl) = 1;
2874 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
2875 }
2876 else
2877 TREE_PUBLIC (ifunc_alias_decl) = 0;
2878
2879 /* Build result decl and add to function_decl. */
2880 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
2881 DECL_CONTEXT (t) = decl;
2882 DECL_ARTIFICIAL (t) = 1;
2883 DECL_IGNORED_P (t) = 1;
2884 DECL_RESULT (decl) = t;
2885
2886 gimplify_function_tree (decl);
2887 push_cfun (DECL_STRUCT_FUNCTION (decl));
2888 *empty_bb = init_lowered_empty_function (decl, false,
2889 profile_count::uninitialized ());
2890
2891 cgraph_node::add_new_function (decl, true);
2892 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
2893
2894 pop_cfun ();
2895
2896 gcc_assert (ifunc_alias_decl != NULL);
2897 /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */
2898 DECL_ATTRIBUTES (ifunc_alias_decl)
2899 = make_attribute ("ifunc", resolver_name,
2900 DECL_ATTRIBUTES (ifunc_alias_decl));
2901
2902 /* Create the alias for dispatch to resolver here. */
2903 cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
2904 return decl;
2905 }
2906
2907 /* Generate the dispatching code body to dispatch multi-versioned function
2908 DECL. The target hook is called to process the "target" attributes and
2909 provide the code to dispatch the right function at run-time. NODE points
2910 to the dispatcher decl whose body will be created. */
2911
2912 tree
2913 ix86_generate_version_dispatcher_body (void *node_p)
2914 {
2915 tree resolver_decl;
2916 basic_block empty_bb;
2917 tree default_ver_decl;
2918 struct cgraph_node *versn;
2919 struct cgraph_node *node;
2920
2921 struct cgraph_function_version_info *node_version_info = NULL;
2922 struct cgraph_function_version_info *versn_info = NULL;
2923
2924 node = (cgraph_node *)node_p;
2925
2926 node_version_info = node->function_version ();
2927 gcc_assert (node->dispatcher_function
2928 && node_version_info != NULL);
2929
2930 if (node_version_info->dispatcher_resolver)
2931 return node_version_info->dispatcher_resolver;
2932
2933 /* The first version in the chain corresponds to the default version. */
2934 default_ver_decl = node_version_info->next->this_node->decl;
2935
2936 /* node is going to be an alias, so remove the finalized bit. */
2937 node->definition = false;
2938
2939 resolver_decl = make_resolver_func (default_ver_decl,
2940 node->decl, &empty_bb);
2941
2942 node_version_info->dispatcher_resolver = resolver_decl;
2943
2944 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
2945
2946 auto_vec<tree, 2> fn_ver_vec;
2947
2948 for (versn_info = node_version_info->next; versn_info;
2949 versn_info = versn_info->next)
2950 {
2951 versn = versn_info->this_node;
2952 /* Check for virtual functions here again, as by this time it should
2953 have been determined if this function needs a vtable index or
2954 not. This happens for methods in derived classes that override
2955 virtual methods in base classes but are not explicitly marked as
2956 virtual. */
2957 if (DECL_VINDEX (versn->decl))
2958 sorry ("virtual function multiversioning not supported");
2959
2960 fn_ver_vec.safe_push (versn->decl);
2961 }
2962
2963 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
2964 cgraph_edge::rebuild_edges ();
2965 pop_cfun ();
2966 return resolver_decl;
2967 }
2968
2969