/* Copyright (C) 1988-2021 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "cfgbuild.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "tm-constrs.h"
#include "cselib.h"
#include "sched-int.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "dbgcnt.h"
#include "case-cfn-macros.h"
#include "dojump.h"
#include "fold-const-call.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
#include "ifcvt.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "debug.h"
#include "dwarf2out.h"
#include "i386-builtins.h"
#include "i386-features.h"

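/* Base names of the ms2sysv save/restore stubs; get_stub_name prepends
   the ISA variant ("avx" or "sse") and appends the managed register
   count.  (Comment added for clarity; derived from get_stub_name below.)  */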
const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};

const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:   0           1            2              3
    Offset:                                 realigned or   aligned + 8
    Register       aligned     aligned + 8  aligned w/HFP  w/HFP */
    XMM15_REG,  /* 0x10        0x18         0x10           0x18 */
    XMM14_REG,  /* 0x20        0x28         0x20           0x28 */
    XMM13_REG,  /* 0x30        0x38         0x30           0x38 */
    XMM12_REG,  /* 0x40        0x48         0x40           0x48 */
    XMM11_REG,  /* 0x50        0x58         0x50           0x58 */
    XMM10_REG,  /* 0x60        0x68         0x60           0x68 */
    XMM9_REG,   /* 0x70        0x78         0x70           0x78 */
    XMM8_REG,   /* 0x80        0x88         0x80           0x88 */
    XMM7_REG,   /* 0x90        0x98         0x90           0x98 */
    XMM6_REG,   /* 0xa0        0xa8         0xa0           0xa8 */
    SI_REG,     /* 0xa8        0xb0         0xa8           0xb0 */
    DI_REG,     /* 0xb0        0xb8         0xb0           0xb8 */
    BX_REG,     /* 0xb8        0xc0         0xb8           0xc0 */
    BP_REG,     /* 0xc0        0xc8         N/A            N/A */
    R12_REG,    /* 0xc8        0xd0         0xc0           0xc8 */
    R13_REG,    /* 0xd0        0xd8         0xc8           0xd0 */
    R14_REG,    /* 0xd8        0xe0         0xd0           0xd8 */
    R15_REG,    /* 0xe0        0xe8         0xd8           0xe0 */
};

/* Instantiate static const values.  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				 [STUB_NAME_MAX_LEN];

/* Instantiates all xlogue_layout instances.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};

/* Return an appropriate const instance of xlogue_layout based upon values
   in cfun->machine and crtl.  */
const class xlogue_layout &
xlogue_layout::get_instance ()
{
  enum xlogue_stub_sets stub_set;
  bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;

  if (stack_realign_fp)
    stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else if (frame_pointer_needed)
    stub_set = aligned_plus_8
	       ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
	       : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else
    stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;

  return s_instances[stub_set];
}

/* Determine how many clobbered registers can be saved by the stub.
   Returns the count of registers the stub will save and restore.  */
unsigned
xlogue_layout::count_stub_managed_regs ()
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i, count;
  unsigned regno;

  for (count = i = MIN_REGS; i < MAX_REGS; ++i)
    {
      regno = REG_ORDER[i];
      if (regno == BP_REG && hfp)
	continue;
      if (!ix86_save_reg (regno, false, false))
	break;
      ++count;
    }
  return count;
}

/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}

/* Constructor for xlogue_layout.  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp), m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}

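/* Return the name of stub STUB for MIN_REGS + N_EXTRA_REGS managed
   registers, formatting it into s_stub_names on first use and returning
   the cached copy thereafter.  (Comment added for clarity; derived from
   the function body.)  */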
const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  /* Lazy init.  */
  if (!*name)
    {
      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
			  (have_avx ? "avx" : "sse"),
			  STUB_BASE_NAMES[stub],
			  MIN_REGS + n_extra_regs);
      gcc_checking_assert (res < (int) STUB_NAME_MAX_LEN);
    }

  return name;
}

/* Return rtx of a symbol ref for the entry point (based upon
   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
rtx
xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
{
  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
  gcc_assert (stub < XLOGUE_STUB_COUNT);
  gcc_assert (crtl->stack_realign_finalized);

  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
}

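/* Id of the most recently created chain; the scalar_chain constructor
   uses it to assign each chain a unique chain_id.  (Comment added for
   clarity.)  */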
unsigned scalar_chain::max_id = 0;

namespace {

/* Initialize a new chain.  */

scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
{
  smode = smode_;
  vmode = vmode_;

  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}

/* Free chain's data.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}

/* Add an instruction into the chain's queue.  */

void
scalar_chain::add_to_queue (unsigned insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid)
      || bitmap_bit_p (queue, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
	     insn_uid, chain_id);
  bitmap_set_bit (queue, insn_uid);
}

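/* Initialize a chain for {SI,DI}mode conversion, allocating the bitmap of
   insns that will need dual-mode copies and zeroing the inter-unit move
   counters used by compute_convert_gain.  (Comment added for clarity.)  */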
general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
					    enum machine_mode vmode_)
  : scalar_chain (smode_, vmode_)
{
  insns_conv = BITMAP_ALLOC (NULL);
  n_sse_to_integer = 0;
  n_integer_to_sse = 0;
}

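/* Free the bitmap of insns requiring conversion copies.  (Comment added
   for clarity.)  */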
general_scalar_chain::~general_scalar_chain ()
{
  BITMAP_FREE (insns_conv);
}

/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
general_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  /* Record the def/insn pair so we can later efficiently iterate over
     the defs to convert on insns not in the chain.  */
  bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
  if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
    {
      if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
	  && !reg_new)
	return;
      n_integer_to_sse++;
    }
  else
    {
      if (!reg_new)
	return;
      n_sse_to_integer++;
    }

  if (dump_file)
    fprintf (dump_file,
	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
}

/* For TImode conversion this is unused.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}

/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}

/* Add instruction into a chain.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  /* ??? The following is quadratic since analyze_register_chain
     iterates over all refs to look for dual-mode regs.  Instead this
     should be done separately for all regs mentioned in the chain once.  */
  df_ref ref;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      analyze_register_chain (candidates, ref);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}

/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, "  insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, "  defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}

/* Return the cost of building a vector constant
   instead of using a scalar one.  */

int
general_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  if (standard_sse_constant_p (exp, vmode))
    return ix86_cost->sse_op;
  /* We have separate costs for SImode and DImode, use SImode costs
     for smaller modes.  */
  return ix86_cost->sse_load[smode == DImode ? 1 : 0];
}

/* Compute a gain for chain conversion.  */

int
general_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  /* SSE costs distinguish between SImode and DImode loads/stores, for
     int costs factor in the number of GPRs involved.  When supporting
     smaller modes than SImode the int load/store costs need to be
     adjusted as well.  */
  unsigned sse_cost_idx = smode == DImode ? 1 : 0;
  unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);
      int igain = 0;

      if (REG_P (src) && REG_P (dst))
	igain += 2 * m - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	igain
	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
      else if (MEM_P (src) && REG_P (dst))
	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
      else
	switch (GET_CODE (src))
	  {
	  case ASHIFT:
	  case ASHIFTRT:
	  case LSHIFTRT:
	    if (m == 2)
	      {
		if (INTVAL (XEXP (src, 1)) >= 32)
		  igain += ix86_cost->add;
		else
		  igain += ix86_cost->shift_const;
	      }

	    igain += ix86_cost->shift_const - ix86_cost->sse_op;

	    if (CONST_INT_P (XEXP (src, 0)))
	      igain -= vector_const_cost (XEXP (src, 0));
	    break;

	  case AND:
	  case IOR:
	  case XOR:
	  case PLUS:
	  case MINUS:
	    igain += m * ix86_cost->add - ix86_cost->sse_op;
	    /* Additional gain for andnot for targets without BMI.  */
	    if (GET_CODE (XEXP (src, 0)) == NOT
		&& !TARGET_BMI)
	      igain += m * ix86_cost->add;

	    if (CONST_INT_P (XEXP (src, 0)))
	      igain -= vector_const_cost (XEXP (src, 0));
	    if (CONST_INT_P (XEXP (src, 1)))
	      igain -= vector_const_cost (XEXP (src, 1));
	    break;

	  case NEG:
	  case NOT:
	    igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);

	    if (GET_CODE (XEXP (src, 0)) != ABS)
	      {
		igain += m * ix86_cost->add;
		break;
	      }
	    /* FALLTHRU */

	  case ABS:
	  case SMAX:
	  case SMIN:
	  case UMAX:
	  case UMIN:
	    /* We do not have any conditional move cost, estimate it as a
	       reg-reg move.  Comparisons are costed as adds.  */
	    igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
	    /* Integer SSE ops are all costed the same.  */
	    igain -= ix86_cost->sse_op;
	    break;

	  case COMPARE:
	    /* Assume comparison cost is the same.  */
	    break;

	  case CONST_INT:
	    if (REG_P (dst))
	      /* DImode can be immediate for TARGET_64BIT and SImode always.  */
	      igain += m * COSTS_N_INSNS (1);
	    else if (MEM_P (dst))
	      igain += (m * ix86_cost->int_store[2]
			- ix86_cost->sse_store[sse_cost_idx]);
	    igain -= vector_const_cost (src);
	    break;

	  default:
	    gcc_unreachable ();
	  }

      if (igain != 0 && dump_file)
	{
	  fprintf (dump_file, "  Instruction gain %d for ", igain);
	  dump_insn_slim (dump_file, insn);
	}
      gain += igain;
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  /* Cost the integer to sse and sse to integer moves.  */
  cost += n_sse_to_integer * ix86_cost->sse_to_integer;
  /* ??? integer_to_sse but we only have that in the RA cost table.
     Assume sse_to_integer/integer_to_sse are the same which they
     are at the moment.  */
  cost += n_integer_to_sse * ix86_cost->sse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}

/* Insert generated conversion instruction sequence INSNS
   after instruction AFTER.  New BB may be required in case
   instruction has EH region attached.  */

void
scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
{
  if (!control_flow_insn_p (after))
    {
      emit_insn_after (insns, after);
      return;
    }

  basic_block bb = BLOCK_FOR_INSN (after);
  edge e = find_fallthru_edge (bb->succs);
  gcc_assert (e);

  basic_block new_bb = split_edge (e);
  emit_insn_after (insns, BB_HEAD (new_bb));
}

} // anon namespace

/* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
   zeroing the upper parts.  */

static rtx
gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
{
  switch (GET_MODE_NUNITS (vmode))
    {
    case 1:
      /* We are not using this case currently.  */
      gcc_unreachable ();
    case 2:
      return gen_rtx_VEC_CONCAT (vmode, gpr,
				 CONST0_RTX (GET_MODE_INNER (vmode)));
    default:
      return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
				CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
    }
}

/* Emit a copy of the scalar register REG defined by INSN into the vector
   register that the chain uses for it.  */

void
general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
{
  rtx vreg = *defs_map.get (reg);

  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
    {
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      if (smode == DImode && !TARGET_64BIT)
	{
	  emit_move_insn (adjust_address (tmp, SImode, 0),
			  gen_rtx_SUBREG (SImode, reg, 0));
	  emit_move_insn (adjust_address (tmp, SImode, 4),
			  gen_rtx_SUBREG (SImode, reg, 4));
	}
      else
	emit_move_insn (copy_rtx (tmp), reg);
      emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			      gen_gpr_to_xmm_move_src (vmode, tmp)));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (SImode, reg, 4),
					GEN_INT (2)));
	}
      else
	{
	  rtx tmp = gen_reg_rtx (DImode);
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 4)));
	  emit_insn (gen_vec_interleave_lowv4si
		     (gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, tmp, 0)));
	}
    }
  else
    emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			    gen_gpr_to_xmm_move_src (vmode, reg)));
  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a vector register r%d for insn %d\n",
	     REGNO (reg), REGNO (vreg), INSN_UID (insn));
}

/* Copy the definition SRC of INSN inside the chain to DST for
   scalar uses outside of the chain.  */

void
general_scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
{
  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    {
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      emit_move_insn (tmp, src);
      if (!TARGET_64BIT && smode == DImode)
	{
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  adjust_address (tmp, SImode, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  adjust_address (tmp, SImode, 4));
	}
      else
	emit_move_insn (dst, copy_rtx (tmp));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode,
				      gen_rtvec (1, const0_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 0),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));

	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 4),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));
	}
      else
	{
	  rtx vcopy = gen_reg_rtx (V2DImode);
	  emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	  emit_move_insn (vcopy,
			  gen_rtx_LSHIFTRT (V2DImode,
					    vcopy, GEN_INT (32)));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	}
    }
  else
    emit_move_insn (dst, src);

  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a scalar register r%d for insn %d\n",
	     REGNO (src), REGNO (dst), INSN_UID (insn));
}

/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   register conversion.  */

void
general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, vmode);
    }
  else if (MEM_P (*op))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (*op));

      /* Handle movabs.  */
      if (!memory_operand (*op, GET_MODE (*op)))
	{
	  rtx tmp2 = gen_reg_rtx (GET_MODE (*op));

	  emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
	  *op = tmp2;
	}

      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
				     gen_gpr_to_xmm_move_src (vmode, *op)),
			insn);
      *op = gen_rtx_SUBREG (vmode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      *op = gen_rtx_SUBREG (vmode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (vmode);
      else
	{
	  unsigned n = GET_MODE_NUNITS (vmode);
	  rtx *v = XALLOCAVEC (rtx, n);
	  v[0] = *op;
	  for (unsigned i = 1; i < n; ++i)
	    v[i] = const0_rtx;
	  vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
	}

      if (!standard_sse_constant_p (vec_cst, vmode))
	{
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == vmode);
    }
}

/* Convert INSN to vector mode.  */

void
general_scalar_chain::convert_insn (rtx_insn *insn)
{
  /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
  for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
      {
	df_link *use;
	for (use = DF_REF_CHAIN (ref); use; use = use->next)
	  if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
	      && (DF_REF_REG_MEM_P (use->ref)
		  || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
	    break;
	if (use)
	  convert_reg (insn, DF_REF_REG (ref),
		       *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
	else if (MAY_HAVE_DEBUG_BIND_INSNS)
	  {
	    /* If we generated a scalar copy we can leave debug-insns
	       as-is, if not, we have to adjust them.  */
	    auto_vec<rtx_insn *, 5> to_reset_debug_insns;
	    for (use = DF_REF_CHAIN (ref); use; use = use->next)
	      if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
		{
		  rtx_insn *debug_insn = DF_REF_INSN (use->ref);
		  /* If there's a reaching definition outside of the
		     chain we have to reset.  */
		  df_link *def;
		  for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
		    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
		      break;
		  if (def)
		    to_reset_debug_insns.safe_push (debug_insn);
		  else
		    {
		      *DF_REF_REAL_LOC (use->ref)
			= *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
		      df_insn_rescan (debug_insn);
		    }
		}
	    /* Have to do the reset outside of the DF_CHAIN walk to not
	       disrupt it.  */
	    while (!to_reset_debug_insns.is_empty ())
	      {
		rtx_insn *debug_insn = to_reset_debug_insns.pop ();
		INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
		df_insn_rescan_debug_internal (debug_insn);
	      }
	  }
      }

  /* Replace uses in this insn with the defs we use in the chain.  */
  for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
	{
	  /* Also update a corresponding REG_DEAD note.  */
	  rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
	  if (note)
	    XEXP (note, 0) = *vreg;
	  *DF_REF_REAL_LOC (ref) = *vreg;
	}

  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (smode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (vmode, tmp, 0);
    }
  else if (REG_P (dst))
    {
      /* Replace the definition with a SUBREG to the definition we
	 use inside the chain.  */
      rtx *vdef = defs_map.get (dst);
      if (vdef)
	dst = *vdef;
      dst = gen_rtx_SUBREG (vmode, dst, 0);
      /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
	 is a non-REG_P.  So kill those off.  */
      rtx note = find_reg_equal_equiv_note (insn);
      if (note)
	remove_note (insn, note);
    }

  switch (GET_CODE (src))
    {
    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      convert_op (&XEXP (src, 1), insn);
      /* FALLTHRU */

    case ABS:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, vmode);
      break;

    case NEG:
      src = XEXP (src, 0);

      if (GET_CODE (src) == ABS)
	{
	  src = XEXP (src, 0);
	  convert_op (&src, insn);
	  subreg = gen_reg_rtx (vmode);
	  emit_insn_before (gen_rtx_SET (subreg,
					 gen_rtx_ABS (vmode, src)), insn);
	  src = subreg;
	}
      else
	convert_op (&src, insn);

      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
      src = gen_rtx_MINUS (vmode, subreg, src);
      break;

    case NOT:
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
      src = gen_rtx_XOR (vmode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == vmode);
      break;

    case COMPARE:
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
      subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      emit_insn_before (gen_vec_interleave_lowv2di
			(copy_rtx_if_shared (subreg),
			 copy_rtx_if_shared (subreg),
			 copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
					       copy_rtx_if_shared (subreg)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  int patt = recog_memoized (insn);
  if (patt == -1)
    fatal_insn_not_found (insn);
  df_insn_rescan (insn);
}

/* Fix uses of converted REG in debug insns.  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
	 so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     register.  */
	  bool changed = false;
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}

/* Convert INSN from TImode to V1TImode.  */

void
timode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  switch (GET_CODE (dst))
    {
    case REG:
      {
	rtx tmp = find_reg_equal_equiv_note (insn);
	if (tmp)
	  PUT_MODE (XEXP (tmp, 0), V1TImode);
	PUT_MODE (dst, V1TImode);
	fix_debug_reg_uses (dst);
      }
      break;
    case MEM:
      PUT_MODE (dst, V1TImode);
      break;

    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (src))
    {
    case REG:
      PUT_MODE (src, V1TImode);
      /* Call fix_debug_reg_uses only if SRC is never defined.  */
      if (!DF_REG_DEF_CHAIN (REGNO (src)))
	fix_debug_reg_uses (src);
      break;

    case MEM:
      PUT_MODE (src, V1TImode);
      break;

    case CONST_WIDE_INT:
      if (NONDEBUG_INSN_P (insn))
	{
	  /* Since there are no instructions to store a 128-bit constant,
	     temporary register usage is required.  */
	  rtx tmp = gen_reg_rtx (V1TImode);
	  start_sequence ();
	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
	  src = validize_mem (force_const_mem (V1TImode, src));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  if (seq)
	    emit_insn_before (seq, insn);
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    case CONST_INT:
      switch (standard_sse_constant_p (src, TImode))
	{
	case 1:
	  src = CONST0_RTX (GET_MODE (dst));
	  break;
	case 2:
	  src = CONSTM1_RTX (GET_MODE (dst));
	  break;
	default:
	  gcc_unreachable ();
	}
      if (NONDEBUG_INSN_P (insn))
	{
	  rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store a standard SSE
	     constant, temporary register usage is required.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}

/* Generate copies from defs used by the chain but not defined therein.
   Also populates defs_map which is used later by convert_insn.  */

void
general_scalar_chain::convert_registers ()
{
  bitmap_iterator bi;
  unsigned id;
  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
    {
      rtx chain_reg = gen_reg_rtx (smode);
      defs_map.put (regno_reg_rtx[id], chain_reg);
    }
  EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
    for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
      if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
	make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
}

/* Convert whole chain creating required register
   conversions and copies.  */

int
scalar_chain::convert ()
{
  bitmap_iterator bi;
  unsigned id;
  int converted_insns = 0;

  if (!dbg_cnt (stv_conversion))
    return 0;

  if (dump_file)
    fprintf (dump_file, "Converting chain #%d...\n", chain_id);

  convert_registers ();

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
    {
      convert_insn (DF_INSN_UID_GET (id)->insn);
      converted_insns++;
    }

  return converted_insns;
}

/* Return the SET expression if INSN doesn't reference a hard register.
   Return NULL if INSN uses or defines a hard register, excluding
   pseudo register pushes, hard register uses in a memory address,
   clobbers and flags definitions.  */

static rtx
pseudo_reg_set (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return NULL;

  /* Check pseudo register push first.  */
  machine_mode mode = TARGET_64BIT ? TImode : DImode;
  if (REG_P (SET_SRC (set))
      && !HARD_REGISTER_P (SET_SRC (set))
      && push_operand (SET_DEST (set), mode))
    return set;

  df_ref ref;
  FOR_EACH_INSN_DEF (ref, insn)
    if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
	&& DF_REF_REGNO (ref) != FLAGS_REG)
      return NULL;

  FOR_EACH_INSN_USE (ref, insn)
    if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
      return NULL;

  return set;
}

/* Check if comparison INSN may be transformed
   into vector comparison.  Currently we transform
   zero checks only which look like:

   (set (reg:CCZ 17 flags)
	(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
			     (subreg:SI (reg:DI x) 0))
		     (const_int 0 [0])))  */

static bool
convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
{
  /* ??? Currently convertible for double-word DImode chain only.  */
  if (TARGET_64BIT || mode != DImode)
    return false;

  if (!TARGET_SSE4_1)
    return false;

  rtx def_set = single_set (insn);

  gcc_assert (def_set);

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  gcc_assert (GET_CODE (src) == COMPARE);

  if (GET_CODE (dst) != REG
      || REGNO (dst) != FLAGS_REG
      || GET_MODE (dst) != CCZmode)
    return false;

  rtx op1 = XEXP (src, 0);
  rtx op2 = XEXP (src, 1);

  if (op2 != CONST0_RTX (GET_MODE (op2)))
    return false;

  if (GET_CODE (op1) != IOR)
    return false;

  op2 = XEXP (op1, 1);
  op1 = XEXP (op1, 0);

  if (!SUBREG_P (op1)
      || !SUBREG_P (op2)
      || GET_MODE (op1) != SImode
      || GET_MODE (op2) != SImode
      || ((SUBREG_BYTE (op1) != 0
	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
	  && (SUBREG_BYTE (op2) != 0
	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
    return false;

  op1 = SUBREG_REG (op1);
  op2 = SUBREG_REG (op2);

  if (op1 != op2
      || !REG_P (op1)
      || GET_MODE (op1) != DImode)
    return false;

  return true;
}

/* The general version of scalar_to_vector_candidate_p.  */

static bool
general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
{
  rtx def_set = pseudo_reg_set (insn);

  if (!def_set)
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn, mode);

  /* We are interested in "mode" only.  */
  if ((GET_MODE (src) != mode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != mode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      if (!CONST_INT_P (XEXP (src, 1))
	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
	return false;
      break;

    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSE4_1))
	return false;
      /* Fallthru.  */

    case AND:
    case IOR:
    case XOR:
    case PLUS:
    case MINUS:
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != mode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      /* Check for andnot case.  */
      if (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT)
	break;

      src = XEXP (src, 0);
      /* FALLTHRU */

    case NOT:
      break;

    case NEG:
      /* Check for nabs case.  */
      if (GET_CODE (XEXP (src, 0)) != ABS)
	break;

      src = XEXP (src, 0);
      /* FALLTHRU */

    case ABS:
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSSE3))
	return false;
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      return REG_P (dst);

    default:
      return false;
    }

  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  if (GET_MODE (XEXP (src, 0)) != mode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}

/* The TImode version of scalar_to_vector_candidate_p.  */

static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = pseudo_reg_set (insn);

  if (!def_set)
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Only TImode load and store are allowed.  */
  if (GET_MODE (dst) != TImode)
    return false;

  if (MEM_P (dst))
    {
      /* Check for a store.  The memory must be aligned or the unaligned
	 store must be optimal.  Only support store from register, standard
	 SSE constant or CONST_WIDE_INT generated from piecewise store.

	 ??? Verify performance impact before enabling CONST_INT for
	 __int128 store.  */
      if (misaligned_operand (dst, TImode)
	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	return false;

      switch (GET_CODE (src))
	{
	default:
	  return false;

	case REG:
	case CONST_WIDE_INT:
	  return true;

	case CONST_INT:
	  return standard_sse_constant_p (src, TImode);
	}
    }
  else if (MEM_P (src))
    {
      /* Check for a load.  The memory must be aligned or the unaligned
	 load must be optimal.  */
      return (REG_P (dst)
	      && (!misaligned_operand (src, TImode)
		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    }

  return false;
}

/* For a register REGNO, scan instructions for its defs and uses.
   Put REGNO in REGS if a def or use isn't in CANDIDATES.  */

static void
timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
				   unsigned int regno)
{
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    {
      if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible def in insn %d\n",
		     regno, DF_REF_INSN_UID (def));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }

  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Debug instructions are skipped.  */
      if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible use in insn %d\n",
		     regno, DF_REF_INSN_UID (ref));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }
}

/* The TImode version of remove_non_convertible_regs.  */

static void
timode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx dest = SET_DEST (def_set);
      rtx src = SET_SRC (def_set);

      if ((!REG_P (dest)
	   || bitmap_bit_p (regs, REGNO (dest))
	   || HARD_REGISTER_P (dest))
	  && (!REG_P (src)
	      || bitmap_bit_p (regs, REGNO (src))
	      || HARD_REGISTER_P (src)))
	continue;

      if (REG_P (dest))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (dest));

      if (REG_P (src))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (src));
    }

  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }

      for (df_ref ref = DF_REG_USE_CHAIN (id);
	   ref;
	   ref = DF_REF_NEXT_REG (ref))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (ref));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
	  }
    }

  BITMAP_FREE (regs);
}

/* Main STV pass function.  Find and convert scalar
   instructions into vector mode when profitable.  */

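/* Illustrative example (not part of the original sources): on !TARGET_64BIT
   a double-word operation such as

     (set (reg:DI d) (xor:DI (reg:DI a) (reg:DI b)))

   is a DImode candidate; if the whole chain's gain is positive it is
   rewritten to operate on (subreg:V2DI ...) registers, so the two 32-bit
   XOR halves become a single PXOR.  */
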
static unsigned int
convert_scalars_to_vector (bool timode_p)
{
  basic_block bb;
  int converted_insns = 0;

  bitmap_obstack_initialize (NULL);
  const machine_mode cand_mode[3] = { SImode, DImode, TImode };
  const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
  bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
  for (unsigned i = 0; i < 3; ++i)
    bitmap_initialize (&candidates[i], &bitmap_default_obstack);

  calculate_dominance_info (CDI_DOMINATORS);
  df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();

  /* Find all instructions we want to convert into vector mode.  */
  if (dump_file)
    fprintf (dump_file, "Searching for mode conversion candidates...\n");

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (timode_p
	    && timode_scalar_to_vector_candidate_p (insn))
	  {
	    if (dump_file)
	      fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
		       INSN_UID (insn));

	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
	  }
	else if (!timode_p)
	  {
	    /* Check {SI,DI}mode.  */
	    for (unsigned i = 0; i <= 1; ++i)
	      if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
		{
		  if (dump_file)
		    fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
			     INSN_UID (insn), i == 0 ? "SImode" : "DImode");

		  bitmap_set_bit (&candidates[i], INSN_UID (insn));
		  break;
		}
	  }
    }

  if (timode_p)
    timode_remove_non_convertible_regs (&candidates[2]);

  for (unsigned i = 0; i <= 2; ++i)
    if (!bitmap_empty_p (&candidates[i]))
      break;
    else if (i == 2 && dump_file)
      fprintf (dump_file, "There are no candidates for optimization.\n");

  for (unsigned i = 0; i <= 2; ++i)
    while (!bitmap_empty_p (&candidates[i]))
      {
	unsigned uid = bitmap_first_set_bit (&candidates[i]);
	scalar_chain *chain;

	if (cand_mode[i] == TImode)
	  chain = new timode_scalar_chain;
	else
	  chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);

	/* Find instructions chain we want to convert to vector mode.
	   Check all uses and definitions to estimate all required
	   conversions.  */
	chain->build (&candidates[i], uid);

	if (chain->compute_convert_gain () > 0)
	  converted_insns += chain->convert ();
	else
	  if (dump_file)
	    fprintf (dump_file, "Chain #%d conversion is not profitable\n",
		     chain->chain_id);

	delete chain;
      }

  if (dump_file)
    fprintf (dump_file, "Total insns converted: %d\n", converted_insns);

  for (unsigned i = 0; i <= 2; ++i)
    bitmap_release (&candidates[i]);
  bitmap_obstack_release (NULL);
  df_process_deferred_rescans ();

  /* Conversion means we may have 128bit register spills/fills
     which require aligned stack.  */
  if (converted_insns)
    {
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
      if (crtl->stack_alignment_estimated < 128)
	crtl->stack_alignment_estimated = 128;

      crtl->stack_realign_needed
	= INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
      crtl->stack_realign_tried = crtl->stack_realign_needed;

      crtl->stack_realign_processed = true;

      if (!crtl->drap_reg)
	{
	  rtx drap_rtx = targetm.calls.get_drap_rtx ();

	  /* stack_realign_drap and drap_rtx must match.  */
	  gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));

	  /* Do nothing if NULL is returned,
	     which means DRAP is not needed.  */
	  if (drap_rtx != NULL)
	    {
	      crtl->args.internal_arg_pointer = drap_rtx;

	      /* Call fixup_tail_calls to clean up
		 REG_EQUIV note if DRAP is needed.  */
	      fixup_tail_calls ();
	    }
	}

      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
      if (TARGET_64BIT)
	for (tree parm = DECL_ARGUMENTS (current_function_decl);
	     parm; parm = DECL_CHAIN (parm))
	  {
	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
	      continue;
	    if (DECL_RTL_SET_P (parm)
		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_RTL (parm);
		if (REG_P (r))
		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
	      }
	    if (DECL_INCOMING_RTL (parm)
		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_INCOMING_RTL (parm);
		if (REG_P (r))
		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
	      }
	  }
    }

  return 0;
}

static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit or 512bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (int i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();

  df_analyze ();
  return 0;
}

namespace {

const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_insert_vzeroupper : public rtl_opt_pass
{
public:
  pass_insert_vzeroupper (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_vzeroupper, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return TARGET_AVX && TARGET_VZEROUPPER
	     && flag_expensive_optimizations && !optimize_size;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_insert_vzeroupper ();
    }

}; // class pass_insert_vzeroupper

const pass_data pass_data_stv =
{
  RTL_PASS, /* type */
  "stv", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_stv : public rtl_opt_pass
{
public:
  pass_stv (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_stv, ctxt),
      timode_p (false)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return ((!timode_p || TARGET_64BIT)
	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
    }

  virtual unsigned int execute (function *)
    {
      return convert_scalars_to_vector (timode_p);
    }

  opt_pass *clone ()
    {
      return new pass_stv (m_ctxt);
    }

  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      timode_p = param;
    }

private:
  bool timode_p;
}; // class pass_stv

} // anon namespace

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}

rtl_opt_pass *
make_pass_stv (gcc::context *ctxt)
{
  return new pass_stv (ctxt);
}

3dcea658 1923/* Inserting ENDBR and pseudo patchable-area instructions. */
2bf6d935 1924
3dcea658
L
1925static void
1926rest_of_insert_endbr_and_patchable_area (bool need_endbr,
1927 unsigned int patchable_area_size)
2bf6d935 1928{
3dcea658 1929 rtx endbr;
2bf6d935 1930 rtx_insn *insn;
3dcea658 1931 rtx_insn *endbr_insn = NULL;
2bf6d935
ML
1932 basic_block bb;
1933
3dcea658
L
1934 if (need_endbr)
1935 {
1936 /* Currently emit EB if it's a tracking function, i.e. 'nocf_check'
1937 is absent among function attributes. Later an optimization will
1938 be introduced to make analysis if an address of a static function
1939 is taken. A static function whose address is not taken will get
1940 a nocf_check attribute. This will allow to reduce the number of
1941 EB. */
      if (!lookup_attribute ("nocf_check",
			     TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
	  && (!flag_manual_endbr
	      || lookup_attribute ("cf_check",
				   DECL_ATTRIBUTES (cfun->decl)))
	  && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
	      || ix86_cmodel == CM_LARGE
	      || ix86_cmodel == CM_LARGE_PIC
	      || flag_force_indirect_call
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
		  && DECL_DLLIMPORT_P (cfun->decl))))
	{
	  if (crtl->profile && flag_fentry)
	    {
	      /* Queue ENDBR insertion to x86_function_profiler.
		 NB: Any patchable-area insn will be inserted after
		 ENDBR.  */
	      cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
	    }
	  else
	    {
	      endbr = gen_nop_endbr ();
	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      rtx_insn *insn = BB_HEAD (bb);
	      endbr_insn = emit_insn_before (endbr, insn);
	    }
	}
    }

  if (patchable_area_size)
    {
      if (crtl->profile && flag_fentry)
	{
	  /* Queue patchable-area insertion to x86_function_profiler.
	     NB: If there is a queued ENDBR, x86_function_profiler
	     will also handle patchable-area.  */
	  if (!cfun->machine->insn_queued_at_entrance)
	    cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
	}
      else
	{
	  rtx patchable_area
	    = gen_patchable_area (GEN_INT (patchable_area_size),
				  GEN_INT (crtl->patch_area_entry == 0));
	  if (endbr_insn)
	    emit_insn_after (patchable_area, endbr_insn);
	  else
	    {
	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      insn = BB_HEAD (bb);
	      emit_insn_before (patchable_area, insn);
	    }
	}
    }

  if (!need_endbr)
    return;

  bb = 0;
  FOR_EACH_BB_FN (bb, cfun)
    {
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  if (CALL_P (insn))
	    {
	      need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
	      if (!need_endbr && !SIBLING_CALL_P (insn))
		{
		  rtx call = get_call_rtx_from (insn);
		  rtx fnaddr = XEXP (call, 0);
		  tree fndecl = NULL_TREE;

		  /* Also generate ENDBRANCH for non-tail call which
		     may return via indirect branch.  */
		  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
		    fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
		  if (fndecl == NULL_TREE)
		    fndecl = MEM_EXPR (fnaddr);
		  if (fndecl
		      && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
		      && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
		    fndecl = NULL_TREE;
		  if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
		    {
		      tree fntype = TREE_TYPE (fndecl);
		      if (lookup_attribute ("indirect_return",
					    TYPE_ATTRIBUTES (fntype)))
			need_endbr = true;
		    }
		}
	      if (!need_endbr)
		continue;
	      /* Generate ENDBRANCH after a CALL that can return more
		 than once (setjmp-like functions).  */

	      endbr = gen_nop_endbr ();
	      emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
	      continue;
	    }

	  if (JUMP_P (insn) && flag_cet_switch)
	    {
	      rtx target = JUMP_LABEL (insn);
	      if (target == NULL_RTX || ANY_RETURN_P (target))
		continue;

	      /* Check that the jump targets a switch table.  */
	      rtx_insn *label = as_a<rtx_insn *> (target);
	      rtx_insn *table = next_insn (label);
	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
		continue;

	      /* For the indirect jump find out all places it jumps and insert
		 ENDBRANCH there.  It should be done under a special flag to
		 control ENDBRANCH generation for switch stmts.  */
	      edge_iterator ei;
	      edge e;
	      basic_block dest_blk;

	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  rtx_insn *insn;

		  dest_blk = e->dest;
		  insn = BB_HEAD (dest_blk);
		  gcc_assert (LABEL_P (insn));
		  endbr = gen_nop_endbr ();
		  emit_insn_after (endbr, insn);
		}
	      continue;
	    }

	  if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	    {
	      endbr = gen_nop_endbr ();
	      emit_insn_after (endbr, insn);
	      continue;
	    }
	}
    }

  return;
}

namespace {

const pass_data pass_data_insert_endbr_and_patchable_area =
{
  RTL_PASS, /* type.  */
  "endbr_and_patchable_area", /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_MACH_DEP, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  0, /* todo_flags_finish.  */
};

class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
{
public:
  pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
      patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
      return need_endbr || patchable_area_size;
    }

  virtual unsigned int execute (function *)
    {
      timevar_push (TV_MACH_DEP);
      rest_of_insert_endbr_and_patchable_area (need_endbr,
					       patchable_area_size);
      timevar_pop (TV_MACH_DEP);
      return 0;
    }

private:
  bool need_endbr;
  unsigned int patchable_area_size;
}; // class pass_insert_endbr_and_patchable_area

} // anon namespace

rtl_opt_pass *
make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
{
  return new pass_insert_endbr_and_patchable_area (ctxt);
}

/* At entry of the nearest common dominator for basic blocks with
   conversions, generate a single
	vxorps %xmmN, %xmmN, %xmmN
   for all
	vcvtss2sd op, %xmmN, %xmmX
	vcvtsd2ss op, %xmmN, %xmmX
	vcvtsi2ss op, %xmmN, %xmmX
	vcvtsi2sd op, %xmmN, %xmmX

   NB: We want to generate only a single vxorps to cover the whole
   function.  The LCM algorithm isn't appropriate here since it may
   place a vxorps inside the loop.  */
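
/* For illustration, assuming -mavx -O2 on x86-64:

     float f (int i) { return i; }

   would otherwise compile to a lone vcvtsi2ss whose destination
   carries a false dependency on the previous contents of %xmm0; after
   this pass the output is roughly

	vxorps	%xmm0, %xmm0, %xmm0
	vcvtsi2ssl	%edi, %xmm0, %xmm0
	ret

   with the vxorps hoisted so a single clearing insn serves every
   conversion in the function.  */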

static unsigned int
remove_partial_avx_dependency (void)
{
  timevar_push (TV_MACH_DEP);

  bitmap_obstack_initialize (NULL);
  bitmap convert_bbs = BITMAP_ALLOC (NULL);

  basic_block bb;
  rtx_insn *insn, *set_insn;
  rtx set;
  rtx v4sf_const0 = NULL_RTX;

  auto_vec<rtx_insn *> control_flow_insns;

  /* We create invalid RTL initially so defer rescans.  */
  df_set_flags (DF_DEFER_INSN_RESCAN);

  FOR_EACH_BB_FN (bb, cfun)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  set = single_set (insn);
	  if (!set)
	    continue;

	  if (get_attr_avx_partial_xmm_update (insn)
	      != AVX_PARTIAL_XMM_UPDATE_TRUE)
	    continue;

	  if (!v4sf_const0)
	    v4sf_const0 = gen_reg_rtx (V4SFmode);

	  /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
	     SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
	     vec_merge with subreg.  */
	  rtx src = SET_SRC (set);
	  rtx dest = SET_DEST (set);
	  machine_mode dest_mode = GET_MODE (dest);

	  rtx zero;
	  machine_mode dest_vecmode;
	  if (dest_mode == E_SFmode)
	    {
	      dest_vecmode = V4SFmode;
	      zero = v4sf_const0;
	    }
	  else
	    {
	      dest_vecmode = V2DFmode;
	      zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
	    }

	  /* Change source to vector mode.  */
	  src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
	  src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
				   GEN_INT (HOST_WIDE_INT_1U));
	  /* Change destination to vector mode.  */
	  rtx vec = gen_reg_rtx (dest_vecmode);
	  /* Generate an XMM vector SET.  */
	  set = gen_rtx_SET (vec, src);
	  set_insn = emit_insn_before (set, insn);
	  df_insn_rescan (set_insn);

	  if (cfun->can_throw_non_call_exceptions)
	    {
	      /* Handle REG_EH_REGION note.  */
	      rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	      if (note)
		{
		  control_flow_insns.safe_push (set_insn);
		  add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
		}
	    }

	  src = gen_rtx_SUBREG (dest_mode, vec, 0);
	  set = gen_rtx_SET (dest, src);

	  /* Drop possible dead definitions.  */
	  PATTERN (insn) = set;

	  INSN_CODE (insn) = -1;
	  recog_memoized (insn);
	  df_insn_rescan (insn);
	  bitmap_set_bit (convert_bbs, bb->index);
	}
    }

  if (v4sf_const0)
    {
      /* (Re-)discover loops so that bb->loop_father can be used in the
	 analysis below.  */
      calculate_dominance_info (CDI_DOMINATORS);
      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

      /* Generate a vxorps at entry of the nearest dominator for basic
	 blocks with conversions, which is in the fake loop that
	 contains the whole function, so that there is only a single
	 vxorps in the whole function.  */
      bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
					     convert_bbs);
      while (bb->loop_father->latch
	     != EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb = get_immediate_dominator (CDI_DOMINATORS,
				      bb->loop_father->header);

      set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));

      insn = BB_HEAD (bb);
      while (insn && !NONDEBUG_INSN_P (insn))
	{
	  if (insn == BB_END (bb))
	    {
	      insn = NULL;
	      break;
	    }
	  insn = NEXT_INSN (insn);
	}
      if (insn == BB_HEAD (bb))
	set_insn = emit_insn_before (set, insn);
      else
	set_insn = emit_insn_after (set,
				    insn ? PREV_INSN (insn) : BB_END (bb));
      df_insn_rescan (set_insn);
      loop_optimizer_finalize ();

      if (!control_flow_insns.is_empty ())
	{
	  free_dominance_info (CDI_DOMINATORS);

	  unsigned int i;
	  FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
	    if (control_flow_insn_p (insn))
	      {
		/* Split the block after insn.  There will be a fallthru
		   edge, which is OK so we keep it.  We have to create
		   the exception edges ourselves.  */
		bb = BLOCK_FOR_INSN (insn);
		split_block (bb, insn);
		rtl_make_eh_edge (NULL, bb, BB_END (bb));
	      }
	}
    }

  df_process_deferred_rescans ();
  df_clear_flags (DF_DEFER_INSN_RESCAN);
  bitmap_obstack_release (NULL);
  BITMAP_FREE (convert_bbs);

  timevar_pop (TV_MACH_DEP);
  return 0;
}

namespace {

const pass_data pass_data_remove_partial_avx_dependency =
{
  RTL_PASS, /* type */
  "rpad", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_remove_partial_avx_dependency : public rtl_opt_pass
{
public:
  pass_remove_partial_avx_dependency (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (TARGET_AVX
	      && TARGET_SSE_PARTIAL_REG_DEPENDENCY
	      && TARGET_SSE_MATH
	      && optimize
	      && optimize_function_for_speed_p (cfun));
    }

  virtual unsigned int execute (function *)
    {
      return remove_partial_avx_dependency ();
    }
}; // class pass_remove_partial_avx_dependency

} // anon namespace

rtl_opt_pass *
make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
{
  return new pass_remove_partial_avx_dependency (ctxt);
}

/* This compares the priority of target features in function DECL1
   and DECL2.  It returns a positive value if DECL1 is higher priority,
   a negative value if DECL2 is higher priority and 0 if they are the
   same.  */

int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}

/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}

/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);

  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check whether any integer is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>.  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}
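
/* A sketch of the code this builds for one version (names are
   illustrative): the predicate calls are the __builtin_cpu_is /
   __builtin_cpu_supports style builtins recorded in PREDICATE_CHAIN,
   combined with MIN_EXPR since each yields 0 or nonzero:

     cond_1 = predicate_1 (arg_1);
     cond_2 = predicate_2 (arg_2);
     and_expr = MIN_EXPR <cond_2, cond_1>;
     if (and_expr > 0)
       return (void *) &version_decl;
     // control falls through to the returned bb3 for the next version
*/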

/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS, a tree chain, holds the
   function choices for dispatch.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } *function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.
     The priority is based on the ISA.  This is not a perfect solution:
     there could still be ambiguity.  If more than one function version
     is suitable to execute, which one should be dispatched?  In the
     future, allow the user to specify a dispatch priority next to the
     version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* Dispatch the default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
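
/* The resolver body this produces looks roughly like (illustrative):

     void *foo.resolver (void)
     {
       __builtin_cpu_init ();
       if (<predicates for the highest-priority version> > 0)
	 return (void *) &foo.avx2;
       ...
       return (void *) &foo;	// default version, tested last
     }

   so the most specific runnable version wins at load time.  */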

/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "function versions cannot be marked as %<gnu_inline%>,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}

tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs %<ifunc%> which is not supported "
		"on this target");
    }

  return dispatch_decl;
}

/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is
   the ifunc alias that will point to the created resolver.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree ifunc_alias_decl,
		    basic_block *empty_bb)
{
  tree decl, type, t;

  /* Create resolver function name based on default_decl.  */
  tree decl_name = clone_function_name (default_decl, "resolver");
  const char *resolver_name = IDENTIFIER_POINTER (decl_name);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 1;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (ifunc_alias_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  else
    TREE_PUBLIC (ifunc_alias_decl) = 0;

  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (ifunc_alias_decl != NULL);
  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (ifunc_alias_decl)
    = make_attribute ("ifunc", resolver_name,
		      DECL_ATTRIBUTES (ifunc_alias_decl));

  /* Create the alias for dispatch to resolver here.  */
  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
  return decl;
}

/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  */

tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = node->function_version ();
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  auto_vec<tree, 2> fn_ver_vec;

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver_decl;
}