/* Copyright (C) 1988-2021 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "cfgbuild.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "tm-constrs.h"
#include "cselib.h"
#include "sched-int.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "dbgcnt.h"
#include "case-cfn-macros.h"
#include "dojump.h"
#include "fold-const-call.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
#include "ifcvt.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "debug.h"
#include "dwarf2out.h"
#include "i386-builtins.h"
#include "i386-features.h"

const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};

const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:  0          1            2              3
    Offset:                               realigned or   aligned + 8
    Register      aligned    aligned + 8  aligned w/HFP  w/HFP */
    XMM15_REG,  /* 0x10      0x18         0x10           0x18 */
    XMM14_REG,  /* 0x20      0x28         0x20           0x28 */
    XMM13_REG,  /* 0x30      0x38         0x30           0x38 */
    XMM12_REG,  /* 0x40      0x48         0x40           0x48 */
    XMM11_REG,  /* 0x50      0x58         0x50           0x58 */
    XMM10_REG,  /* 0x60      0x68         0x60           0x68 */
    XMM9_REG,   /* 0x70      0x78         0x70           0x78 */
    XMM8_REG,   /* 0x80      0x88         0x80           0x88 */
    XMM7_REG,   /* 0x90      0x98         0x90           0x98 */
    XMM6_REG,   /* 0xa0      0xa8         0xa0           0xa8 */
    SI_REG,     /* 0xa8      0xb0         0xa8           0xb0 */
    DI_REG,     /* 0xb0      0xb8         0xb0           0xb8 */
    BX_REG,     /* 0xb8      0xc0         0xb8           0xc0 */
    BP_REG,     /* 0xc0      0xc8         N/A            N/A */
    R12_REG,    /* 0xc8      0xd0         0xc0           0xc8 */
    R13_REG,    /* 0xd0      0xd8         0xc8           0xd0 */
    R14_REG,    /* 0xd8      0xe0         0xd0           0xd8 */
    R15_REG,    /* 0xe0      0xe8         0xd8           0xe0 */
};

/* Instantiate static const values.  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				 [STUB_NAME_MAX_LEN];

/* Instantiates all xlogue_layout instances.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};
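
/* The four instances cover the cross product of the incoming stack
   alignment pad (0 or 8 bytes) and whether a hard frame pointer is
   used; get_instance below picks the matching one.  */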

/* Return an appropriate const instance of xlogue_layout based upon values
   in cfun->machine and crtl.  */
const class xlogue_layout &
xlogue_layout::get_instance ()
{
  enum xlogue_stub_sets stub_set;
  bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;

  if (stack_realign_fp)
    stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else if (frame_pointer_needed)
    stub_set = aligned_plus_8
	       ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
	       : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
  else
    stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;

  return s_instances[stub_set];
}

/* Determine how many clobbered registers can be saved by the stub.
   Returns the count of registers the stub will save and restore.  */
unsigned
xlogue_layout::count_stub_managed_regs ()
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i, count;
  unsigned regno;

  for (count = i = MIN_REGS; i < MAX_REGS; ++i)
    {
      regno = REG_ORDER[i];
      if (regno == BP_REG && hfp)
	continue;
      if (!ix86_save_reg (regno, false, false))
	break;
      ++count;
    }
  return count;
}

/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}

/* Constructor for xlogue_layout.  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp), m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}

const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  /* Lazy init.  */
  if (!*name)
    {
      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
			  (have_avx ? "avx" : "sse"),
			  STUB_BASE_NAMES[stub],
			  MIN_REGS + n_extra_regs);
      gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
    }

  return name;
}
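
/* As an illustration of the format string above: with AVX enabled,
   stub base name "savms64" and no extra registers this produces a
   name of the form "__avx_savms64_<MIN_REGS>", with "sse" replacing
   "avx" when AVX is disabled.  (The numeric suffix is MIN_REGS plus
   N_EXTRA_REGS; the concrete value depends on the MIN_REGS
   constant.)  */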

/* Return rtx of a symbol ref for the entry point (based upon
   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
rtx
xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
{
  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
  gcc_assert (stub < XLOGUE_STUB_COUNT);
  gcc_assert (crtl->stack_realign_finalized);

  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
}

unsigned scalar_chain::max_id = 0;

namespace {

/* Initialize new chain.  */

scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
{
  smode = smode_;
  vmode = vmode_;

  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}

/* Free chain's data.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}

/* Add instruction into chains' queue.  */

void
scalar_chain::add_to_queue (unsigned insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid)
      || bitmap_bit_p (queue, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
	     insn_uid, chain_id);
  bitmap_set_bit (queue, insn_uid);
}

general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
					    enum machine_mode vmode_)
  : scalar_chain (smode_, vmode_)
{
  insns_conv = BITMAP_ALLOC (NULL);
  n_sse_to_integer = 0;
  n_integer_to_sse = 0;
}

general_scalar_chain::~general_scalar_chain ()
{
  BITMAP_FREE (insns_conv);
}

/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
general_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  /* Record the def/insn pair so we can later efficiently iterate over
     the defs to convert on insns not in the chain.  */
  bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
  if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
    {
      if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
	  && !reg_new)
	return;
      n_integer_to_sse++;
    }
  else
    {
      if (!reg_new)
	return;
      n_sse_to_integer++;
    }

  if (dump_file)
    fprintf (dump_file,
	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
}

/* For TImode conversion, it is unused.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}

/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}

/* Add instruction into a chain.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  /* ??? The following is quadratic since analyze_register_chain
     iterates over all refs to look for dual-mode regs.  Instead this
     should be done separately for all regs mentioned in the chain once.  */
  df_ref ref;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      analyze_register_chain (candidates, ref);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}

/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, "  insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, "  defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}
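
/* Example of the chain closure (a sketch): seeding the queue with an
   insn "r1 = r2 + r3" makes add_insn/analyze_register_chain pull in
   every candidate insn defining r2 or r3 and every candidate insn
   using r1, transitively; refs that lead outside the candidate set
   are instead recorded as dual-mode defs.  */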

/* Return a cost of building a vector constant
   instead of using a scalar one.  */

int
general_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  if (standard_sse_constant_p (exp, vmode))
    return ix86_cost->sse_op;
  /* We have separate costs for SImode and DImode, use SImode costs
     for smaller modes.  */
  return ix86_cost->sse_load[smode == DImode ? 1 : 0];
}

/* Compute a gain for chain conversion.  */

int
general_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  /* SSE costs distinguish between SImode and DImode loads/stores, for
     int costs factor in the number of GPRs involved.  When supporting
     smaller modes than SImode the int load/store costs need to be
     adjusted as well.  */
  unsigned sse_cost_idx = smode == DImode ? 1 : 0;
  unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
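
  /* Worked example (illustrative numbers only): for a DImode chain on
     a 32-bit target m is 2, so the reg-reg case below credits
     2 * 2 - xmm_move, i.e. the two GPR moves the scalar form needs
     against the single xmm move of the vector form.  */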

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);
      int igain = 0;

      if (REG_P (src) && REG_P (dst))
	igain += 2 * m - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	igain
	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
      else if (MEM_P (src) && REG_P (dst))
	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  if (m == 2)
	    {
	      if (INTVAL (XEXP (src, 1)) >= 32)
		igain += ix86_cost->add;
	      else
		igain += ix86_cost->shift_const;
	    }

	  igain += ix86_cost->shift_const - ix86_cost->sse_op;

	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  igain += m * ix86_cost->add - ix86_cost->sse_op;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    igain += m * ix86_cost->add;

	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    igain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == ABS
	       || GET_CODE (src) == SMAX
	       || GET_CODE (src) == SMIN
	       || GET_CODE (src) == UMAX
	       || GET_CODE (src) == UMIN)
	{
	  /* We do not have any conditional move cost, estimate it as a
	     reg-reg move.  Comparisons are costed as adds.  */
	  igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
	  /* Integer SSE ops are all costed the same.  */
	  igain -= ix86_cost->sse_op;
	}
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    /* DImode can be immediate for TARGET_64BIT and SImode always.  */
	    igain += m * COSTS_N_INSNS (1);
	  else if (MEM_P (dst))
	    igain += (m * ix86_cost->int_store[2]
		      - ix86_cost->sse_store[sse_cost_idx]);
	  igain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();

      if (igain != 0 && dump_file)
	{
	  fprintf (dump_file, "  Instruction gain %d for ", igain);
	  dump_insn_slim (dump_file, insn);
	}
      gain += igain;
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  /* Cost the integer to sse and sse to integer moves.  */
  cost += n_sse_to_integer * ix86_cost->sse_to_integer;
  /* ??? integer_to_sse but we only have that in the RA cost table.
     Assume sse_to_integer/integer_to_sse are the same which they
     are at the moment.  */
  cost += n_integer_to_sse * ix86_cost->sse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}

/* Insert generated conversion instruction sequence INSNS
   after instruction AFTER.  New BB may be required in case
   instruction has EH region attached.  */

void
scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
{
  if (!control_flow_insn_p (after))
    {
      emit_insn_after (insns, after);
      return;
    }

  basic_block bb = BLOCK_FOR_INSN (after);
  edge e = find_fallthru_edge (bb->succs);
  gcc_assert (e);

  basic_block new_bb = split_edge (e);
  emit_insn_after (insns, BB_HEAD (new_bb));
}

} // anon namespace

/* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
   zeroing the upper parts.  */

static rtx
gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
{
  switch (GET_MODE_NUNITS (vmode))
    {
    case 1:
      /* We are not using this case currently.  */
      gcc_unreachable ();
    case 2:
      return gen_rtx_VEC_CONCAT (vmode, gpr,
				 CONST0_RTX (GET_MODE_INNER (vmode)));
    default:
      return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
				CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
    }
}
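
/* Concretely, for V2DImode the above yields
     (vec_concat:V2DI gpr (const_int 0))
   and for V4SImode a vec_merge of (vec_duplicate:V4SI gpr) with the
   zero vector keeping only element 0; both zero the upper elements.  */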

/* Make vector copies for all register REGNO definitions
   and replace its uses in a chain.  */

void
general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
{
  rtx vreg = *defs_map.get (reg);

  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
    {
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      if (smode == DImode && !TARGET_64BIT)
	{
	  emit_move_insn (adjust_address (tmp, SImode, 0),
			  gen_rtx_SUBREG (SImode, reg, 0));
	  emit_move_insn (adjust_address (tmp, SImode, 4),
			  gen_rtx_SUBREG (SImode, reg, 4));
	}
      else
	emit_move_insn (copy_rtx (tmp), reg);
      emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			      gen_gpr_to_xmm_move_src (vmode, tmp)));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (SImode, reg, 4),
					GEN_INT (2)));
	}
      else
	{
	  rtx tmp = gen_reg_rtx (DImode);
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 4)));
	  emit_insn (gen_vec_interleave_lowv4si
		     (gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, tmp, 0)));
	}
    }
  else
    emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			    gen_gpr_to_xmm_move_src (vmode, reg)));
  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a vector register r%d for insn %d\n",
	     REGNO (reg), REGNO (vreg), INSN_UID (insn));
}

/* Copy the definition SRC of INSN inside the chain to DST for
   scalar uses outside of the chain.  */

void
general_scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
{
  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    {
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      emit_move_insn (tmp, src);
      if (!TARGET_64BIT && smode == DImode)
	{
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  adjust_address (tmp, SImode, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  adjust_address (tmp, SImode, 4));
	}
      else
	emit_move_insn (dst, copy_rtx (tmp));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode,
				      gen_rtvec (1, const0_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 0),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));

	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 4),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));
	}
      else
	{
	  rtx vcopy = gen_reg_rtx (V2DImode);
	  emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	  emit_move_insn (vcopy,
			  gen_rtx_LSHIFTRT (V2DImode,
					    vcopy, GEN_INT (32)));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	}
    }
  else
    emit_move_insn (dst, src);

  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a scalar register r%d for insn %d\n",
	     REGNO (src), REGNO (dst), INSN_UID (insn));
}

/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   registers conversion.  */

void
general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, vmode);
    }
  else if (MEM_P (*op))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (*op));

      /* Handle movabs.  */
      if (!memory_operand (*op, GET_MODE (*op)))
	{
	  rtx tmp2 = gen_reg_rtx (GET_MODE (*op));

	  emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
	  *op = tmp2;
	}

      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
				     gen_gpr_to_xmm_move_src (vmode, *op)),
			insn);
      *op = gen_rtx_SUBREG (vmode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      *op = gen_rtx_SUBREG (vmode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (vmode);
      else
	{
	  unsigned n = GET_MODE_NUNITS (vmode);
	  rtx *v = XALLOCAVEC (rtx, n);
	  v[0] = *op;
	  for (unsigned i = 1; i < n; ++i)
	    v[i] = const0_rtx;
	  vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
	}

      if (!standard_sse_constant_p (vec_cst, vmode))
	{
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == vmode);
    }
}
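
/* E.g. a CONST_INT operand 5 in an SImode chain becomes the V4SImode
   constant {5, 0, 0, 0}; only element 0 matters because the chain
   reads just the low part of the vector register.  */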

/* Convert INSN to vector mode.  */

void
general_scalar_chain::convert_insn (rtx_insn *insn)
{
  /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
  for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
      {
	df_link *use;
	for (use = DF_REF_CHAIN (ref); use; use = use->next)
	  if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
	      && (DF_REF_REG_MEM_P (use->ref)
		  || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
	    break;
	if (use)
	  convert_reg (insn, DF_REF_REG (ref),
		       *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
	else if (MAY_HAVE_DEBUG_BIND_INSNS)
	  {
	    /* If we generated a scalar copy we can leave debug-insns
	       as-is, if not, we have to adjust them.  */
	    auto_vec<rtx_insn *, 5> to_reset_debug_insns;
	    for (use = DF_REF_CHAIN (ref); use; use = use->next)
	      if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
		{
		  rtx_insn *debug_insn = DF_REF_INSN (use->ref);
		  /* If there's a reaching definition outside of the
		     chain we have to reset.  */
		  df_link *def;
		  for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
		    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
		      break;
		  if (def)
		    to_reset_debug_insns.safe_push (debug_insn);
		  else
		    {
		      *DF_REF_REAL_LOC (use->ref)
			= *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
		      df_insn_rescan (debug_insn);
		    }
		}
	    /* Have to do the reset outside of the DF_CHAIN walk to not
	       disrupt it.  */
	    while (!to_reset_debug_insns.is_empty ())
	      {
		rtx_insn *debug_insn = to_reset_debug_insns.pop ();
		INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
		df_insn_rescan_debug_internal (debug_insn);
	      }
	  }
      }

  /* Replace uses in this insn with the defs we use in the chain.  */
  for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
	{
	  /* Also update a corresponding REG_DEAD note.  */
	  rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
	  if (note)
	    XEXP (note, 0) = *vreg;
	  *DF_REF_REAL_LOC (ref) = *vreg;
	}

  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (smode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (vmode, tmp, 0);
    }
  else if (REG_P (dst))
    {
      /* Replace the definition with a SUBREG to the definition we
	 use inside the chain.  */
      rtx *vdef = defs_map.get (dst);
      if (vdef)
	dst = *vdef;
      dst = gen_rtx_SUBREG (vmode, dst, 0);
      /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
	 is a non-REG_P.  So kill those off.  */
      rtx note = find_reg_equal_equiv_note (insn);
      if (note)
	remove_note (insn, note);
    }

  switch (GET_CODE (src))
    {
    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      convert_op (&XEXP (src, 1), insn);
      /* FALLTHRU */

    case ABS:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, vmode);
      break;

    case NEG:
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
      src = gen_rtx_MINUS (vmode, subreg, src);
      break;

    case NOT:
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
      src = gen_rtx_XOR (vmode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == vmode);
      break;

    case COMPARE:
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
      subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
					       copy_rtx_if_shared (subreg)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  int patt = recog_memoized (insn);
  if (patt == -1)
    fatal_insn_not_found (insn);
  df_insn_rescan (insn);
}

/* Fix uses of converted REG in debug insns.  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
	 so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     register.  */
	  bool changed = false;
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}

/* Convert INSN from TImode to V1TImode.  */

void
timode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  switch (GET_CODE (dst))
    {
    case REG:
      {
	rtx tmp = find_reg_equal_equiv_note (insn);
	if (tmp)
	  PUT_MODE (XEXP (tmp, 0), V1TImode);
	PUT_MODE (dst, V1TImode);
	fix_debug_reg_uses (dst);
      }
      break;
    case MEM:
      PUT_MODE (dst, V1TImode);
      break;

    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (src))
    {
    case REG:
      PUT_MODE (src, V1TImode);
      /* Call fix_debug_reg_uses only if SRC is never defined.  */
      if (!DF_REG_DEF_CHAIN (REGNO (src)))
	fix_debug_reg_uses (src);
      break;

    case MEM:
      PUT_MODE (src, V1TImode);
      break;

    case CONST_WIDE_INT:
      if (NONDEBUG_INSN_P (insn))
	{
	  /* Since there are no instructions to store 128-bit constant,
	     temporary register usage is required.  */
	  rtx tmp = gen_reg_rtx (V1TImode);
	  start_sequence ();
	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
	  src = validize_mem (force_const_mem (V1TImode, src));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  if (seq)
	    emit_insn_before (seq, insn);
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    case CONST_INT:
      switch (standard_sse_constant_p (src, TImode))
	{
	case 1:
	  src = CONST0_RTX (GET_MODE (dst));
	  break;
	case 2:
	  src = CONSTM1_RTX (GET_MODE (dst));
	  break;
	default:
	  gcc_unreachable ();
	}
      if (NONDEBUG_INSN_P (insn))
	{
	  rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store standard SSE
	     constant, temporary register usage is required.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}

/* Generate copies from defs used by the chain but not defined therein.
   Also populates defs_map which is used later by convert_insn.  */

void
general_scalar_chain::convert_registers ()
{
  bitmap_iterator bi;
  unsigned id;
  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
    {
      rtx chain_reg = gen_reg_rtx (smode);
      defs_map.put (regno_reg_rtx[id], chain_reg);
    }
  EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
    for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
      if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
	make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
}

/* Convert whole chain creating required register
   conversions and copies.  */

int
scalar_chain::convert ()
{
  bitmap_iterator bi;
  unsigned id;
  int converted_insns = 0;

  if (!dbg_cnt (stv_conversion))
    return 0;

  if (dump_file)
    fprintf (dump_file, "Converting chain #%d...\n", chain_id);

  convert_registers ();

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
    {
      convert_insn (DF_INSN_UID_GET (id)->insn);
      converted_insns++;
    }

  return converted_insns;
}

/* Return the SET expression if INSN doesn't reference hard register.
   Return NULL if INSN uses or defines a hard register, excluding
   pseudo register pushes, hard register uses in a memory address,
   clobbers and flags definitions.  */

static rtx
pseudo_reg_set (rtx_insn *insn)
{
  rtx set = single_set (insn);
  if (!set)
    return NULL;

  /* Check pseudo register push first.  */
  machine_mode mode = TARGET_64BIT ? TImode : DImode;
  if (REG_P (SET_SRC (set))
      && !HARD_REGISTER_P (SET_SRC (set))
      && push_operand (SET_DEST (set), mode))
    return set;

  df_ref ref;
  FOR_EACH_INSN_DEF (ref, insn)
    if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
	&& !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
	&& DF_REF_REGNO (ref) != FLAGS_REG)
      return NULL;

  FOR_EACH_INSN_USE (ref, insn)
    if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
      return NULL;

  return set;
}
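
/* Note the push exception above: e.g. a 64-bit push of a TImode
   pseudo, (set (mem:TI (pre_dec ...)) (reg:TI N)), is still accepted
   even though the push implicitly references the hard stack pointer
   (a sketch; the exact RTL spelling of the push varies).  */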

/* Check if comparison INSN may be transformed
   into vector comparison.  Currently we transform
   zero checks only which look like:

   (set (reg:CCZ 17 flags)
	(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
			     (subreg:SI (reg:DI x) 0))
		     (const_int 0 [0])))  */

static bool
convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
{
  /* ??? Currently convertible for double-word DImode chain only.  */
  if (TARGET_64BIT || mode != DImode)
    return false;

  if (!TARGET_SSE4_1)
    return false;

  rtx def_set = single_set (insn);

  gcc_assert (def_set);

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  gcc_assert (GET_CODE (src) == COMPARE);

  if (GET_CODE (dst) != REG
      || REGNO (dst) != FLAGS_REG
      || GET_MODE (dst) != CCZmode)
    return false;

  rtx op1 = XEXP (src, 0);
  rtx op2 = XEXP (src, 1);

  if (op2 != CONST0_RTX (GET_MODE (op2)))
    return false;

  if (GET_CODE (op1) != IOR)
    return false;

  op2 = XEXP (op1, 1);
  op1 = XEXP (op1, 0);

  if (!SUBREG_P (op1)
      || !SUBREG_P (op2)
      || GET_MODE (op1) != SImode
      || GET_MODE (op2) != SImode
      || ((SUBREG_BYTE (op1) != 0
	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
	  && (SUBREG_BYTE (op2) != 0
	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
    return false;

  op1 = SUBREG_REG (op1);
  op2 = SUBREG_REG (op2);

  if (op1 != op2
      || !REG_P (op1)
      || GET_MODE (op1) != DImode)
    return false;

  return true;
}
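
/* Such a pattern typically results from a double-word zero test on a
   32-bit target, e.g. "if (x == 0)" with a 64-bit x, which is lowered
   to an IOR of the two SImode halves compared against zero.  */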

/* The general version of scalar_to_vector_candidate_p.  */

static bool
general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
{
  rtx def_set = pseudo_reg_set (insn);

  if (!def_set)
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn, mode);

  /* We are interested in "mode" only.  */
  if ((GET_MODE (src) != mode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != mode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      if (!CONST_INT_P (XEXP (src, 1))
	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
	return false;
      break;

    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSE4_1))
	return false;
      /* Fallthru.  */

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != mode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case ABS:
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSSE3))
	return false;
      break;

    case NEG:
    case NOT:
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      return REG_P (dst);

    default:
      return false;
    }

  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0))
      /* Check for andnot case.  */
      && (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT
	  || !REG_P (XEXP (XEXP (src, 0), 0))))
    return false;

  if (GET_MODE (XEXP (src, 0)) != mode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}

/* The TImode version of scalar_to_vector_candidate_p.  */

static bool
timode_scalar_to_vector_candidate_p (rtx_insn *insn)
{
  rtx def_set = pseudo_reg_set (insn);

  if (!def_set)
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  /* Only TImode load and store are allowed.  */
  if (GET_MODE (dst) != TImode)
    return false;

  if (MEM_P (dst))
    {
      /* Check for store.  Memory must be aligned or unaligned store
	 is optimal.  Only support store from register, standard SSE
	 constant or CONST_WIDE_INT generated from piecewise store.

	 ??? Verify performance impact before enabling CONST_INT for
	 __int128 store.  */
      if (misaligned_operand (dst, TImode)
	  && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
	return false;

      switch (GET_CODE (src))
	{
	default:
	  return false;

	case REG:
	case CONST_WIDE_INT:
	  return true;

	case CONST_INT:
	  return standard_sse_constant_p (src, TImode);
	}
    }
  else if (MEM_P (src))
    {
      /* Check for load.  Memory must be aligned or unaligned load is
	 optimal.  */
      return (REG_P (dst)
	      && (!misaligned_operand (src, TImode)
		  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    }

  return false;
}

/* For a register REGNO, scan instructions for its defs and uses.
   Put REGNO in REGS if a def or use isn't in CANDIDATES.  */

static void
timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
				   unsigned int regno)
{
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    {
      if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible def in insn %d\n",
		     regno, DF_REF_INSN_UID (def));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }

  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Debug instructions are skipped.  */
      if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
	  && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "r%d has non convertible use in insn %d\n",
		     regno, DF_REF_INSN_UID (ref));

	  bitmap_set_bit (regs, regno);
	  break;
	}
    }
}

/* The TImode version of remove_non_convertible_regs.  */

static void
timode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx dest = SET_DEST (def_set);
      rtx src = SET_SRC (def_set);

      if ((!REG_P (dest)
	   || bitmap_bit_p (regs, REGNO (dest))
	   || HARD_REGISTER_P (dest))
	  && (!REG_P (src)
	      || bitmap_bit_p (regs, REGNO (src))
	      || HARD_REGISTER_P (src)))
	continue;

      if (REG_P (dest))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (dest));

      if (REG_P (src))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (src));
    }

  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }

      for (df_ref ref = DF_REG_USE_CHAIN (id);
	   ref;
	   ref = DF_REF_NEXT_REG (ref))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (ref));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
	  }
    }

  BITMAP_FREE (regs);
}

/* Main STV pass function.  Find and convert scalar
   instructions into vector mode when profitable.  */

static unsigned int
convert_scalars_to_vector (bool timode_p)
{
  basic_block bb;
  int converted_insns = 0;

  bitmap_obstack_initialize (NULL);
  const machine_mode cand_mode[3] = { SImode, DImode, TImode };
  const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
  bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
  for (unsigned i = 0; i < 3; ++i)
    bitmap_initialize (&candidates[i], &bitmap_default_obstack);

  calculate_dominance_info (CDI_DOMINATORS);
  df_set_flags (DF_DEFER_INSN_RESCAN);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();

  /* Find all instructions we want to convert into vector mode.  */
  if (dump_file)
    fprintf (dump_file, "Searching for mode conversion candidates...\n");

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (timode_p
	    && timode_scalar_to_vector_candidate_p (insn))
	  {
	    if (dump_file)
	      fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
		       INSN_UID (insn));

	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
	  }
	else if (!timode_p)
	  {
	    /* Check {SI,DI}mode.  */
	    for (unsigned i = 0; i <= 1; ++i)
	      if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
		{
		  if (dump_file)
		    fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
			     INSN_UID (insn), i == 0 ? "SImode" : "DImode");

		  bitmap_set_bit (&candidates[i], INSN_UID (insn));
		  break;
		}
	  }
    }

  if (timode_p)
    timode_remove_non_convertible_regs (&candidates[2]);

  for (unsigned i = 0; i <= 2; ++i)
    if (!bitmap_empty_p (&candidates[i]))
      break;
    else if (i == 2 && dump_file)
      fprintf (dump_file, "There are no candidates for optimization.\n");

  for (unsigned i = 0; i <= 2; ++i)
    while (!bitmap_empty_p (&candidates[i]))
      {
	unsigned uid = bitmap_first_set_bit (&candidates[i]);
	scalar_chain *chain;

	if (cand_mode[i] == TImode)
	  chain = new timode_scalar_chain;
	else
	  chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);

	/* Find instructions chain we want to convert to vector mode.
	   Check all uses and definitions to estimate all required
	   conversions.  */
	chain->build (&candidates[i], uid);

	if (chain->compute_convert_gain () > 0)
	  converted_insns += chain->convert ();
	else if (dump_file)
	  fprintf (dump_file, "Chain #%d conversion is not profitable\n",
		   chain->chain_id);

	delete chain;
      }

  if (dump_file)
    fprintf (dump_file, "Total insns converted: %d\n", converted_insns);

  for (unsigned i = 0; i <= 2; ++i)
    bitmap_release (&candidates[i]);
  bitmap_obstack_release (NULL);
  df_process_deferred_rescans ();

  /* Conversion means we may have 128bit register spills/fills
     which require aligned stack.  */
  if (converted_insns)
    {
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
      if (crtl->stack_alignment_estimated < 128)
	crtl->stack_alignment_estimated = 128;

      crtl->stack_realign_needed
	= INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
      crtl->stack_realign_tried = crtl->stack_realign_needed;

      crtl->stack_realign_processed = true;

      if (!crtl->drap_reg)
	{
	  rtx drap_rtx = targetm.calls.get_drap_rtx ();

	  /* stack_realign_drap and drap_rtx must match.  */
	  gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));

	  /* Do nothing if NULL is returned,
	     which means DRAP is not needed.  */
	  if (drap_rtx != NULL)
	    {
	      crtl->args.internal_arg_pointer = drap_rtx;

	      /* Call fixup_tail_calls to clean up
		 REG_EQUIV note if DRAP is needed.  */
	      fixup_tail_calls ();
	    }
	}

      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
      if (TARGET_64BIT)
	for (tree parm = DECL_ARGUMENTS (current_function_decl);
	     parm; parm = DECL_CHAIN (parm))
	  {
	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
	      continue;
	    if (DECL_RTL_SET_P (parm)
		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_RTL (parm);
		if (REG_P (r))
		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
	      }
	    if (DECL_INCOMING_RTL (parm)
		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_INCOMING_RTL (parm);
		if (REG_P (r))
		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
	      }
	  }
    }

  return 0;
}

/* Modify the vzeroupper pattern in INSN so that it describes the effect
   that the instruction has on the SSE registers.  LIVE_REGS are the set
   of registers that are live across the instruction.

   For a live register R we use:

     (set (reg:V2DF R) (reg:V2DF R))

   which preserves the low 128 bits but clobbers the upper bits.  */

static void
ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
{
  rtx pattern = PATTERN (insn);
  unsigned int nregs = TARGET_64BIT ? 16 : 8;
  unsigned int npats = nregs;
  for (unsigned int i = 0; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
      if (!bitmap_bit_p (live_regs, regno))
	npats--;
    }
  if (npats == 0)
    return;
  rtvec vec = rtvec_alloc (npats + 1);
  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
  for (unsigned int i = 0, j = 0; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
      if (!bitmap_bit_p (live_regs, regno))
	continue;
      rtx reg = gen_rtx_REG (V2DImode, regno);
      ++j;
      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
    }
  XVEC (pattern, 0) = vec;
  INSN_CODE (insn) = -1;
  df_insn_rescan (insn);
}
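
/* For instance, if only xmm2 is live across the vzeroupper, the
   parallel is rewritten to hold the original unspec plus a single
   (set (reg:V2DI xmm2) (reg:V2DI xmm2)); registers not mentioned are
   treated by dataflow as untouched, the live one as partially
   preserved.  */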

/* Walk the vzeroupper instructions in the function and annotate them
   with the effect that they have on the SSE registers.  */

static void
ix86_add_reg_usage_to_vzerouppers (void)
{
  basic_block bb;
  rtx_insn *insn;
  auto_bitmap live_regs;

  df_analyze ();
  FOR_EACH_BB_FN (bb, cfun)
    {
      bitmap_copy (live_regs, df_get_live_out (bb));
      df_simulate_initialize_backwards (bb, live_regs);
      FOR_BB_INSNS_REVERSE (bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;
	  if (vzeroupper_pattern (PATTERN (insn), VOIDmode))
	    ix86_add_reg_usage_to_vzeroupper (insn, live_regs);
	  df_simulate_one_insn_backwards (bb, insn, live_regs);
	}
    }
}

static unsigned int
rest_of_handle_insert_vzeroupper (void)
{
  int i;

  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit or 512bit registers.  The pass
     reuses mode switching infrastructure by re-running mode insertion
     pass, so disable entities that have already been processed.  */
  for (i = 0; i < MAX_386_ENTITIES; i++)
    ix86_optimize_mode_switching[i] = 0;

  ix86_optimize_mode_switching[AVX_U128] = 1;

  /* Call optimize_mode_switching.  */
  g->get_passes ()->execute_pass_mode_switching ();
  ix86_add_reg_usage_to_vzerouppers ();
  return 0;
}
1857namespace {
1858
1859const pass_data pass_data_insert_vzeroupper =
1860{
1861 RTL_PASS, /* type */
1862 "vzeroupper", /* name */
1863 OPTGROUP_NONE, /* optinfo_flags */
1864 TV_MACH_DEP, /* tv_id */
1865 0, /* properties_required */
1866 0, /* properties_provided */
1867 0, /* properties_destroyed */
1868 0, /* todo_flags_start */
1869 TODO_df_finish, /* todo_flags_finish */
1870};
1871
1872class pass_insert_vzeroupper : public rtl_opt_pass
1873{
1874public:
1875 pass_insert_vzeroupper(gcc::context *ctxt)
1876 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
1877 {}
1878
1879 /* opt_pass methods: */
1880 virtual bool gate (function *)
1881 {
1882 return TARGET_AVX
1883 && TARGET_VZEROUPPER && flag_expensive_optimizations
1884 && !optimize_size;
1885 }
1886
1887 virtual unsigned int execute (function *)
1888 {
1889 return rest_of_handle_insert_vzeroupper ();
1890 }
1891
1892}; // class pass_insert_vzeroupper

const pass_data pass_data_stv =
{
  RTL_PASS, /* type */
  "stv", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_stv : public rtl_opt_pass
{
public:
  pass_stv (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_stv, ctxt),
      timode_p (false)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return ((!timode_p || TARGET_64BIT)
	      && TARGET_STV && TARGET_SSE2 && optimize > 1);
    }

  virtual unsigned int execute (function *)
    {
      return convert_scalars_to_vector (timode_p);
    }

  opt_pass *clone ()
    {
      return new pass_stv (m_ctxt);
    }

  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      timode_p = param;
    }

private:
  bool timode_p;
}; // class pass_stv

} // anon namespace

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}

rtl_opt_pass *
make_pass_stv (gcc::context *ctxt)
{
  return new pass_stv (ctxt);
}
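
/* A minimal sketch (assumed example, not from this file) of what the
   STV ("scalar to vector") pass does: on -m32 with SSE2, a 64-bit
   scalar bitwise operation such as

	long long a, b;
	a |= b;

   would otherwise need two 32-bit integer instructions; STV lets it
   be carried out as a single POR on XMM registers.  The TImode
   variant (timode_p) does the analogous conversion for 128-bit
   scalars on 64-bit targets.  */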

/* Insert ENDBR and pseudo patchable-area instructions.  */

static void
rest_of_insert_endbr_and_patchable_area (bool need_endbr,
					 unsigned int patchable_area_size)
{
  rtx endbr;
  rtx_insn *insn;
  rtx_insn *endbr_insn = NULL;
  basic_block bb;

  if (need_endbr)
    {
      /* Currently emit ENDBR if the function is tracked, i.e. the
	 'nocf_check' attribute is absent from the function type.
	 Later an optimization will be introduced to analyze whether
	 the address of a static function is taken: a static function
	 whose address is not taken will get a nocf_check attribute,
	 which will allow the number of ENDBR instructions to be
	 reduced.  */
      if (!lookup_attribute ("nocf_check",
			     TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
	  && (!flag_manual_endbr
	      || lookup_attribute ("cf_check",
				   DECL_ATTRIBUTES (cfun->decl)))
	  && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
	      || ix86_cmodel == CM_LARGE
	      || ix86_cmodel == CM_LARGE_PIC
	      || flag_force_indirect_call
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
		  && DECL_DLLIMPORT_P (cfun->decl))))
	{
	  if (crtl->profile && flag_fentry)
	    {
	      /* Queue ENDBR insertion to x86_function_profiler.
		 NB: Any patchable-area insn will be inserted after
		 ENDBR.  */
	      cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
	    }
	  else
	    {
	      endbr = gen_nop_endbr ();
	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      rtx_insn *insn = BB_HEAD (bb);
	      endbr_insn = emit_insn_before (endbr, insn);
	    }
	}
    }

  if (patchable_area_size)
    {
      if (crtl->profile && flag_fentry)
	{
	  /* Queue patchable-area insertion to x86_function_profiler.
	     NB: If there is a queued ENDBR, x86_function_profiler
	     will also handle patchable-area.  */
	  if (!cfun->machine->insn_queued_at_entrance)
	    cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
	}
      else
	{
	  rtx patchable_area
	    = gen_patchable_area (GEN_INT (patchable_area_size),
				  GEN_INT (crtl->patch_area_entry == 0));
	  if (endbr_insn)
	    emit_insn_after (patchable_area, endbr_insn);
	  else
	    {
	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      insn = BB_HEAD (bb);
	      emit_insn_before (patchable_area, insn);
	    }
	}
    }

  if (!need_endbr)
    return;

  bb = 0;
  FOR_EACH_BB_FN (bb, cfun)
    {
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  if (CALL_P (insn))
	    {
	      need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
	      if (!need_endbr && !SIBLING_CALL_P (insn))
		{
		  rtx call = get_call_rtx_from (insn);
		  rtx fnaddr = XEXP (call, 0);
		  tree fndecl = NULL_TREE;

		  /* Also generate ENDBRANCH for a non-tail call which
		     may return via indirect branch.  */
		  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
		    fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
		  if (fndecl == NULL_TREE)
		    fndecl = MEM_EXPR (fnaddr);
		  if (fndecl
		      && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
		      && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
		    fndecl = NULL_TREE;
		  if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
		    {
		      tree fntype = TREE_TYPE (fndecl);
		      if (lookup_attribute ("indirect_return",
					    TYPE_ATTRIBUTES (fntype)))
			need_endbr = true;
		    }
		}
	      if (!need_endbr)
		continue;
	      /* Generate ENDBRANCH after a CALL which can return more
		 than twice, i.e. setjmp-like functions.  */
	      endbr = gen_nop_endbr ();
	      emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
	      continue;
	    }

	  if (JUMP_P (insn) && flag_cet_switch)
	    {
	      rtx target = JUMP_LABEL (insn);
	      if (target == NULL_RTX || ANY_RETURN_P (target))
		continue;

	      /* Check that the jump is a switch table.  */
	      rtx_insn *label = as_a<rtx_insn *> (target);
	      rtx_insn *table = next_insn (label);
	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
		continue;

	      /* For the indirect jump find out all the places it jumps
		 to and insert ENDBRANCH there.  It should be done under
		 a special flag to control ENDBRANCH generation for
		 switch stmts.  */
	      edge_iterator ei;
	      edge e;
	      basic_block dest_blk;

	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  rtx_insn *insn;

		  dest_blk = e->dest;
		  insn = BB_HEAD (dest_blk);
		  gcc_assert (LABEL_P (insn));
		  endbr = gen_nop_endbr ();
		  emit_insn_after (endbr, insn);
		}
	      continue;
	    }

	  if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	    {
	      endbr = gen_nop_endbr ();
	      emit_insn_after (endbr, insn);
	      continue;
	    }
	}
    }

  return;
}
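
/* Hedged illustration (not from this file) of the resulting code with
   -fcf-protection=branch:

   foo:
	endbr64			# function entry, a potential indirect
	...			# branch target
	call	setjmp
	endbr64			# longjmp may land here via an
	...			# indirect branch

   and with -mcet-switch each case label of a jump-table switch also
   receives an endbr64.  */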

namespace {

const pass_data pass_data_insert_endbr_and_patchable_area =
{
  RTL_PASS, /* type.  */
  "endbr_and_patchable_area", /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_MACH_DEP, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  0, /* todo_flags_finish.  */
};

class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
{
public:
  pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
      patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
      return need_endbr || patchable_area_size;
    }

  virtual unsigned int execute (function *)
    {
      timevar_push (TV_MACH_DEP);
      rest_of_insert_endbr_and_patchable_area (need_endbr,
					       patchable_area_size);
      timevar_pop (TV_MACH_DEP);
      return 0;
    }

private:
  bool need_endbr;
  unsigned int patchable_area_size;
}; // class pass_insert_endbr_and_patchable_area

} // anon namespace

rtl_opt_pass *
make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
{
  return new pass_insert_endbr_and_patchable_area (ctxt);
}
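
/* Usage note (hedged, based on the generic option semantics): with
   -fpatchable-function-entry=N,M the middle end records N in
   crtl->patch_area_size and M in crtl->patch_area_entry.  The M NOPs
   before the entry label are emitted elsewhere; the N - M NOPs after
   it are the patchable_area_size computed in the gate above, placed
   after any ENDBR.  */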

/* Replace all one-value const vectors that are referenced by
   SYMBOL_REFs in X with embedded broadcasts, i.e. transform

     vpaddq .LC0(%rip), %zmm0, %zmm0
     ret
   .LC0:
     .quad 3
     .quad 3
     .quad 3
     .quad 3
     .quad 3
     .quad 3
     .quad 3
     .quad 3

   to

     vpaddq .LC0(%rip){1to8}, %zmm0, %zmm0
     ret
   .LC0:
     .quad 3  */
static void
replace_constant_pool_with_broadcast (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), ALL)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      rtx broadcast_mem, vec_dup, constant, first;
      machine_mode mode;

      /* Constant pool.  */
      if (!MEM_P (x)
	  || !SYMBOL_REF_P (XEXP (x, 0))
	  || !CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
	continue;

      /* Const vector.  */
      mode = GET_MODE (x);
      if (!VECTOR_MODE_P (mode))
	return;
      constant = get_pool_constant (XEXP (x, 0));
      if (GET_CODE (constant) != CONST_VECTOR)
	return;

      /* There could be some rtx like
	 (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1")))
	 but with "*.LC1" referring to a V2DI constant vector.  */
      if (GET_MODE (constant) != mode)
	{
	  constant = simplify_subreg (mode, constant, GET_MODE (constant), 0);
	  if (constant == NULL_RTX || GET_CODE (constant) != CONST_VECTOR)
	    return;
	}
      first = XVECEXP (constant, 0, 0);

      for (int i = 1; i < GET_MODE_NUNITS (mode); ++i)
	{
	  rtx tmp = XVECEXP (constant, 0, i);
	  /* Vector duplicate value.  */
	  if (!rtx_equal_p (tmp, first))
	    return;
	}

      /* Replace with embedded broadcast.  */
      broadcast_mem = force_const_mem (GET_MODE_INNER (mode), first);
      vec_dup = gen_rtx_VEC_DUPLICATE (mode, broadcast_mem);
      validate_change (insn, loc, vec_dup, 0);

      /* There is at most one memory_operand in an insn.  */
      return;
    }
}
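
/* For reference (a sketch; the exact modes depend on the insn), the
   replacement built above has the shape

	(vec_duplicate:V8DI (mem/u/c:DI (symbol_ref/u:DI ("*.LC0"))))

   which the AVX512 insn patterns can match as an embedded broadcast
   operand such as .LC0(%rip){1to8}.  */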

/* At entry of the nearest common dominator for basic blocks with
   conversions, generate a single
	vxorps %xmmN, %xmmN, %xmmN
   for all
	vcvtss2sd  op, %xmmN, %xmmX
	vcvtsd2ss  op, %xmmN, %xmmX
	vcvtsi2ss  op, %xmmN, %xmmX
	vcvtsi2sd  op, %xmmN, %xmmX

   NB: We want to generate only a single vxorps to cover the whole
   function.  The LCM algorithm isn't appropriate here since it may
   place a vxorps inside the loop.  */

static unsigned int
remove_partial_avx_dependency (void)
{
  timevar_push (TV_MACH_DEP);

  bitmap_obstack_initialize (NULL);
  bitmap convert_bbs = BITMAP_ALLOC (NULL);

  basic_block bb;
  rtx_insn *insn, *set_insn;
  rtx set;
  rtx v4sf_const0 = NULL_RTX;

  auto_vec<rtx_insn *> control_flow_insns;

  FOR_EACH_BB_FN (bb, cfun)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  /* Handle AVX512 embedded broadcast here to save compile time.  */
	  if (TARGET_AVX512F)
	    replace_constant_pool_with_broadcast (insn);

	  set = single_set (insn);
	  if (!set)
	    continue;

	  if (get_attr_avx_partial_xmm_update (insn)
	      != AVX_PARTIAL_XMM_UPDATE_TRUE)
	    continue;

	  if (!v4sf_const0)
	    {
	      calculate_dominance_info (CDI_DOMINATORS);
	      df_set_flags (DF_DEFER_INSN_RESCAN);
	      df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
	      df_md_add_problem ();
	      df_analyze ();
	      v4sf_const0 = gen_reg_rtx (V4SFmode);
	    }

	  /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
	     SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
	     vec_merge with subreg.  */
	  rtx src = SET_SRC (set);
	  rtx dest = SET_DEST (set);
	  machine_mode dest_mode = GET_MODE (dest);

	  rtx zero;
	  machine_mode dest_vecmode;
	  if (dest_mode == E_SFmode)
	    {
	      dest_vecmode = V4SFmode;
	      zero = v4sf_const0;
	    }
	  else
	    {
	      dest_vecmode = V2DFmode;
	      zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
	    }

	  /* Change source to vector mode.  */
	  src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
	  src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
				   GEN_INT (HOST_WIDE_INT_1U));
	  /* Change destination to vector mode.  */
	  rtx vec = gen_reg_rtx (dest_vecmode);
	  /* Generate an XMM vector SET.  */
	  set = gen_rtx_SET (vec, src);
	  set_insn = emit_insn_before (set, insn);
	  df_insn_rescan (set_insn);

	  if (cfun->can_throw_non_call_exceptions)
	    {
	      /* Handle REG_EH_REGION note.  */
	      rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	      if (note)
		{
		  control_flow_insns.safe_push (set_insn);
		  add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
		}
	    }

	  src = gen_rtx_SUBREG (dest_mode, vec, 0);
	  set = gen_rtx_SET (dest, src);

	  /* Drop possible dead definitions.  */
	  PATTERN (insn) = set;

	  INSN_CODE (insn) = -1;
	  recog_memoized (insn);
	  df_insn_rescan (insn);
	  bitmap_set_bit (convert_bbs, bb->index);
	}
    }

  if (v4sf_const0)
    {
      /* (Re-)discover loops so that bb->loop_father can be used in the
	 analysis below.  */
      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

      /* Generate a vxorps at entry of the nearest common dominator for
	 basic blocks with conversions, which is in the fake loop that
	 contains the whole function, so that there is only a single
	 vxorps in the whole function.  */
      bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
					     convert_bbs);
      while (bb->loop_father->latch
	     != EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb = get_immediate_dominator (CDI_DOMINATORS,
				      bb->loop_father->header);

      set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));

      insn = BB_HEAD (bb);
      while (insn && !NONDEBUG_INSN_P (insn))
	{
	  if (insn == BB_END (bb))
	    {
	      insn = NULL;
	      break;
	    }
	  insn = NEXT_INSN (insn);
	}
      if (insn == BB_HEAD (bb))
	set_insn = emit_insn_before (set, insn);
      else
	set_insn = emit_insn_after (set,
				    insn ? PREV_INSN (insn) : BB_END (bb));
      df_insn_rescan (set_insn);
      df_process_deferred_rescans ();
      loop_optimizer_finalize ();

      if (!control_flow_insns.is_empty ())
	{
	  free_dominance_info (CDI_DOMINATORS);

	  unsigned int i;
	  FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
	    if (control_flow_insn_p (insn))
	      {
		/* Split the block after insn.  There will be a fallthru
		   edge, which is OK so we keep it.  We have to create
		   the exception edges ourselves.  */
		bb = BLOCK_FOR_INSN (insn);
		split_block (bb, insn);
		rtl_make_eh_edge (NULL, bb, BB_END (bb));
	      }
	}
    }

  bitmap_obstack_release (NULL);
  BITMAP_FREE (convert_bbs);

  timevar_pop (TV_MACH_DEP);
  return 0;
}
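
/* Hedged example of the transformation (register numbers are
   illustrative): a scalar conversion like

	vcvtsi2ss  %edi, %xmm0, %xmm0

   writes only the low element of %xmm0 and therefore carries a false
   dependency on the previous contents of %xmm0.  After this pass the
   function contains a single

	vxorps	%xmm1, %xmm1, %xmm1

   in a dominating block, and the conversion merges into that zeroed
   register instead:

	vcvtsi2ss  %edi, %xmm1, %xmm0

   breaking the dependency.  */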

static bool
remove_partial_avx_dependency_gate ()
{
  return (TARGET_AVX
	  && TARGET_SSE_PARTIAL_REG_DEPENDENCY
	  && TARGET_SSE_MATH
	  && optimize
	  && optimize_function_for_speed_p (cfun));
}

namespace {

const pass_data pass_data_remove_partial_avx_dependency =
{
  RTL_PASS, /* type */
  "rpad", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_remove_partial_avx_dependency : public rtl_opt_pass
{
public:
  pass_remove_partial_avx_dependency (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return remove_partial_avx_dependency_gate ();
    }

  virtual unsigned int execute (function *)
    {
      return remove_partial_avx_dependency ();
    }
}; // class pass_rpad

} // anon namespace

rtl_opt_pass *
make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
{
  return new pass_remove_partial_avx_dependency (ctxt);
}

/* For a const vector with one duplicated value, there's no need to put
   the whole vector in the constant pool when the target supports
   embedded broadcast.  */
static unsigned int
constant_pool_broadcast (void)
{
  timevar_push (TV_MACH_DEP);
  rtx_insn *insn;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	replace_constant_pool_with_broadcast (insn);
    }
  timevar_pop (TV_MACH_DEP);
  return 0;
}

namespace {

const pass_data pass_data_constant_pool_broadcast =
{
  RTL_PASS, /* type */
  "cpb", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_constant_pool_broadcast : public rtl_opt_pass
{
public:
  pass_constant_pool_broadcast (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_constant_pool_broadcast, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* Return false if the rpad pass gate is true:
	 replace_constant_pool_with_broadcast is called
	 from both this pass and the rpad pass.  */
      return (TARGET_AVX512F && !remove_partial_avx_dependency_gate ());
    }

  virtual unsigned int execute (function *)
    {
      return constant_pool_broadcast ();
    }
}; // class pass_cpb

} // anon namespace

rtl_opt_pass *
make_pass_constant_pool_broadcast (gcc::context *ctxt)
{
  return new pass_constant_pool_broadcast (ctxt);
}

/* This compares the priority of target features in function DECL1
   and DECL2.  It returns a positive value if DECL1 has higher
   priority, a negative value if DECL2 has higher priority and 0 if
   they are the same.  */

int
ix86_compare_version_priority (tree decl1, tree decl2)
{
  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);

  return (int)priority1 - (int)priority2;
}

/* V1 and V2 point to function versions with different priorities
   based on the target ISA.  This function compares their priorities
   so that qsort sorts the versions in descending priority order.  */

static int
feature_compare (const void *v1, const void *v2)
{
  typedef struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } function_version_info;

  const function_version_info c1 = *(const function_version_info *)v1;
  const function_version_info c2 = *(const function_version_info *)v2;
  return (c2.dispatch_priority - c1.dispatch_priority);
}
2567
2568/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
2569 to return a pointer to VERSION_DECL if the outcome of the expression
2570 formed by PREDICATE_CHAIN is true. This function will be called during
2571 version dispatch to decide which function version to execute. It returns
2572 the basic block at the end, to which more conditions can be added. */
2573
2574static basic_block
2575add_condition_to_bb (tree function_decl, tree version_decl,
2576 tree predicate_chain, basic_block new_bb)
2577{
2578 gimple *return_stmt;
2579 tree convert_expr, result_var;
2580 gimple *convert_stmt;
2581 gimple *call_cond_stmt;
2582 gimple *if_else_stmt;
2583
2584 basic_block bb1, bb2, bb3;
2585 edge e12, e23;
2586
2587 tree cond_var, and_expr_var = NULL_TREE;
2588 gimple_seq gseq;
2589
2590 tree predicate_decl, predicate_arg;
2591
2592 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
2593
2594 gcc_assert (new_bb != NULL);
2595 gseq = bb_seq (new_bb);
2596
2597
2598 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
2599 build_fold_addr_expr (version_decl));
2600 result_var = create_tmp_var (ptr_type_node);
2601 convert_stmt = gimple_build_assign (result_var, convert_expr);
2602 return_stmt = gimple_build_return (result_var);
2603
2604 if (predicate_chain == NULL_TREE)
2605 {
2606 gimple_seq_add_stmt (&gseq, convert_stmt);
2607 gimple_seq_add_stmt (&gseq, return_stmt);
2608 set_bb_seq (new_bb, gseq);
2609 gimple_set_bb (convert_stmt, new_bb);
2610 gimple_set_bb (return_stmt, new_bb);
2611 pop_cfun ();
2612 return new_bb;
2613 }
2614
2615 while (predicate_chain != NULL)
2616 {
2617 cond_var = create_tmp_var (integer_type_node);
2618 predicate_decl = TREE_PURPOSE (predicate_chain);
2619 predicate_arg = TREE_VALUE (predicate_chain);
2620 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
2621 gimple_call_set_lhs (call_cond_stmt, cond_var);
2622
2623 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
2624 gimple_set_bb (call_cond_stmt, new_bb);
2625 gimple_seq_add_stmt (&gseq, call_cond_stmt);
2626
2627 predicate_chain = TREE_CHAIN (predicate_chain);
2628
2629 if (and_expr_var == NULL)
2630 and_expr_var = cond_var;
2631 else
2632 {
2633 gimple *assign_stmt;
2634 /* Use MIN_EXPR to check if any integer is zero?.
2635 and_expr_var = min_expr <cond_var, and_expr_var> */
2636 assign_stmt = gimple_build_assign (and_expr_var,
2637 build2 (MIN_EXPR, integer_type_node,
2638 cond_var, and_expr_var));
2639
2640 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
2641 gimple_set_bb (assign_stmt, new_bb);
2642 gimple_seq_add_stmt (&gseq, assign_stmt);
2643 }
2644 }
2645
2646 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
2647 integer_zero_node,
2648 NULL_TREE, NULL_TREE);
2649 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
2650 gimple_set_bb (if_else_stmt, new_bb);
2651 gimple_seq_add_stmt (&gseq, if_else_stmt);
2652
2653 gimple_seq_add_stmt (&gseq, convert_stmt);
2654 gimple_seq_add_stmt (&gseq, return_stmt);
2655 set_bb_seq (new_bb, gseq);
2656
2657 bb1 = new_bb;
2658 e12 = split_block (bb1, if_else_stmt);
2659 bb2 = e12->dest;
2660 e12->flags &= ~EDGE_FALLTHRU;
2661 e12->flags |= EDGE_TRUE_VALUE;
2662
2663 e23 = split_block (bb2, return_stmt);
2664
2665 gimple_set_bb (convert_stmt, bb2);
2666 gimple_set_bb (return_stmt, bb2);
2667
2668 bb3 = e23->dest;
2669 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2670
2671 remove_edge (e23);
2672 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
2673
2674 pop_cfun ();
2675
2676 return bb3;
2677}
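
/* Illustrative shape of the dispatch code this builds (a sketch; the
   predicate calls come from PREDICATE_CHAIN and the names here are
   made up):

	cond = __builtin_cpu_supports ("avx2");
	if (cond > 0)
	  return (void *) foo.avx2;
	// control falls through to the next version's test

   Each call to this function appends one such test to the resolver
   body.  */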

/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    } *function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire
     before constructors, so explicitly call __builtin_cpu_init
     here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();

  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get the attribute string, parse it and find the right predicate
	 decl.  The predicate function could be a lengthy combination of
	 many features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch
     priority.  The priority is based on the ISA.  This is not a
     perfect solution.  There could still be ambiguity.  If more than
     one function version is suitable to execute, which one should be
     dispatched?  In future, allow the user to specify a dispatch
     priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* Dispatch the default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}

/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "function versions cannot be marked as %<gnu_inline%>,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* The target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow the assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}

tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function versions, add the target suffix to the assembler
     name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}
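
/* Hedged example of the mangling: a version declared as

	__attribute__ ((target ("avx2"))) int foo (void);

   gets the sorted attribute string appended to its assembler name,
   giving something like "foo.avx2" (the exact spelling depends on
   sorted_attr_string), while the "default" version keeps its original
   name.  */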

/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */

tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs %<ifunc%> which is not supported "
		"on this target");
    }

  return dispatch_decl;
}

/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is the
   ifunc alias that will point to the created resolver.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree ifunc_alias_decl,
		    basic_block *empty_bb)
{
  tree decl, type, t;

  /* Create the resolver function name based on default_decl.  */
  tree decl_name = clone_function_name (default_decl, "resolver");
  const char *resolver_name = IDENTIFIER_POINTER (decl_name);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 1;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (ifunc_alias_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  else
    TREE_PUBLIC (ifunc_alias_decl) = 0;

  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (ifunc_alias_decl != NULL);
  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (ifunc_alias_decl)
    = make_attribute ("ifunc", resolver_name,
		      DECL_ATTRIBUTES (ifunc_alias_decl));

  /* Create the alias for dispatch to resolver here.  */
  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
  return decl;
}
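
/* Hedged sketch of the objects created above: for a versioned FOO,
   the resolver/ifunc pair behaves like the hand-written

	void *foo.resolver (void);	// returns the chosen version
	int foo (void) __attribute__ ((ifunc ("foo.resolver")));

   so the dynamic loader runs the resolver once and binds every call
   to the version it returned.  (The "foo.resolver" name comes from
   clone_function_name; the exact spelling is an implementation
   detail.)  */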

/* Generate the dispatching code body to dispatch the multi-versioned
   function DECL.  The target hook is called to process the "target"
   attributes and provide the code to dispatch the right function at
   run-time.  NODE points to the dispatcher decl whose body will be
   created.  */

tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = node->function_version ();
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  auto_vec<tree, 2> fn_ver_vec;

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it
	 should have been determined if this function needs a vtable
	 index or not.  This happens for methods in derived classes
	 that override virtual methods in base classes but are not
	 explicitly marked as virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver_decl;
}