]>
Commit | Line | Data |
---|---|---|
99dee823 | 1 | /* Copyright (C) 1988-2021 Free Software Foundation, Inc. |
2bf6d935 ML |
2 | |
3 | This file is part of GCC. | |
4 | ||
5 | GCC is free software; you can redistribute it and/or modify | |
6 | it under the terms of the GNU General Public License as published by | |
7 | the Free Software Foundation; either version 3, or (at your option) | |
8 | any later version. | |
9 | ||
10 | GCC is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | GNU General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU General Public License | |
16 | along with GCC; see the file COPYING3. If not see | |
17 | <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | #define IN_TARGET_CODE 1 | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "backend.h" | |
25 | #include "rtl.h" | |
26 | #include "tree.h" | |
27 | #include "memmodel.h" | |
28 | #include "gimple.h" | |
29 | #include "cfghooks.h" | |
30 | #include "cfgloop.h" | |
31 | #include "df.h" | |
32 | #include "tm_p.h" | |
33 | #include "stringpool.h" | |
34 | #include "expmed.h" | |
35 | #include "optabs.h" | |
36 | #include "regs.h" | |
37 | #include "emit-rtl.h" | |
38 | #include "recog.h" | |
39 | #include "cgraph.h" | |
40 | #include "diagnostic.h" | |
41 | #include "cfgbuild.h" | |
42 | #include "alias.h" | |
43 | #include "fold-const.h" | |
44 | #include "attribs.h" | |
45 | #include "calls.h" | |
46 | #include "stor-layout.h" | |
47 | #include "varasm.h" | |
48 | #include "output.h" | |
49 | #include "insn-attr.h" | |
50 | #include "flags.h" | |
51 | #include "except.h" | |
52 | #include "explow.h" | |
53 | #include "expr.h" | |
54 | #include "cfgrtl.h" | |
55 | #include "common/common-target.h" | |
56 | #include "langhooks.h" | |
57 | #include "reload.h" | |
58 | #include "gimplify.h" | |
59 | #include "dwarf2.h" | |
60 | #include "tm-constrs.h" | |
2bf6d935 ML |
61 | #include "cselib.h" |
62 | #include "sched-int.h" | |
63 | #include "opts.h" | |
64 | #include "tree-pass.h" | |
65 | #include "context.h" | |
66 | #include "pass_manager.h" | |
67 | #include "target-globals.h" | |
68 | #include "gimple-iterator.h" | |
69 | #include "tree-vectorizer.h" | |
70 | #include "shrink-wrap.h" | |
71 | #include "builtins.h" | |
72 | #include "rtl-iter.h" | |
73 | #include "tree-iterator.h" | |
74 | #include "dbgcnt.h" | |
75 | #include "case-cfn-macros.h" | |
76 | #include "dojump.h" | |
77 | #include "fold-const-call.h" | |
78 | #include "tree-vrp.h" | |
79 | #include "tree-ssanames.h" | |
80 | #include "selftest.h" | |
81 | #include "selftest-rtl.h" | |
82 | #include "print-rtl.h" | |
83 | #include "intl.h" | |
84 | #include "ifcvt.h" | |
85 | #include "symbol-summary.h" | |
86 | #include "ipa-prop.h" | |
87 | #include "ipa-fnsummary.h" | |
88 | #include "wide-int-bitmask.h" | |
89 | #include "tree-vector-builder.h" | |
90 | #include "debug.h" | |
91 | #include "dwarf2out.h" | |
92 | #include "i386-builtins.h" | |
93 | #include "i386-features.h" | |
94 | ||
/* Base names of the out-of-line save/restore stubs.  The full symbol name
   is composed in get_stub_name from the ISA variant (sse/avx), one of these
   base names and the managed-register count.  */
const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};
103 | ||
/* Order in which the out-of-line stubs save/restore registers; index into
   this array parallels m_regs[] in each xlogue_layout instance.  */
const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:   0		1		2		3
    Offset:					realigned or	aligned + 8
    Register	   aligned	aligned + 8	aligned w/HFP	w/HFP
    XMM15 */ XMM15_REG, /* 0x10	0x18		0x10		0x18 */
    XMM14_REG,		/* 0x20	0x28		0x20		0x28 */
    XMM13_REG,		/* 0x30	0x38		0x30		0x38 */
    XMM12_REG,		/* 0x40	0x48		0x40		0x48 */
    XMM11_REG,		/* 0x50	0x58		0x50		0x58 */
    XMM10_REG,		/* 0x60	0x68		0x60		0x68 */
    XMM9_REG,		/* 0x70	0x78		0x70		0x78 */
    XMM8_REG,		/* 0x80	0x88		0x80		0x88 */
    XMM7_REG,		/* 0x90	0x98		0x90		0x98 */
    XMM6_REG,		/* 0xa0	0xa8		0xa0		0xa8 */
    SI_REG,		/* 0xa8	0xb0		0xa8		0xb0 */
    DI_REG,		/* 0xb0	0xb8		0xb0		0xb8 */
    BX_REG,		/* 0xb8	0xc0		0xb8		0xc0 */
    /* BP_REG is skipped entirely when a hard frame pointer is in use.  */
    BP_REG,		/* 0xc0	0xc8		N/A		N/A  */
    R12_REG,		/* 0xc8	0xd0		0xc0		0xc8 */
    R13_REG,		/* 0xd0	0xd8		0xc8		0xd0 */
    R14_REG,		/* 0xd8	0xe0		0xd0		0xd8 */
    R15_REG,		/* 0xe0	0xe8		0xd8		0xe0 */
};
131 | ||
/* Instantiate static const values (out-of-class definitions required
   for odr-used in-class constants).  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero; get_stub_name relies on
   a leading NUL meaning "not yet formatted" for its lazy init.  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				 [STUB_NAME_MAX_LEN];
143 | ||
/* Instantiates all xlogue_layout instances: one per combination of
   incoming stack offset (0 or 8) and hard-frame-pointer use; indexed
   by enum xlogue_stub_sets in get_instance.  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};
151 | ||
152 | /* Return an appropriate const instance of xlogue_layout based upon values | |
153 | in cfun->machine and crtl. */ | |
99b1c316 | 154 | const class xlogue_layout & |
2bf6d935 ML |
155 | xlogue_layout::get_instance () |
156 | { | |
157 | enum xlogue_stub_sets stub_set; | |
158 | bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in; | |
159 | ||
160 | if (stack_realign_fp) | |
161 | stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; | |
162 | else if (frame_pointer_needed) | |
163 | stub_set = aligned_plus_8 | |
164 | ? XLOGUE_SET_HFP_ALIGNED_PLUS_8 | |
165 | : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; | |
166 | else | |
167 | stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED; | |
168 | ||
169 | return s_instances[stub_set]; | |
170 | } | |
171 | ||
172 | /* Determine how many clobbered registers can be saved by the stub. | |
173 | Returns the count of registers the stub will save and restore. */ | |
174 | unsigned | |
175 | xlogue_layout::count_stub_managed_regs () | |
176 | { | |
177 | bool hfp = frame_pointer_needed || stack_realign_fp; | |
178 | unsigned i, count; | |
179 | unsigned regno; | |
180 | ||
181 | for (count = i = MIN_REGS; i < MAX_REGS; ++i) | |
182 | { | |
183 | regno = REG_ORDER[i]; | |
184 | if (regno == BP_REG && hfp) | |
185 | continue; | |
186 | if (!ix86_save_reg (regno, false, false)) | |
187 | break; | |
188 | ++count; | |
189 | } | |
190 | return count; | |
191 | } | |
192 | ||
/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      /* When a hard frame pointer is used BP_REG is not stub-managed;
	 widen the scan window by one so COUNT real candidates are still
	 examined.  */
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}
211 | ||
/* Constructor for xlogue_layout.  STACK_ALIGN_OFF_IN is the incoming
   stack offset variant (0 or 8) and HFP indicates a hard frame pointer,
   which excludes BP_REG from the layout (17 vs 18 registers).  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  /* Walk REG_ORDER, assigning each stub-managed register its offset;
     j counts only the registers actually placed in m_regs.  */
  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      /* BP_REG is saved by the frame setup when HFP is in use.  */
      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}
240 | ||
241 | const char * | |
242 | xlogue_layout::get_stub_name (enum xlogue_stub stub, | |
243 | unsigned n_extra_regs) | |
244 | { | |
245 | const int have_avx = TARGET_AVX; | |
246 | char *name = s_stub_names[!!have_avx][stub][n_extra_regs]; | |
247 | ||
248 | /* Lazy init */ | |
249 | if (!*name) | |
250 | { | |
251 | int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u", | |
252 | (have_avx ? "avx" : "sse"), | |
253 | STUB_BASE_NAMES[stub], | |
254 | MIN_REGS + n_extra_regs); | |
255 | gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN); | |
256 | } | |
257 | ||
258 | return name; | |
259 | } | |
260 | ||
/* Return rtx of a symbol ref for the entry point (based upon
   cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
rtx
xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
{
  const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
  gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
  gcc_assert (stub < XLOGUE_STUB_COUNT);
  /* The name depends on the realigned layout, so it must be final.  */
  gcc_assert (crtl->stack_realign_finalized);

  return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
}
273 | ||
/* Monotonic id generator for chains; each constructed chain gets the
   next value (see the scalar_chain constructor).  */
unsigned scalar_chain::max_id = 0;
275 | ||
72bb85f8 ML |
276 | namespace { |
277 | ||
2bf6d935 ML |
/* Initialize new chain for scalar mode SMODE_ to be converted to vector
   mode VMODE_; allocates the insn/def tracking bitmaps.  */

scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
{
  smode = smode_;
  vmode = vmode_;

  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  /* Bitmaps live on the default obstack; released in the destructor.  */
  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  /* The work queue is only allocated during build ().  */
  queue = NULL;
}
296 | ||
/* Free chain's data: the tracking bitmaps and their obstack.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  /* Pairs with bitmap_obstack_initialize in the constructor.  */
  bitmap_obstack_release (NULL);
}
306 | ||
307 | /* Add instruction into chains' queue. */ | |
308 | ||
309 | void | |
310 | scalar_chain::add_to_queue (unsigned insn_uid) | |
311 | { | |
312 | if (bitmap_bit_p (insns, insn_uid) | |
313 | || bitmap_bit_p (queue, insn_uid)) | |
314 | return; | |
315 | ||
316 | if (dump_file) | |
317 | fprintf (dump_file, " Adding insn %d into chain's #%d queue\n", | |
318 | insn_uid, chain_id); | |
319 | bitmap_set_bit (queue, insn_uid); | |
320 | } | |
321 | ||
b5a6addb RB |
/* Construct a general (non-TImode) chain; additionally tracks the set of
   insns needing conversion copies and the two inter-unit move counters
   used by compute_convert_gain.  */
general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
					    enum machine_mode vmode_)
     : scalar_chain (smode_, vmode_)
{
  insns_conv = BITMAP_ALLOC (NULL);
  n_sse_to_integer = 0;
  n_integer_to_sse = 0;
}
330 | ||
/* Release the conversion-insn bitmap; the base destructor frees the rest.  */
general_scalar_chain::~general_scalar_chain ()
{
  BITMAP_FREE (insns_conv);
}
335 | ||
2bf6d935 ML |
/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
general_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  /* Record the def/insn pair so we can later efficiently iterate over
     the defs to convert on insns not in the chain.  */
  bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
  if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
    {
      /* Def is outside the chain: an integer->SSE copy is needed.  Only
	 count (and dump) when either the insn or the reg is new;
	 bitmap_set_bit returns true on a fresh bit.  */
      if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
	  && !reg_new)
	return;
      n_integer_to_sse++;
    }
  else
    {
      /* Def is inside the chain: a scalar copy out is needed, once per
	 register.  */
      if (!reg_new)
	return;
      n_sse_to_integer++;
    }

  if (dump_file)
    fprintf (dump_file,
	     " Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
}
366 | ||
/* For TImode conversion, it is unused: TImode chains never produce
   dual-mode registers, so reaching here is a bug.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}
374 | ||
/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  /* Walk the def-use / use-def links of REF.  */
  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  /* Linked insn already in the chain or still a candidate:
	     nothing to convert, possibly enqueue and move on.  */
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      /* Otherwise the linked ref cannot be converted; the defining reg
	 must exist in both scalar and vector mode.  */
      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, " r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, " r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  /* An unconvertible use means REF's own def needs both modes.  */
	  mark_dual_mode_def (ref);
	}
    }
}
422 | ||
/* Add instruction into a chain.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  /* Record the destination pseudo of a single_set as a chain def.  */
  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  /* ??? The following is quadratic since analyze_register_chain
     iterates over all refs to look for dual-mode regs.  Instead this
     should be done separately for all regs mentioned in the chain once.  */
  df_ref ref;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      analyze_register_chain (candidates, ref);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}
453 | ||
/* Build new chain starting from insn INSN_UID recursively
   adding all dependent uses and definitions.  */

void
scalar_chain::build (bitmap candidates, unsigned insn_uid)
{
  queue = BITMAP_ALLOC (NULL);
  bitmap_set_bit (queue, insn_uid);

  if (dump_file)
    fprintf (dump_file, "Building chain #%d...\n", chain_id);

  /* Worklist loop: add_insn may enqueue further insns via
     analyze_register_chain.  */
  while (!bitmap_empty_p (queue))
    {
      insn_uid = bitmap_first_set_bit (queue);
      bitmap_clear_bit (queue, insn_uid);
      /* Remove from candidates so each insn joins at most one chain.  */
      bitmap_clear_bit (candidates, insn_uid);
      add_insn (candidates, insn_uid);
    }

  if (dump_file)
    {
      fprintf (dump_file, "Collected chain #%d...\n", chain_id);
      fprintf (dump_file, " insns: ");
      dump_bitmap (dump_file, insns);
      if (!bitmap_empty_p (defs_conv))
	{
	  bitmap_iterator bi;
	  unsigned id;
	  const char *comma = "";
	  fprintf (dump_file, " defs to convert: ");
	  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
	    {
	      fprintf (dump_file, "%sr%d", comma, id);
	      comma = ", ";
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (queue);
}
496 | ||
497 | /* Return a cost of building a vector costant | |
498 | instead of using a scalar one. */ | |
499 | ||
500 | int | |
93cf5515 | 501 | general_scalar_chain::vector_const_cost (rtx exp) |
2bf6d935 ML |
502 | { |
503 | gcc_assert (CONST_INT_P (exp)); | |
504 | ||
93cf5515 RB |
505 | if (standard_sse_constant_p (exp, vmode)) |
506 | return ix86_cost->sse_op; | |
507 | /* We have separate costs for SImode and DImode, use SImode costs | |
508 | for smaller modes. */ | |
509 | return ix86_cost->sse_load[smode == DImode ? 1 : 0]; | |
2bf6d935 ML |
510 | } |
511 | ||
/* Compute a gain for chain conversion.  Positive return value means the
   vectorized form of the whole chain is estimated cheaper than the
   scalar one, after subtracting inter-unit move costs.  */

int
general_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  /* SSE costs distinguish between SImode and DImode loads/stores, for
     int costs factor in the number of GPRs involved.  When supporting
     smaller modes than SImode the int load/store costs need to be
     adjusted as well.  */
  unsigned sse_cost_idx = smode == DImode ? 1 : 0;
  /* M is the number of GPRs a scalar value occupies (2 for DImode
     without -m64).  */
  unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);
      int igain = 0;

      if (REG_P (src) && REG_P (dst))
	igain += 2 * m - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	igain
	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
      else if (MEM_P (src) && REG_P (dst))
	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
      else if (GET_CODE (src) == ASHIFT
	       || GET_CODE (src) == ASHIFTRT
	       || GET_CODE (src) == LSHIFTRT)
	{
	  /* For a 2-GPR scalar shift we save either an extra shift or
	     the cross-word add, depending on the shift count.  */
	  if (m == 2)
	    {
	      if (INTVAL (XEXP (src, 1)) >= 32)
		igain += ix86_cost->add;
	      else
		igain += ix86_cost->shift_const;
	    }

	  igain += ix86_cost->shift_const - ix86_cost->sse_op;

	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	}
      else if (GET_CODE (src) == PLUS
	       || GET_CODE (src) == MINUS
	       || GET_CODE (src) == IOR
	       || GET_CODE (src) == XOR
	       || GET_CODE (src) == AND)
	{
	  igain += m * ix86_cost->add - ix86_cost->sse_op;
	  /* Additional gain for andnot for targets without BMI.  */
	  if (GET_CODE (XEXP (src, 0)) == NOT
	      && !TARGET_BMI)
	    igain += m * ix86_cost->add;

	  if (CONST_INT_P (XEXP (src, 0)))
	    igain -= vector_const_cost (XEXP (src, 0));
	  if (CONST_INT_P (XEXP (src, 1)))
	    igain -= vector_const_cost (XEXP (src, 1));
	}
      else if (GET_CODE (src) == NEG
	       || GET_CODE (src) == NOT)
	igain += m * ix86_cost->add - ix86_cost->sse_op - COSTS_N_INSNS (1);
      else if (GET_CODE (src) == ABS
	       || GET_CODE (src) == SMAX
	       || GET_CODE (src) == SMIN
	       || GET_CODE (src) == UMAX
	       || GET_CODE (src) == UMIN)
	{
	  /* We do not have any conditional move cost, estimate it as a
	     reg-reg move.  Comparisons are costed as adds.  */
	  igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
	  /* Integer SSE ops are all costed the same.  */
	  igain -= ix86_cost->sse_op;
	}
      else if (GET_CODE (src) == COMPARE)
	{
	  /* Assume comparison cost is the same.  */
	}
      else if (CONST_INT_P (src))
	{
	  if (REG_P (dst))
	    /* DImode can be immediate for TARGET_64BIT and SImode always.  */
	    igain += m * COSTS_N_INSNS (1);
	  else if (MEM_P (dst))
	    igain += (m * ix86_cost->int_store[2]
		      - ix86_cost->sse_store[sse_cost_idx]);
	  igain -= vector_const_cost (src);
	}
      else
	gcc_unreachable ();

      if (igain != 0 && dump_file)
	{
	  fprintf (dump_file, " Instruction gain %d for ", igain);
	  dump_insn_slim (dump_file, insn);
	}
      gain += igain;
    }

  if (dump_file)
    fprintf (dump_file, " Instruction conversion gain: %d\n", gain);

  /* Cost the integer to sse and sse to integer moves.  */
  cost += n_sse_to_integer * ix86_cost->sse_to_integer;
  /* ??? integer_to_sse but we only have that in the RA cost table.
     Assume sse_to_integer/integer_to_sse are the same which they
     are at the moment.  */
  cost += n_integer_to_sse * ix86_cost->sse_to_integer;

  if (dump_file)
    fprintf (dump_file, " Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, " Total gain: %d\n", gain);

  return gain;
}
641 | ||
2bf6d935 ML |
642 | /* Insert generated conversion instruction sequence INSNS |
643 | after instruction AFTER. New BB may be required in case | |
644 | instruction has EH region attached. */ | |
645 | ||
646 | void | |
647 | scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) | |
648 | { | |
649 | if (!control_flow_insn_p (after)) | |
650 | { | |
651 | emit_insn_after (insns, after); | |
652 | return; | |
653 | } | |
654 | ||
655 | basic_block bb = BLOCK_FOR_INSN (after); | |
656 | edge e = find_fallthru_edge (bb->succs); | |
657 | gcc_assert (e); | |
658 | ||
659 | basic_block new_bb = split_edge (e); | |
660 | emit_insn_after (insns, BB_HEAD (new_bb)); | |
661 | } | |
662 | ||
72bb85f8 ML |
663 | } // anon namespace |
664 | ||
8ed1d2fa RB |
/* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
   zeroing the upper parts.  */

static rtx
gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
{
  switch (GET_MODE_NUNITS (vmode))
    {
    case 1:
      /* We are not using this case currently.  */
      gcc_unreachable ();
    case 2:
      /* Two elements: concat GPR with a zero in the high half.  */
      return gen_rtx_VEC_CONCAT (vmode, gpr,
				 CONST0_RTX (GET_MODE_INNER (vmode)));
    default:
      /* More elements: merge a broadcast of GPR with zero, keeping only
	 element 0 (mask 1).  */
      return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
				CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
    }
}
684 | ||
2bf6d935 ML |
/* Make vector copies for all register REGNO definitions
   and replace its uses in a chain.  */

void
general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
{
  rtx vreg = *defs_map.get (reg);

  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
    {
      /* No direct GPR->XMM moves: bounce through a stack slot.  */
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      if (smode == DImode && !TARGET_64BIT)
	{
	  /* Store DImode in two SImode halves on 32-bit targets.  */
	  emit_move_insn (adjust_address (tmp, SImode, 0),
			  gen_rtx_SUBREG (SImode, reg, 0));
	  emit_move_insn (adjust_address (tmp, SImode, 4),
			  gen_rtx_SUBREG (SImode, reg, 4));
	}
      else
	emit_move_insn (copy_rtx (tmp), reg);
      emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			      gen_gpr_to_xmm_move_src (vmode, tmp)));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  /* Load low half, then pinsrd the high half into lane 1.  */
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (SImode, reg, 4),
					GEN_INT (2)));
	}
      else
	{
	  /* Without SSE4.1: load both halves and interleave them.  */
	  rtx tmp = gen_reg_rtx (DImode);
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 4)));
	  emit_insn (gen_vec_interleave_lowv4si
		     (gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, tmp, 0)));
	}
    }
  else
    emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			    gen_gpr_to_xmm_move_src (vmode, reg)));
  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     " Copied r%d to a vector register r%d for insn %d\n",
	     REGNO (reg), REGNO (vreg), INSN_UID (insn));
}
2bf6d935 | 748 | |
48a31a09 RB |
/* Copy the definition SRC of INSN inside the chain to DST for
   scalar uses outside of the chain.  */

void
general_scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
{
  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    {
      /* No direct XMM->GPR moves: bounce through a stack slot.  */
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      emit_move_insn (tmp, src);
      if (!TARGET_64BIT && smode == DImode)
	{
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  adjust_address (tmp, SImode, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  adjust_address (tmp, SImode, 4));
	}
      else
	emit_move_insn (dst, copy_rtx (tmp));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  /* Extract lanes 0 and 1 separately via vec_select.  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode,
				      gen_rtvec (1, const0_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 0),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));

	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 4),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));
	}
      else
	{
	  /* Without SSE4.1: move low word, shift right 32, move again.  */
	  rtx vcopy = gen_reg_rtx (V2DImode);
	  emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	  emit_move_insn (vcopy,
			  gen_rtx_LSHIFTRT (V2DImode,
					    vcopy, GEN_INT (32)));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	}
    }
  else
    emit_move_insn (dst, src);

  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     " Copied r%d to a scalar register r%d for insn %d\n",
	     REGNO (src), REGNO (dst), INSN_UID (insn));
}
816 | ||
/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   registers conversion.  */

void
general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      /* Recurse into NOT's operand and retag the NOT itself.  */
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, vmode);
    }
  else if (MEM_P (*op))
    {
      rtx tmp = gen_reg_rtx (GET_MODE (*op));

      /* Handle movabs.  */
      if (!memory_operand (*op, GET_MODE (*op)))
	{
	  rtx tmp2 = gen_reg_rtx (GET_MODE (*op));

	  emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
	  *op = tmp2;
	}

      /* Preload the memory operand into a fresh vector register.  */
      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
				     gen_gpr_to_xmm_move_src (vmode, *op)),
			insn);
      *op = gen_rtx_SUBREG (vmode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, " Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      *op = gen_rtx_SUBREG (vmode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (vmode);
      else
	{
	  /* Build a vector with the scalar in lane 0, zeros elsewhere.  */
	  unsigned n = GET_MODE_NUNITS (vmode);
	  rtx *v = XALLOCAVEC (rtx, n);
	  v[0] = *op;
	  for (unsigned i = 1; i < n; ++i)
	    v[i] = const0_rtx;
	  vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
	}

      /* Non-standard constants must be loaded from the constant pool.  */
      if (!standard_sse_constant_p (vec_cst, vmode))
	{
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      /* Anything else must already be a vector-mode SUBREG produced by
	 an earlier conversion.  */
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == vmode);
    }
}
894 | ||
/* Convert INSN to vector mode.  INSN must be a single_set insn that
   belongs to this chain; its definition and uses are rewritten to VMODE,
   with SUBREGs bridging between scalar pseudos and the vector pseudos
   recorded in defs_map.  */

void
general_scalar_chain::convert_insn (rtx_insn *insn)
{
  /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
  for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
      {
	/* Look for a non-debug use that still needs the scalar value:
	   either it sits inside a memory address or it is outside of
	   the chain.  */
	df_link *use;
	for (use = DF_REF_CHAIN (ref); use; use = use->next)
	  if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
	      && (DF_REF_REG_MEM_P (use->ref)
		  || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
	    break;
	if (use)
	  convert_reg (insn, DF_REF_REG (ref),
		       *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
	else if (MAY_HAVE_DEBUG_BIND_INSNS)
	  {
	    /* If we generated a scalar copy we can leave debug-insns
	       as-is, if not, we have to adjust them.  */
	    auto_vec<rtx_insn *, 5> to_reset_debug_insns;
	    for (use = DF_REF_CHAIN (ref); use; use = use->next)
	      if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
		{
		  rtx_insn *debug_insn = DF_REF_INSN (use->ref);
		  /* If there's a reaching definition outside of the
		     chain we have to reset.  */
		  df_link *def;
		  for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
		    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
		      break;
		  if (def)
		    to_reset_debug_insns.safe_push (debug_insn);
		  else
		    {
		      *DF_REF_REAL_LOC (use->ref)
			= *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
		      df_insn_rescan (debug_insn);
		    }
		}
	    /* Have to do the reset outside of the DF_CHAIN walk to not
	       disrupt it.  */
	    while (!to_reset_debug_insns.is_empty ())
	      {
		rtx_insn *debug_insn = to_reset_debug_insns.pop ();
		INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
		df_insn_rescan_debug_internal (debug_insn);
	      }
	  }
      }

  /* Replace uses in this insn with the defs we use in the chain.  */
  for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
	{
	  /* Also update a corresponding REG_DEAD note.  */
	  rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
	  if (note)
	    XEXP (note, 0) = *vreg;
	  *DF_REF_REAL_LOC (ref) = *vreg;
	}

  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (smode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (vmode, tmp, 0);
    }
  else if (REG_P (dst))
    {
      /* Replace the definition with a SUBREG to the definition we
	 use inside the chain.  */
      rtx *vdef = defs_map.get (dst);
      if (vdef)
	dst = *vdef;
      dst = gen_rtx_SUBREG (vmode, dst, 0);
      /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
	 is a non-REG_P.  So kill those off.  */
      rtx note = find_reg_equal_equiv_note (insn);
      if (note)
	remove_note (insn, note);
    }

  /* Rewrite the operation itself into VMODE, converting operands as
     needed.  */
  switch (GET_CODE (src))
    {
    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      convert_op (&XEXP (src, 1), insn);
      /* FALLTHRU */

    case ABS:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, vmode);
      break;

    case NEG:
      /* Negation is rewritten as subtraction from a zero vector.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
      src = gen_rtx_MINUS (vmode, subreg, src);
      break;

    case NOT:
      /* Bitwise NOT is rewritten as XOR with an all-ones vector.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
      src = gen_rtx_XOR (vmode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == vmode);
      break;

    case COMPARE:
      /* The zero-compare of a double-word register (see
	 convertible_comparison_p) becomes a PTEST of the value
	 interleaved with itself.  */
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
      subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg),
						    copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
					       copy_rtx_if_shared (subreg)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  /* The rewritten insn must be recognizable; failing here indicates a
     bug in the conversion above.  */
  int patt = recog_memoized (insn);
  if (patt == -1)
    fatal_insn_not_found (insn);
  df_insn_rescan (insn);
}
1075 | ||
/* Fix uses of converted REG in debug insns.  Each V1TImode use of REG
   found in a debug insn is wrapped in a TImode SUBREG so var-tracking
   keeps seeing the scalar mode.  No-op unless -fvar-tracking.  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
	 so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     register.  */
	  bool changed = false;
	  /* Walk all refs of this insn (the refs up to NEXT).  */
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  /* Rescan only when something was rewritten.  */
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}
1113 | ||
/* Convert INSN from TImode to V1T1mode.  INSN must be a single_set
   TImode load, store or constant store (see
   timode_scalar_to_vector_candidate_p).  */

void
timode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  switch (GET_CODE (dst))
    {
    case REG:
      {
	/* Keep any REG_EQUAL/EQUIV note consistent with the new mode.  */
	rtx tmp = find_reg_equal_equiv_note (insn);
	if (tmp)
	  PUT_MODE (XEXP (tmp, 0), V1TImode);
	PUT_MODE (dst, V1TImode);
	fix_debug_reg_uses (dst);
      }
      break;
    case MEM:
      PUT_MODE (dst, V1TImode);
      break;

    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (src))
    {
    case REG:
      PUT_MODE (src, V1TImode);
      /* Call fix_debug_reg_uses only if SRC is never defined.  */
      if (!DF_REG_DEF_CHAIN (REGNO (src)))
	fix_debug_reg_uses (src);
      break;

    case MEM:
      PUT_MODE (src, V1TImode);
      break;

    case CONST_WIDE_INT:
      if (NONDEBUG_INSN_P (insn))
	{
	  /* Since there are no instructions to store 128-bit constant,
	     temporary register usage is required.  */
	  rtx tmp = gen_reg_rtx (V1TImode);
	  start_sequence ();
	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
	  src = validize_mem (force_const_mem (V1TImode, src));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  if (seq)
	    emit_insn_before (seq, insn);
	  /* Store TMP into the original destination after INSN; INSN
	     itself becomes the load of the constant into TMP.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    case CONST_INT:
      /* Only all-zeros (1) or all-ones (2) standard SSE constants are
	 expected here.  */
      switch (standard_sse_constant_p (src, TImode))
	{
	case 1:
	  src = CONST0_RTX (GET_MODE (dst));
	  break;
	case 2:
	  src = CONSTM1_RTX (GET_MODE (dst));
	  break;
	default:
	  gcc_unreachable ();
	}
      if (NONDEBUG_INSN_P (insn))
	{
	  rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store standard SSE
	     constant, temporary register usage is required.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}
1209 | ||
/* Generate copies from defs used by the chain but not defined therein.
   Also populates defs_map which is used later by convert_insn.  */

void
general_scalar_chain::convert_registers ()
{
  bitmap_iterator bi;
  unsigned id;
  /* Allocate a fresh pseudo in the chain's scalar mode for every
     register that needs conversion and record it in defs_map.  */
  EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
    {
      rtx chain_reg = gen_reg_rtx (smode);
      defs_map.put (regno_reg_rtx[id], chain_reg);
    }
  /* For each out-of-chain insn that defines such a register, emit the
     copy into the vector world.  */
  EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
    for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
      if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
	make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
}
1228 | ||
1229 | /* Convert whole chain creating required register | |
1230 | conversions and copies. */ | |
1231 | ||
1232 | int | |
1233 | scalar_chain::convert () | |
1234 | { | |
1235 | bitmap_iterator bi; | |
1236 | unsigned id; | |
1237 | int converted_insns = 0; | |
1238 | ||
1239 | if (!dbg_cnt (stv_conversion)) | |
1240 | return 0; | |
1241 | ||
1242 | if (dump_file) | |
1243 | fprintf (dump_file, "Converting chain #%d...\n", chain_id); | |
1244 | ||
1245 | convert_registers (); | |
1246 | ||
1247 | EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) | |
1248 | { | |
1249 | convert_insn (DF_INSN_UID_GET (id)->insn); | |
1250 | converted_insns++; | |
1251 | } | |
1252 | ||
1253 | return converted_insns; | |
1254 | } | |
1255 | ||
266f44a9 L |
1256 | /* Return the SET expression if INSN doesn't reference hard register. |
1257 | Return NULL if INSN uses or defines a hard register, excluding | |
1258 | pseudo register pushes, hard register uses in a memory address, | |
1259 | clobbers and flags definitions. */ | |
2bf6d935 | 1260 | |
266f44a9 L |
1261 | static rtx |
1262 | pseudo_reg_set (rtx_insn *insn) | |
2bf6d935 | 1263 | { |
266f44a9 L |
1264 | rtx set = single_set (insn); |
1265 | if (!set) | |
1266 | return NULL; | |
1267 | ||
1268 | /* Check pseudo register push first. */ | |
6643ca0b | 1269 | machine_mode mode = TARGET_64BIT ? TImode : DImode; |
266f44a9 L |
1270 | if (REG_P (SET_SRC (set)) |
1271 | && !HARD_REGISTER_P (SET_SRC (set)) | |
6643ca0b | 1272 | && push_operand (SET_DEST (set), mode)) |
266f44a9 L |
1273 | return set; |
1274 | ||
2bf6d935 ML |
1275 | df_ref ref; |
1276 | FOR_EACH_INSN_DEF (ref, insn) | |
1277 | if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) | |
1278 | && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) | |
1279 | && DF_REF_REGNO (ref) != FLAGS_REG) | |
266f44a9 | 1280 | return NULL; |
2bf6d935 ML |
1281 | |
1282 | FOR_EACH_INSN_USE (ref, insn) | |
1283 | if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) | |
266f44a9 | 1284 | return NULL; |
2bf6d935 | 1285 | |
266f44a9 | 1286 | return set; |
2bf6d935 ML |
1287 | } |
1288 | ||
/* Check if comparison INSN may be transformed
   into vector comparison.  Currently we transform
   zero checks only which look like:

   (set (reg:CCZ 17 flags)
	(compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4)
			     (subreg:SI (reg:DI x) 0))
		     (const_int 0 [0])))  */

static bool
convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
{
  /* ??? Currently convertible for double-word DImode chain only.  */
  if (TARGET_64BIT || mode != DImode)
    return false;

  /* The conversion uses PTEST, which requires SSE4.1.  */
  if (!TARGET_SSE4_1)
    return false;

  rtx def_set = single_set (insn);

  gcc_assert (def_set);

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  gcc_assert (GET_CODE (src) == COMPARE);

  /* Must be a zero-flag (CCZ) comparison setting the flags register.  */
  if (GET_CODE (dst) != REG
      || REGNO (dst) != FLAGS_REG
      || GET_MODE (dst) != CCZmode)
    return false;

  rtx op1 = XEXP (src, 0);
  rtx op2 = XEXP (src, 1);

  /* Only comparisons against zero are handled.  */
  if (op2 != CONST0_RTX (GET_MODE (op2)))
    return false;

  if (GET_CODE (op1) != IOR)
    return false;

  op2 = XEXP (op1, 1);
  op1 = XEXP (op1, 0);

  /* The IOR operands must be SImode SUBREGs selecting bytes 0 and 4
     (in either order), i.e. the low and high halves of a double word.  */
  if (!SUBREG_P (op1)
      || !SUBREG_P (op2)
      || GET_MODE (op1) != SImode
      || GET_MODE (op2) != SImode
      || ((SUBREG_BYTE (op1) != 0
	   || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode))
	  && (SUBREG_BYTE (op2) != 0
	      || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode))))
    return false;

  op1 = SUBREG_REG (op1);
  op2 = SUBREG_REG (op2);

  /* Both halves must come from the same DImode register.  */
  if (op1 != op2
      || !REG_P (op1)
      || GET_MODE (op1) != DImode)
    return false;

  return true;
}
1354 | ||
/* The general version of scalar_to_vector_candidate_p.  Return true if
   INSN is a single_set in MODE (SImode or DImode) whose operation has a
   vector equivalent available on the current target.  */

static bool
general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
{
  rtx def_set = pseudo_reg_set (insn);

  if (!def_set)
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn, mode);

  /* We are interested in "mode" only.  */
  if ((GET_MODE (src) != mode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != mode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      /* Vector arithmetic right shift in DImode needs AVX512VL.  */
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      /* Shift count must be an immediate within the mode's bit width.  */
      if (!CONST_INT_P (XEXP (src, 1))
	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
	return false;
      break;

    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      /* Vector min/max availability depends on the element mode.  */
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSE4_1))
	return false;
      /* Fallthru.  */

    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
      /* Second operand must be a register, memory or immediate.  */
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != mode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;
      break;

    case ABS:
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSSE3))
	return false;
      break;

    case NEG:
    case NOT:
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      /* Plain loads and constant loads only go into registers.  */
      return REG_P (dst);

    default:
      return false;
    }

  /* Common first-operand check for the operations that fell through.  */
  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0))
      /* Check for andnot case.  */
      && (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT
	  || !REG_P (XEXP (XEXP (src, 0), 0))))
    return false;

  if (GET_MODE (XEXP (src, 0)) != mode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}
1454 | ||
1455 | /* The TImode version of scalar_to_vector_candidate_p. */ | |
1456 | ||
1457 | static bool | |
1458 | timode_scalar_to_vector_candidate_p (rtx_insn *insn) | |
1459 | { | |
266f44a9 | 1460 | rtx def_set = pseudo_reg_set (insn); |
2bf6d935 ML |
1461 | |
1462 | if (!def_set) | |
1463 | return false; | |
1464 | ||
2bf6d935 ML |
1465 | rtx src = SET_SRC (def_set); |
1466 | rtx dst = SET_DEST (def_set); | |
1467 | ||
1468 | /* Only TImode load and store are allowed. */ | |
1469 | if (GET_MODE (dst) != TImode) | |
1470 | return false; | |
1471 | ||
1472 | if (MEM_P (dst)) | |
1473 | { | |
1474 | /* Check for store. Memory must be aligned or unaligned store | |
1475 | is optimal. Only support store from register, standard SSE | |
1476 | constant or CONST_WIDE_INT generated from piecewise store. | |
1477 | ||
1478 | ??? Verify performance impact before enabling CONST_INT for | |
1479 | __int128 store. */ | |
1480 | if (misaligned_operand (dst, TImode) | |
1481 | && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) | |
1482 | return false; | |
1483 | ||
1484 | switch (GET_CODE (src)) | |
1485 | { | |
1486 | default: | |
1487 | return false; | |
1488 | ||
1489 | case REG: | |
1490 | case CONST_WIDE_INT: | |
1491 | return true; | |
1492 | ||
1493 | case CONST_INT: | |
1494 | return standard_sse_constant_p (src, TImode); | |
1495 | } | |
1496 | } | |
1497 | else if (MEM_P (src)) | |
1498 | { | |
1499 | /* Check for load. Memory must be aligned or unaligned load is | |
1500 | optimal. */ | |
1501 | return (REG_P (dst) | |
1502 | && (!misaligned_operand (src, TImode) | |
1503 | || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)); | |
1504 | } | |
1505 | ||
1506 | return false; | |
1507 | } | |
1508 | ||
2bf6d935 ML |
1509 | /* For a register REGNO, scan instructions for its defs and uses. |
1510 | Put REGNO in REGS if a def or use isn't in CANDIDATES. */ | |
1511 | ||
1512 | static void | |
1513 | timode_check_non_convertible_regs (bitmap candidates, bitmap regs, | |
1514 | unsigned int regno) | |
1515 | { | |
1516 | for (df_ref def = DF_REG_DEF_CHAIN (regno); | |
1517 | def; | |
1518 | def = DF_REF_NEXT_REG (def)) | |
1519 | { | |
1520 | if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) | |
1521 | { | |
1522 | if (dump_file) | |
1523 | fprintf (dump_file, | |
1524 | "r%d has non convertible def in insn %d\n", | |
1525 | regno, DF_REF_INSN_UID (def)); | |
1526 | ||
1527 | bitmap_set_bit (regs, regno); | |
1528 | break; | |
1529 | } | |
1530 | } | |
1531 | ||
1532 | for (df_ref ref = DF_REG_USE_CHAIN (regno); | |
1533 | ref; | |
1534 | ref = DF_REF_NEXT_REG (ref)) | |
1535 | { | |
1536 | /* Debug instructions are skipped. */ | |
1537 | if (NONDEBUG_INSN_P (DF_REF_INSN (ref)) | |
1538 | && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) | |
1539 | { | |
1540 | if (dump_file) | |
1541 | fprintf (dump_file, | |
1542 | "r%d has non convertible use in insn %d\n", | |
1543 | regno, DF_REF_INSN_UID (ref)); | |
1544 | ||
1545 | bitmap_set_bit (regs, regno); | |
1546 | break; | |
1547 | } | |
1548 | } | |
1549 | } | |
1550 | ||
/* The TImode version of remove_non_convertible_regs.  Drop from
   CANDIDATES every insn that defines or uses a register which also has
   a def or use outside of the candidate set.  */

static void
timode_remove_non_convertible_regs (bitmap candidates)
{
  bitmap_iterator bi;
  unsigned id;
  bitmap regs = BITMAP_ALLOC (NULL);

  /* Phase 1: collect in REGS all registers involved in a candidate
     insn that also appear in a non-candidate insn.  */
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    {
      rtx def_set = single_set (DF_INSN_UID_GET (id)->insn);
      rtx dest = SET_DEST (def_set);
      rtx src = SET_SRC (def_set);

      /* Skip insns where neither operand is a checkable pseudo.  */
      if ((!REG_P (dest)
	   || bitmap_bit_p (regs, REGNO (dest))
	   || HARD_REGISTER_P (dest))
	  && (!REG_P (src)
	      || bitmap_bit_p (regs, REGNO (src))
	      || HARD_REGISTER_P (src)))
	continue;

      if (REG_P (dest))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (dest));

      if (REG_P (src))
	timode_check_non_convertible_regs (candidates, regs,
					   REGNO (src));
    }

  /* Phase 2: remove every candidate insn that touches a register
     collected in REGS.  */
  EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    {
      for (df_ref def = DF_REG_DEF_CHAIN (id);
	   def;
	   def = DF_REF_NEXT_REG (def))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (def));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
	  }

      for (df_ref ref = DF_REG_USE_CHAIN (id);
	   ref;
	   ref = DF_REF_NEXT_REG (ref))
	if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "Removing insn %d from candidates list\n",
		       DF_REF_INSN_UID (ref));

	    bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
	  }
    }

  BITMAP_FREE (regs);
}
1612 | ||
2bf6d935 ML |
/* Main STV pass function.  Find and convert scalar
   instructions into vector mode when profitable.  TIMODE_P selects
   between the TImode (64-bit) and the SImode/DImode (32-bit) flavor
   of the pass.  Always returns 0 (no TODO flags).  */

static unsigned int
convert_scalars_to_vector (bool timode_p)
{
  basic_block bb;
  int converted_insns = 0;

  bitmap_obstack_initialize (NULL);
  /* Candidate insns per chain mode; index 0 = SImode, 1 = DImode,
     2 = TImode, matching cand_mode/cand_vmode.  */
  const machine_mode cand_mode[3] = { SImode, DImode, TImode };
  const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
  bitmap_head candidates[3];	/* { SImode, DImode, TImode } */
  for (unsigned i = 0; i < 3; ++i)
    bitmap_initialize (&candidates[i], &bitmap_default_obstack);

  calculate_dominance_info (CDI_DOMINATORS);
  df_set_flags (DF_DEFER_INSN_RESCAN);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();

  /* Find all instructions we want to convert into vector mode.  */
  if (dump_file)
    fprintf (dump_file, "Searching for mode conversion candidates...\n");

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (timode_p
	    && timode_scalar_to_vector_candidate_p (insn))
	  {
	    if (dump_file)
	      fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
		       INSN_UID (insn));

	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
	  }
	else if (!timode_p)
	  {
	    /* Check {SI,DI}mode.  */
	    for (unsigned i = 0; i <= 1; ++i)
	      if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
		{
		  if (dump_file)
		    fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
			     INSN_UID (insn), i == 0 ? "SImode" : "DImode");

		  bitmap_set_bit (&candidates[i], INSN_UID (insn));
		  break;
		}
	  }
    }

  if (timode_p)
    timode_remove_non_convertible_regs (&candidates[2]);

  /* Dump a note when no candidate was found in any mode.  */
  for (unsigned i = 0; i <= 2; ++i)
    if (!bitmap_empty_p (&candidates[i]))
      break;
    else if (i == 2 && dump_file)
      fprintf (dump_file, "There are no candidates for optimization.\n");

  /* Build chains from the candidates, mode by mode, and convert each
     chain whose estimated gain is positive.  */
  for (unsigned i = 0; i <= 2; ++i)
    while (!bitmap_empty_p (&candidates[i]))
      {
	unsigned uid = bitmap_first_set_bit (&candidates[i]);
	scalar_chain *chain;

	if (cand_mode[i] == TImode)
	  chain = new timode_scalar_chain;
	else
	  chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);

	/* Find instructions chain we want to convert to vector mode.
	   Check all uses and definitions to estimate all required
	   conversions.  */
	chain->build (&candidates[i], uid);

	if (chain->compute_convert_gain () > 0)
	  converted_insns += chain->convert ();
	else
	  if (dump_file)
	    fprintf (dump_file, "Chain #%d conversion is not profitable\n",
		     chain->chain_id);

	delete chain;
      }

  if (dump_file)
    fprintf (dump_file, "Total insns converted: %d\n", converted_insns);

  for (unsigned i = 0; i <= 2; ++i)
    bitmap_release (&candidates[i]);
  bitmap_obstack_release (NULL);
  df_process_deferred_rescans ();

  /* Conversion means we may have 128bit register spills/fills
     which require aligned stack.  */
  if (converted_insns)
    {
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
      if (crtl->stack_alignment_estimated < 128)
	crtl->stack_alignment_estimated = 128;

      crtl->stack_realign_needed
	= INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
      crtl->stack_realign_tried = crtl->stack_realign_needed;

      crtl->stack_realign_processed = true;

      if (!crtl->drap_reg)
	{
	  rtx drap_rtx = targetm.calls.get_drap_rtx ();

	  /* stack_realign_drap and drap_rtx must match.  */
	  gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));

	  /* Do nothing if NULL is returned,
	     which means DRAP is not needed.  */
	  if (drap_rtx != NULL)
	    {
	      crtl->args.internal_arg_pointer = drap_rtx;

	      /* Call fixup_tail_calls to clean up
		 REG_EQUIV note if DRAP is needed.  */
	      fixup_tail_calls ();
	    }
	}

      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
      if (TARGET_64BIT)
	for (tree parm = DECL_ARGUMENTS (current_function_decl);
	     parm; parm = DECL_CHAIN (parm))
	  {
	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
	      continue;
	    if (DECL_RTL_SET_P (parm)
		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_RTL (parm);
		if (REG_P (r))
		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
	      }
	    if (DECL_INCOMING_RTL (parm)
		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_INCOMING_RTL (parm);
		if (REG_P (r))
		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
	      }
	  }
    }

  return 0;
}
1770 | ||
69811448 RS |
/* Modify the vzeroupper pattern in INSN so that it describes the effect
   that the instruction has on the SSE registers.  LIVE_REGS are the set
   of registers that are live across the instruction.

   For a live register R we use:

     (set (reg:V2DI R) (reg:V2DI R))

   which preserves the low 128 bits but clobbers the upper bits.  */

static void
ix86_add_reg_usage_to_vzeroupper (rtx_insn *insn, bitmap live_regs)
{
  rtx pattern = PATTERN (insn);
  unsigned int nregs = TARGET_64BIT ? 16 : 8;
  unsigned int npats = nregs;
  /* Count how many SSE registers are live; dead ones need no SET.  */
  for (unsigned int i = 0; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
      if (!bitmap_bit_p (live_regs, regno))
	npats--;
    }
  if (npats == 0)
    return;
  /* Build a new PARALLEL body: element 0 keeps the original first
     element of the pattern, followed by one SET per live register.  */
  rtvec vec = rtvec_alloc (npats + 1);
  RTVEC_ELT (vec, 0) = XVECEXP (pattern, 0, 0);
  for (unsigned int i = 0, j = 0; i < nregs; ++i)
    {
      unsigned int regno = GET_SSE_REGNO (i);
      if (!bitmap_bit_p (live_regs, regno))
	continue;
      rtx reg = gen_rtx_REG (V2DImode, regno);
      ++j;
      RTVEC_ELT (vec, j) = gen_rtx_SET (reg, reg);
    }
  XVEC (pattern, 0) = vec;
  /* Force re-recognition since the pattern changed.  */
  INSN_CODE (insn) = -1;
  df_insn_rescan (insn);
}
1810 | ||
1811 | /* Walk the vzeroupper instructions in the function and annotate them | |
1812 | with the effect that they have on the SSE registers. */ | |
1813 | ||
1814 | static void | |
1815 | ix86_add_reg_usage_to_vzerouppers (void) | |
1816 | { | |
1817 | basic_block bb; | |
1818 | rtx_insn *insn; | |
1819 | auto_bitmap live_regs; | |
1820 | ||
1821 | df_analyze (); | |
1822 | FOR_EACH_BB_FN (bb, cfun) | |
1823 | { | |
1824 | bitmap_copy (live_regs, df_get_live_out (bb)); | |
1825 | df_simulate_initialize_backwards (bb, live_regs); | |
1826 | FOR_BB_INSNS_REVERSE (bb, insn) | |
1827 | { | |
1828 | if (!NONDEBUG_INSN_P (insn)) | |
1829 | continue; | |
1830 | if (vzeroupper_pattern (PATTERN (insn), VOIDmode)) | |
1831 | ix86_add_reg_usage_to_vzeroupper (insn, live_regs); | |
1832 | df_simulate_one_insn_backwards (bb, insn, live_regs); | |
1833 | } | |
1834 | } | |
1835 | } | |
1836 | ||
2bf6d935 ML |
1837 | static unsigned int |
1838 | rest_of_handle_insert_vzeroupper (void) | |
1839 | { | |
1840 | int i; | |
1841 | ||
1842 | /* vzeroupper instructions are inserted immediately after reload to | |
1843 | account for possible spills from 256bit or 512bit registers. The pass | |
1844 | reuses mode switching infrastructure by re-running mode insertion | |
1845 | pass, so disable entities that have already been processed. */ | |
1846 | for (i = 0; i < MAX_386_ENTITIES; i++) | |
1847 | ix86_optimize_mode_switching[i] = 0; | |
1848 | ||
1849 | ix86_optimize_mode_switching[AVX_U128] = 1; | |
1850 | ||
1851 | /* Call optimize_mode_switching. */ | |
1852 | g->get_passes ()->execute_pass_mode_switching (); | |
69811448 | 1853 | ix86_add_reg_usage_to_vzerouppers (); |
2bf6d935 ML |
1854 | return 0; |
1855 | } | |
1856 | ||
1857 | namespace { | |
1858 | ||
/* Pass manager metadata for the "vzeroupper" RTL pass.  */

const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
1871 | ||
1872 | class pass_insert_vzeroupper : public rtl_opt_pass | |
1873 | { | |
1874 | public: | |
1875 | pass_insert_vzeroupper(gcc::context *ctxt) | |
1876 | : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) | |
1877 | {} | |
1878 | ||
1879 | /* opt_pass methods: */ | |
1880 | virtual bool gate (function *) | |
1881 | { | |
1882 | return TARGET_AVX | |
1883 | && TARGET_VZEROUPPER && flag_expensive_optimizations | |
1884 | && !optimize_size; | |
1885 | } | |
1886 | ||
1887 | virtual unsigned int execute (function *) | |
1888 | { | |
1889 | return rest_of_handle_insert_vzeroupper (); | |
1890 | } | |
1891 | ||
1892 | }; // class pass_insert_vzeroupper | |
1893 | ||
/* Pass manager metadata for the "stv" (scalar-to-vector) RTL pass.  */

const pass_data pass_data_stv =
{
  RTL_PASS, /* type */
  "stv", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
1906 | ||
1907 | class pass_stv : public rtl_opt_pass | |
1908 | { | |
1909 | public: | |
1910 | pass_stv (gcc::context *ctxt) | |
1911 | : rtl_opt_pass (pass_data_stv, ctxt), | |
1912 | timode_p (false) | |
1913 | {} | |
1914 | ||
1915 | /* opt_pass methods: */ | |
1916 | virtual bool gate (function *) | |
1917 | { | |
f386ca41 | 1918 | return ((!timode_p || TARGET_64BIT) |
2bf6d935 ML |
1919 | && TARGET_STV && TARGET_SSE2 && optimize > 1); |
1920 | } | |
1921 | ||
1922 | virtual unsigned int execute (function *) | |
1923 | { | |
f386ca41 | 1924 | return convert_scalars_to_vector (timode_p); |
2bf6d935 ML |
1925 | } |
1926 | ||
1927 | opt_pass *clone () | |
1928 | { | |
1929 | return new pass_stv (m_ctxt); | |
1930 | } | |
1931 | ||
1932 | void set_pass_param (unsigned int n, bool param) | |
1933 | { | |
1934 | gcc_assert (n == 0); | |
1935 | timode_p = param; | |
1936 | } | |
1937 | ||
1938 | private: | |
1939 | bool timode_p; | |
1940 | }; // class pass_stv | |
1941 | ||
1942 | } // anon namespace | |
1943 | ||
/* Factory used by the pass manager to create the vzeroupper pass.  */

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}
1949 | ||
/* Factory used by the pass manager to create the stv pass.  */

rtl_opt_pass *
make_pass_stv (gcc::context *ctxt)
{
  return new pass_stv (ctxt);
}
1955 | ||
/* Inserting ENDBR and pseudo patchable-area instructions.

   NEED_ENDBR is true when -fcf-protection=branch requires ENDBR
   landing pads; PATCHABLE_AREA_SIZE is the number of patchable NOP
   bytes requested after the function entry (0 for none).  Entry-point
   insns may instead be queued on cfun->machine->insn_queued_at_entrance
   when -pg with -mfentry is active, so that x86_function_profiler can
   order them relative to the fentry call.  */

static void
rest_of_insert_endbr_and_patchable_area (bool need_endbr,
					 unsigned int patchable_area_size)
{
  rtx endbr;
  rtx_insn *insn;
  /* The entry ENDBR insn, if one was emitted inline; the patchable
     area is placed after it so ENDBR stays first.  */
  rtx_insn *endbr_insn = NULL;
  basic_block bb;

  if (need_endbr)
    {
      /* Currently emit EB if it's a tracking function, i.e. 'nocf_check'
	 is absent among function attributes.  Later an optimization will
	 be introduced to make analysis if an address of a static function
	 is taken.  A static function whose address is not taken will get
	 a nocf_check attribute.  This will allow to reduce the number of
	 EB.  */
      if (!lookup_attribute ("nocf_check",
			     TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
	  && (!flag_manual_endbr
	      || lookup_attribute ("cf_check",
				   DECL_ATTRIBUTES (cfun->decl)))
	  && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
	      || ix86_cmodel == CM_LARGE
	      || ix86_cmodel == CM_LARGE_PIC
	      || flag_force_indirect_call
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
		  && DECL_DLLIMPORT_P (cfun->decl))))
	{
	  if (crtl->profile && flag_fentry)
	    {
	      /* Queue ENDBR insertion to x86_function_profiler.
		 NB: Any patchable-area insn will be inserted after
		 ENDBR.  */
	      cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
	    }
	  else
	    {
	      /* Emit the ENDBR as the very first insn of the function
		 body.  */
	      endbr = gen_nop_endbr ();
	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      rtx_insn *insn = BB_HEAD (bb);
	      endbr_insn = emit_insn_before (endbr, insn);
	    }
	}
    }

  if (patchable_area_size)
    {
      if (crtl->profile && flag_fentry)
	{
	  /* Queue patchable-area insertion to x86_function_profiler.
	     NB: If there is a queued ENDBR, x86_function_profiler
	     will also handle patchable-area.  */
	  if (!cfun->machine->insn_queued_at_entrance)
	    cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
	}
      else
	{
	  /* The second operand records whether the area is at the very
	     start of the function (no bytes before the entry label).  */
	  rtx patchable_area
	    = gen_patchable_area (GEN_INT (patchable_area_size),
				  GEN_INT (crtl->patch_area_entry == 0));
	  if (endbr_insn)
	    emit_insn_after (patchable_area, endbr_insn);
	  else
	    {
	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      insn = BB_HEAD (bb);
	      emit_insn_before (patchable_area, insn);
	    }
	}
    }

  /* The rest of the function only inserts additional ENDBRs (after
     setjmp-style calls, at switch-table targets, at address-taken
     labels), so we are done when ENDBR isn't wanted.  */
  if (!need_endbr)
    return;

  bb = 0;
  FOR_EACH_BB_FN (bb, cfun)
    {
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  if (CALL_P (insn))
	    {
	      /* NEED_ENDBR is reused here as a per-insn flag: true when
		 this call can return via an indirect branch (setjmp or
		 an "indirect_return" callee).  */
	      need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
	      if (!need_endbr && !SIBLING_CALL_P (insn))
		{
		  rtx call = get_call_rtx_from (insn);
		  rtx fnaddr = XEXP (call, 0);
		  tree fndecl = NULL_TREE;

		  /* Also generate ENDBRANCH for non-tail call which
		     may return via indirect branch.  */
		  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
		    fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
		  if (fndecl == NULL_TREE)
		    fndecl = MEM_EXPR (fnaddr);
		  if (fndecl
		      && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
		      && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
		    fndecl = NULL_TREE;
		  if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
		    {
		      tree fntype = TREE_TYPE (fndecl);
		      if (lookup_attribute ("indirect_return",
					    TYPE_ATTRIBUTES (fntype)))
			need_endbr = true;
		    }
		}
	      if (!need_endbr)
		continue;
	      /* Generate ENDBRANCH after CALL, which can return more than
		 twice, setjmp-like functions.  */

	      endbr = gen_nop_endbr ();
	      emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
	      continue;
	    }

	  if (JUMP_P (insn) && flag_cet_switch)
	    {
	      rtx target = JUMP_LABEL (insn);
	      if (target == NULL_RTX || ANY_RETURN_P (target))
		continue;

	      /* Check the jump is a switch table.  */
	      rtx_insn *label = as_a<rtx_insn *> (target);
	      rtx_insn *table = next_insn (label);
	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
		continue;

	      /* For the indirect jump find out all places it jumps and insert
		 ENDBRANCH there.  It should be done under a special flag to
		 control ENDBRANCH generation for switch stmts.  */
	      edge_iterator ei;
	      edge e;
	      basic_block dest_blk;

	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  rtx_insn *insn;

		  dest_blk = e->dest;
		  insn = BB_HEAD (dest_blk);
		  gcc_assert (LABEL_P (insn));
		  endbr = gen_nop_endbr ();
		  emit_insn_after (endbr, insn);
		}
	      continue;
	    }

	  /* Labels whose address is taken (LABEL_PRESERVE_P) are
	     potential indirect-branch targets and need a landing pad.  */
	  if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	    {
	      endbr = gen_nop_endbr ();
	      emit_insn_after (endbr, insn);
	      continue;
	    }
	}
    }

  return;
}
2119 | ||
2120 | namespace { | |
2121 | ||
/* Pass manager metadata for the "endbr_and_patchable_area" RTL pass.  */

const pass_data pass_data_insert_endbr_and_patchable_area =
{
  RTL_PASS, /* type.  */
  "endbr_and_patchable_area", /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_MACH_DEP, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  0, /* todo_flags_finish.  */
};
2134 | ||
/* RTL pass inserting ENDBR landing pads and patchable NOP areas at
   function entry; see rest_of_insert_endbr_and_patchable_area.  */

class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
{
public:
  pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* NB: unusually, this gate also computes and caches the members
       that execute () consumes, so gate must run before execute.  */
    need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
    patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
    return need_endbr || patchable_area_size;
  }

  virtual unsigned int execute (function *)
  {
    timevar_push (TV_MACH_DEP);
    rest_of_insert_endbr_and_patchable_area (need_endbr,
					     patchable_area_size);
    timevar_pop (TV_MACH_DEP);
    return 0;
  }

private:
  /* Cached by gate (): whether -fcf-protection=branch is active.  */
  bool need_endbr;
  /* Cached by gate (): patchable bytes to emit after the entry.  */
  unsigned int patchable_area_size;
}; // class pass_insert_endbr_and_patchable_area
2bf6d935 ML |
2163 | |
2164 | } // anon namespace | |
2165 | ||
/* Factory used by the pass manager to create the
   endbr_and_patchable_area pass.  */

rtl_opt_pass *
make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
{
  return new pass_insert_endbr_and_patchable_area (ctxt);
}
2171 | ||
/* Replace all one-value const vector that are referenced by SYMBOL_REFs in x
   with embedded broadcast.  i.e.transform

     vpaddq .LC0(%rip), %zmm0, %zmm0
     ret
   .LC0:
     .quad 3
     .quad 3
     .quad 3
     .quad 3
     .quad 3
     .quad 3
     .quad 3
     .quad 3

   to

     vpaddq .LC0(%rip){1to8}, %zmm0, %zmm0
     ret
   .LC0:
     .quad 3  */
static void
replace_constant_pool_with_broadcast (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), ALL)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      rtx broadcast_mem, vec_dup, constant, first;
      machine_mode mode;

      /* Constant pool.  */
      if (!MEM_P (x)
	  || !SYMBOL_REF_P (XEXP (x, 0))
	  || !CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
	continue;

      /* Const vector.  */
      /* NOTE(review): from here on failures use `return' rather than
	 `continue', relying on the invariant stated below that an insn
	 has at most one memory operand — confirm that holds for all
	 insns reaching this pass.  */
      mode = GET_MODE (x);
      if (!VECTOR_MODE_P (mode))
	return;
      constant = get_pool_constant (XEXP (x, 0));
      if (GET_CODE (constant) != CONST_VECTOR)
	return;

      /* There could be some rtx like
	 (mem/u/c:V16QI (symbol_ref/u:DI ("*.LC1")))
	 but with "*.LC1" refer to V2DI constant vector.  */
      if (GET_MODE (constant) != mode)
	{
	  constant = simplify_subreg (mode, constant, GET_MODE (constant), 0);
	  if (constant == NULL_RTX || GET_CODE (constant) != CONST_VECTOR)
	    return;
	}
      first = XVECEXP (constant, 0, 0);

      /* Only a vector whose elements are all equal can be broadcast.  */
      for (int i = 1; i < GET_MODE_NUNITS (mode); ++i)
	{
	  rtx tmp = XVECEXP (constant, 0, i);
	  /* Vector duplicate value.  */
	  if (!rtx_equal_p (tmp, first))
	    return;
	}

      /* Replace with embedded broadcast: a pool reference to the single
	 scalar element, wrapped in a vec_duplicate.  validate_change
	 with in_group=0 applies the change only if the insn still
	 matches.  */
      broadcast_mem = force_const_mem (GET_MODE_INNER (mode), first);
      vec_dup = gen_rtx_VEC_DUPLICATE (mode, broadcast_mem);
      validate_change (insn, loc, vec_dup, 0);

      /* At most 1 memory_operand in an insn.  */
      return;
    }
}
2246 | ||
2bf6d935 ML |
2247 | /* At entry of the nearest common dominator for basic blocks with |
2248 | conversions, generate a single | |
2249 | vxorps %xmmN, %xmmN, %xmmN | |
2250 | for all | |
2251 | vcvtss2sd op, %xmmN, %xmmX | |
2252 | vcvtsd2ss op, %xmmN, %xmmX | |
2253 | vcvtsi2ss op, %xmmN, %xmmX | |
2254 | vcvtsi2sd op, %xmmN, %xmmX | |
2255 | ||
2256 | NB: We want to generate only a single vxorps to cover the whole | |
2257 | function. The LCM algorithm isn't appropriate here since it may | |
2258 | place a vxorps inside the loop. */ | |
2259 | ||
2260 | static unsigned int | |
2261 | remove_partial_avx_dependency (void) | |
2262 | { | |
2263 | timevar_push (TV_MACH_DEP); | |
2264 | ||
2265 | bitmap_obstack_initialize (NULL); | |
2266 | bitmap convert_bbs = BITMAP_ALLOC (NULL); | |
2267 | ||
2268 | basic_block bb; | |
2269 | rtx_insn *insn, *set_insn; | |
2270 | rtx set; | |
2271 | rtx v4sf_const0 = NULL_RTX; | |
2272 | ||
2273 | auto_vec<rtx_insn *> control_flow_insns; | |
2274 | ||
2275 | FOR_EACH_BB_FN (bb, cfun) | |
2276 | { | |
2277 | FOR_BB_INSNS (bb, insn) | |
2278 | { | |
2279 | if (!NONDEBUG_INSN_P (insn)) | |
2280 | continue; | |
2281 | ||
43373412 | 2282 | /* Handle AVX512 embedded broadcast here to save compile time. */ |
2283 | if (TARGET_AVX512F) | |
2284 | replace_constant_pool_with_broadcast (insn); | |
2285 | ||
2bf6d935 ML |
2286 | set = single_set (insn); |
2287 | if (!set) | |
2288 | continue; | |
2289 | ||
2290 | if (get_attr_avx_partial_xmm_update (insn) | |
2291 | != AVX_PARTIAL_XMM_UPDATE_TRUE) | |
2292 | continue; | |
2293 | ||
2294 | if (!v4sf_const0) | |
2295 | { | |
2296 | calculate_dominance_info (CDI_DOMINATORS); | |
2297 | df_set_flags (DF_DEFER_INSN_RESCAN); | |
2298 | df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); | |
2299 | df_md_add_problem (); | |
2300 | df_analyze (); | |
2301 | v4sf_const0 = gen_reg_rtx (V4SFmode); | |
2302 | } | |
2303 | ||
2304 | /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, | |
2305 | SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and | |
2306 | vec_merge with subreg. */ | |
2307 | rtx src = SET_SRC (set); | |
2308 | rtx dest = SET_DEST (set); | |
2309 | machine_mode dest_mode = GET_MODE (dest); | |
2310 | ||
2311 | rtx zero; | |
2312 | machine_mode dest_vecmode; | |
2313 | if (dest_mode == E_SFmode) | |
2314 | { | |
2315 | dest_vecmode = V4SFmode; | |
2316 | zero = v4sf_const0; | |
2317 | } | |
2318 | else | |
2319 | { | |
2320 | dest_vecmode = V2DFmode; | |
2321 | zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0); | |
2322 | } | |
2323 | ||
2324 | /* Change source to vector mode. */ | |
2325 | src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src); | |
2326 | src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero, | |
2327 | GEN_INT (HOST_WIDE_INT_1U)); | |
2328 | /* Change destination to vector mode. */ | |
2329 | rtx vec = gen_reg_rtx (dest_vecmode); | |
2330 | /* Generate an XMM vector SET. */ | |
2331 | set = gen_rtx_SET (vec, src); | |
2332 | set_insn = emit_insn_before (set, insn); | |
2333 | df_insn_rescan (set_insn); | |
2334 | ||
2335 | if (cfun->can_throw_non_call_exceptions) | |
2336 | { | |
2337 | /* Handle REG_EH_REGION note. */ | |
2338 | rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX); | |
2339 | if (note) | |
2340 | { | |
2341 | control_flow_insns.safe_push (set_insn); | |
2342 | add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0)); | |
2343 | } | |
2344 | } | |
2345 | ||
2346 | src = gen_rtx_SUBREG (dest_mode, vec, 0); | |
2347 | set = gen_rtx_SET (dest, src); | |
2348 | ||
2349 | /* Drop possible dead definitions. */ | |
2350 | PATTERN (insn) = set; | |
2351 | ||
2352 | INSN_CODE (insn) = -1; | |
2353 | recog_memoized (insn); | |
2354 | df_insn_rescan (insn); | |
2355 | bitmap_set_bit (convert_bbs, bb->index); | |
2356 | } | |
2357 | } | |
2358 | ||
2359 | if (v4sf_const0) | |
2360 | { | |
2361 | /* (Re-)discover loops so that bb->loop_father can be used in the | |
2362 | analysis below. */ | |
2363 | loop_optimizer_init (AVOID_CFG_MODIFICATIONS); | |
2364 | ||
2365 | /* Generate a vxorps at entry of the nearest dominator for basic | |
700d4cb0 | 2366 | blocks with conversions, which is in the fake loop that |
2bf6d935 ML |
2367 | contains the whole function, so that there is only a single |
2368 | vxorps in the whole function. */ | |
2369 | bb = nearest_common_dominator_for_set (CDI_DOMINATORS, | |
2370 | convert_bbs); | |
2371 | while (bb->loop_father->latch | |
2372 | != EXIT_BLOCK_PTR_FOR_FN (cfun)) | |
2373 | bb = get_immediate_dominator (CDI_DOMINATORS, | |
2374 | bb->loop_father->header); | |
2375 | ||
2376 | set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode)); | |
2377 | ||
2378 | insn = BB_HEAD (bb); | |
2379 | while (insn && !NONDEBUG_INSN_P (insn)) | |
2380 | { | |
2381 | if (insn == BB_END (bb)) | |
2382 | { | |
2383 | insn = NULL; | |
2384 | break; | |
2385 | } | |
2386 | insn = NEXT_INSN (insn); | |
2387 | } | |
2388 | if (insn == BB_HEAD (bb)) | |
2389 | set_insn = emit_insn_before (set, insn); | |
2390 | else | |
2391 | set_insn = emit_insn_after (set, | |
2392 | insn ? PREV_INSN (insn) : BB_END (bb)); | |
2393 | df_insn_rescan (set_insn); | |
2394 | df_process_deferred_rescans (); | |
2395 | loop_optimizer_finalize (); | |
2396 | ||
2397 | if (!control_flow_insns.is_empty ()) | |
2398 | { | |
2399 | free_dominance_info (CDI_DOMINATORS); | |
2400 | ||
2401 | unsigned int i; | |
2402 | FOR_EACH_VEC_ELT (control_flow_insns, i, insn) | |
2403 | if (control_flow_insn_p (insn)) | |
2404 | { | |
2405 | /* Split the block after insn. There will be a fallthru | |
2406 | edge, which is OK so we keep it. We have to create | |
2407 | the exception edges ourselves. */ | |
2408 | bb = BLOCK_FOR_INSN (insn); | |
2409 | split_block (bb, insn); | |
2410 | rtl_make_eh_edge (NULL, bb, BB_END (bb)); | |
2411 | } | |
2412 | } | |
2413 | } | |
2414 | ||
2415 | bitmap_obstack_release (NULL); | |
2416 | BITMAP_FREE (convert_bbs); | |
2417 | ||
2418 | timevar_pop (TV_MACH_DEP); | |
2419 | return 0; | |
2420 | } | |
2421 | ||
43373412 | 2422 | static bool |
2423 | remove_partial_avx_dependency_gate () | |
2424 | { | |
2425 | return (TARGET_AVX | |
2426 | && TARGET_SSE_PARTIAL_REG_DEPENDENCY | |
2427 | && TARGET_SSE_MATH | |
2428 | && optimize | |
2429 | && optimize_function_for_speed_p (cfun)); | |
2430 | } | |
2431 | ||
2bf6d935 ML |
2432 | namespace { |
2433 | ||
/* Pass manager metadata for the "rpad" RTL pass.  */

const pass_data pass_data_remove_partial_avx_dependency =
{
  RTL_PASS, /* type */
  "rpad", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
2446 | ||
/* Thin pass wrapper around remove_partial_avx_dependency.  */

class pass_remove_partial_avx_dependency : public rtl_opt_pass
{
public:
  pass_remove_partial_avx_dependency (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return remove_partial_avx_dependency_gate ();
  }

  virtual unsigned int execute (function *)
  {
    return remove_partial_avx_dependency ();
  }
}; // class pass_rpad
2465 | ||
2466 | } // anon namespace | |
2467 | ||
/* Factory used by the pass manager to create the rpad pass.  */

rtl_opt_pass *
make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
{
  return new pass_remove_partial_avx_dependency (ctxt);
}
2473 | ||
43373412 | 2474 | /* For const vector having one duplicated value, there's no need to put |
2475 | whole vector in the constant pool when target supports embedded broadcast. */ | |
2476 | static unsigned int | |
2477 | constant_pool_broadcast (void) | |
2478 | { | |
2479 | timevar_push (TV_MACH_DEP); | |
2480 | rtx_insn *insn; | |
2481 | ||
2482 | for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) | |
2483 | { | |
2484 | if (INSN_P (insn)) | |
2485 | replace_constant_pool_with_broadcast (insn); | |
2486 | } | |
2487 | timevar_pop (TV_MACH_DEP); | |
2488 | return 0; | |
2489 | } | |
2490 | ||
2491 | namespace { | |
2492 | ||
/* Pass manager metadata for the "cpb" (constant-pool broadcast) RTL
   pass.  */

const pass_data pass_data_constant_pool_broadcast =
{
  RTL_PASS, /* type */
  "cpb", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
2505 | ||
/* Thin pass wrapper around constant_pool_broadcast.  */

class pass_constant_pool_broadcast : public rtl_opt_pass
{
public:
  pass_constant_pool_broadcast (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_constant_pool_broadcast, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    /* Return false if rpad pass gate is true.
       replace_constant_pool_with_broadcast is called
       from both this pass and rpad pass.  */
    return (TARGET_AVX512F && !remove_partial_avx_dependency_gate ());
  }

  virtual unsigned int execute (function *)
  {
    return constant_pool_broadcast ();
  }
}; // class pass_cpb
2527 | ||
2528 | } // anon namespace | |
2529 | ||
/* Factory used by the pass manager to create the cpb pass.  */

rtl_opt_pass *
make_pass_constant_pool_broadcast (gcc::context *ctxt)
{
  return new pass_constant_pool_broadcast (ctxt);
}
2535 | ||
2bf6d935 ML |
2536 | /* This compares the priority of target features in function DECL1 |
2537 | and DECL2. It returns positive value if DECL1 is higher priority, | |
2538 | negative value if DECL2 is higher priority and 0 if they are the | |
2539 | same. */ | |
2540 | ||
2541 | int | |
2542 | ix86_compare_version_priority (tree decl1, tree decl2) | |
2543 | { | |
2544 | unsigned int priority1 = get_builtin_code_for_version (decl1, NULL); | |
2545 | unsigned int priority2 = get_builtin_code_for_version (decl2, NULL); | |
2546 | ||
2547 | return (int)priority1 - (int)priority2; | |
2548 | } | |
2549 | ||
2550 | /* V1 and V2 point to function versions with different priorities | |
2551 | based on the target ISA. This function compares their priorities. */ | |
2552 | ||
2553 | static int | |
2554 | feature_compare (const void *v1, const void *v2) | |
2555 | { | |
2556 | typedef struct _function_version_info | |
2557 | { | |
2558 | tree version_decl; | |
2559 | tree predicate_chain; | |
2560 | unsigned int dispatch_priority; | |
2561 | } function_version_info; | |
2562 | ||
2563 | const function_version_info c1 = *(const function_version_info *)v1; | |
2564 | const function_version_info c2 = *(const function_version_info *)v2; | |
2565 | return (c2.dispatch_priority - c1.dispatch_priority); | |
2566 | } | |
2567 | ||
/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
   to return a pointer to VERSION_DECL if the outcome of the expression
   formed by PREDICATE_CHAIN is true.  This function will be called during
   version dispatch to decide which function version to execute.  It returns
   the basic block at the end, to which more conditions can be added.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);


  /* Build "result_var = (void *) &version_decl; return result_var;"
     — the statements emitted when the predicate holds.  */
  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  /* No predicate: this is the default version, returned
     unconditionally from NEW_BB.  */
  if (predicate_chain == NULL_TREE)
    {
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  /* Emit one predicate call per chain element and AND the integer
     results together.  */
  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero?.
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  /* if (and_expr_var > 0) — the combined predicate result.  */
  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  /* Split the block into bb1 (predicate calls + condition), bb2
     (return of this version) and bb3 (fall-through for the next
     condition), then wire the edges by hand.  */
  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  /* bb2 returns, so it goes straight to the exit block rather than to
     bb3.  */
  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}
2678 | ||
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS are the function choices for
   dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
   in DISPATCH_DECL in which the dispatch code is generated.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;
  unsigned int i;

  /* Per-version bookkeeping: the version's decl, the chain of predicate
     functions guarding it, and its priority used for sorting below.  */
  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /* fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  /* num_versions - 1 entries: the default version (slot 0 of FNDECLS)
     never gets a predicate record.  */
  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();


  /* Collect predicate chains and priorities for every non-default
     version (FNDECLS index 1 onwards).  */
  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      /* Versions without a predicate chain cannot be guarded; skip them.  */
      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  /* Emit one predicate-guarded return per version, highest priority
     first, each extending the dispatch code in *EMPTY_BB.  */
  for  (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
2780 | ||
/* This function changes the assembler name for functions that are
   versions.  If DECL is a function version and has a "target"
   attribute, it appends the attribute string to its assembler name.
   ID is the current assembler-name identifier; the (possibly new)
   identifier is returned.  The "default" version keeps ID unchanged.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  /* gnu_inline requires the body to be omittable, which conflicts with
     generating per-version bodies; diagnose and carry on.  */
  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "function versions cannot be marked as %<gnu_inline%>,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  /* The default version keeps the unsuffixed assembler name.  */
  if (strcmp (version_string, "default") == 0)
    return id;

  /* Canonicalize the attribute string so equivalent attribute spellings
     mangle identically.  */
  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  /* +2: one for the '.' separator, one for the trailing NUL.  */
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}
2829 | ||
/* Mangle the assembler name of DECL, currently named ID.  Returns the
   (possibly replaced) identifier to use.  */

tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
  /* Let the subtarget apply any additional mangling of its own.  */
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}
2843 | ||
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL function will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function, or
   NULL if no "default" version exists.  */

tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  /* Reuse the dispatcher if one was already created for this set of
     versions.  */
  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  /* Walk forward until the version marked "default" is found.  */
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      /* Unlink DEFAULT_VERSION_INFO from its current position in the
	 doubly-linked chain...  */
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      /* ...and splice it back in at the head.  */
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      /* Link the dispatcher's version record in front of the chain,
	 whose head is now the default version.  */
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs %<ifunc%> which is not supported "
		"on this target");
    }

  return dispatch_decl;
}
2939 | ||
/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is
   ifunc alias that will point to the created resolver.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */

static tree
make_resolver_func (const tree default_decl,
		    const tree ifunc_alias_decl,
		    basic_block *empty_bb)
{
  tree decl, type, t;

  /* Create resolver function name based on default_decl.  */
  tree decl_name = clone_function_name (default_decl, "resolver");
  const char *resolver_name = IDENTIFIER_POINTER (decl_name);

  /* The resolver function should return a (void *).  */
  type = build_function_type_list (ptr_type_node, NULL_TREE);

  decl = build_fn_decl (resolver_name, type);
  SET_DECL_ASSEMBLER_NAME (decl, decl_name);

  /* The resolver is a compiler-generated artifact: mark it used so it
     is not discarded, hide it from debug output, and keep it from
     being inlined.  */
  DECL_NAME (decl) = decl_name;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 1;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;

  /* Resolver is not external, body is generated.  */
  DECL_EXTERNAL (decl) = 0;
  DECL_EXTERNAL (ifunc_alias_decl) = 0;

  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  DECL_STATIC_CONSTRUCTOR (decl) = 0;

  if (DECL_COMDAT_GROUP (default_decl)
      || TREE_PUBLIC (default_decl))
    {
      /* In this case, each translation unit with a call to this
	 versioned function will put out a resolver.  Ensure it
	 is comdat to keep just one copy.  */
      DECL_COMDAT (decl) = 1;
      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    }
  else
    TREE_PUBLIC (ifunc_alias_decl) = 0;

  /* Build result decl and add to function_decl.  */
  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
  DECL_CONTEXT (t) = decl;
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* Give the resolver an empty lowered body; the caller fills in the
     dispatch code via EMPTY_BB.  */
  gimplify_function_tree (decl);
  push_cfun (DECL_STRUCT_FUNCTION (decl));
  *empty_bb = init_lowered_empty_function (decl, false,
					   profile_count::uninitialized ());

  cgraph_node::add_new_function (decl, true);
  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));

  pop_cfun ();

  gcc_assert (ifunc_alias_decl != NULL);
  /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
  DECL_ATTRIBUTES (ifunc_alias_decl)
    = make_attribute ("ifunc", resolver_name,
		      DECL_ATTRIBUTES (ifunc_alias_decl));

  /* Create the alias for dispatch to resolver here.  */
  cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
  return decl;
}
3017 | ||
/* Generate the dispatching code body to dispatch multi-versioned function
   DECL.  The target hook is called to process the "target" attributes and
   provide the code to dispatch the right function at run-time.  NODE points
   to the dispatcher decl whose body will be created.  Returns the decl of
   the resolver function.  */

tree
ix86_generate_version_dispatcher_body (void *node_p)
{
  tree resolver_decl;
  basic_block empty_bb;
  tree default_ver_decl;
  struct cgraph_node *versn;
  struct cgraph_node *node;

  struct cgraph_function_version_info *node_version_info = NULL;
  struct cgraph_function_version_info *versn_info = NULL;

  node = (cgraph_node *)node_p;

  node_version_info = node->function_version ();
  gcc_assert (node->dispatcher_function
	      && node_version_info != NULL);

  /* Reuse a previously generated resolver, if any.  */
  if (node_version_info->dispatcher_resolver)
    return node_version_info->dispatcher_resolver;

  /* The first version in the chain corresponds to the default version.  */
  default_ver_decl = node_version_info->next->this_node->decl;

  /* node is going to be an alias, so remove the finalized bit.  */
  node->definition = false;

  resolver_decl = make_resolver_func (default_ver_decl,
				      node->decl, &empty_bb);

  /* Record the resolver before emitting its body so a recursive query
     returns it directly.  */
  node_version_info->dispatcher_resolver = resolver_decl;

  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));

  /* Collect the decls of all versions to dispatch among.  */
  auto_vec<tree, 2> fn_ver_vec;

  for (versn_info = node_version_info->next; versn_info;
       versn_info = versn_info->next)
    {
      versn = versn_info->this_node;
      /* Check for virtual functions here again, as by this time it should
	 have been determined if this function needs a vtable index or
	 not.  This happens for methods in derived classes that override
	 virtual methods in base classes but are not explicitly marked as
	 virtual.  */
      if (DECL_VINDEX (versn->decl))
	sorry ("virtual function multiversioning not supported");

      fn_ver_vec.safe_push (versn->decl);
    }

  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
  /* New calls were emitted into the resolver body; bring the callgraph
     edges back in sync.  */
  cgraph_edge::rebuild_edges ();
  pop_cfun ();
  return resolver_decl;
}
3079 | ||
3080 |