/* Copyright (C) 1988-2021 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
18 | ||
19 | #define IN_TARGET_CODE 1 | |
20 | ||
21 | #include "config.h" | |
22 | #include "system.h" | |
23 | #include "coretypes.h" | |
24 | #include "backend.h" | |
25 | #include "rtl.h" | |
26 | #include "tree.h" | |
27 | #include "memmodel.h" | |
28 | #include "gimple.h" | |
29 | #include "cfghooks.h" | |
30 | #include "cfgloop.h" | |
31 | #include "df.h" | |
32 | #include "tm_p.h" | |
33 | #include "stringpool.h" | |
34 | #include "expmed.h" | |
35 | #include "optabs.h" | |
36 | #include "regs.h" | |
37 | #include "emit-rtl.h" | |
38 | #include "recog.h" | |
39 | #include "cgraph.h" | |
40 | #include "diagnostic.h" | |
41 | #include "cfgbuild.h" | |
42 | #include "alias.h" | |
43 | #include "fold-const.h" | |
44 | #include "attribs.h" | |
45 | #include "calls.h" | |
46 | #include "stor-layout.h" | |
47 | #include "varasm.h" | |
48 | #include "output.h" | |
49 | #include "insn-attr.h" | |
50 | #include "flags.h" | |
51 | #include "except.h" | |
52 | #include "explow.h" | |
53 | #include "expr.h" | |
54 | #include "cfgrtl.h" | |
55 | #include "common/common-target.h" | |
56 | #include "langhooks.h" | |
57 | #include "reload.h" | |
58 | #include "gimplify.h" | |
59 | #include "dwarf2.h" | |
60 | #include "tm-constrs.h" | |
2bf6d935 ML |
61 | #include "cselib.h" |
62 | #include "sched-int.h" | |
63 | #include "opts.h" | |
64 | #include "tree-pass.h" | |
65 | #include "context.h" | |
66 | #include "pass_manager.h" | |
67 | #include "target-globals.h" | |
68 | #include "gimple-iterator.h" | |
69 | #include "tree-vectorizer.h" | |
70 | #include "shrink-wrap.h" | |
71 | #include "builtins.h" | |
72 | #include "rtl-iter.h" | |
73 | #include "tree-iterator.h" | |
74 | #include "dbgcnt.h" | |
75 | #include "case-cfn-macros.h" | |
76 | #include "dojump.h" | |
77 | #include "fold-const-call.h" | |
78 | #include "tree-vrp.h" | |
79 | #include "tree-ssanames.h" | |
80 | #include "selftest.h" | |
81 | #include "selftest-rtl.h" | |
82 | #include "print-rtl.h" | |
83 | #include "intl.h" | |
84 | #include "ifcvt.h" | |
85 | #include "symbol-summary.h" | |
86 | #include "ipa-prop.h" | |
87 | #include "ipa-fnsummary.h" | |
88 | #include "wide-int-bitmask.h" | |
89 | #include "tree-vector-builder.h" | |
90 | #include "debug.h" | |
91 | #include "dwarf2out.h" | |
92 | #include "i386-builtins.h" | |
93 | #include "i386-features.h" | |
94 | ||
/* Base names of the ms2sysv out-of-line save/restore stubs, indexed by
   enum xlogue_stub.  The full stub name is composed in get_stub_name
   by prefixing the ISA ("avx"/"sse") and suffixing the register count.  */
const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
  "savms64",
  "resms64",
  "resms64x",
  "savms64f",
  "resms64f",
  "resms64fx"
};
103 | ||
/* Order in which the stubs save/restore registers; also fixes the layout
   of the save area.  */
const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
/* The below offset values are where each register is stored for the layout
   relative to incoming stack pointer.  The value of each m_regs[].offset will
   be relative to the incoming base pointer (rax or rsi) used by the stub.

    s_instances:	0	       1	       2	       3
    Offset:		realigned or   aligned + 8
    Register		aligned	       aligned + 8     aligned w/HFP   w/HFP   */
  XMM15_REG,		/* 0x10	       0x18	       0x10	       0x18 */
  XMM14_REG,		/* 0x20	       0x28	       0x20	       0x28 */
  XMM13_REG,		/* 0x30	       0x38	       0x30	       0x38 */
  XMM12_REG,		/* 0x40	       0x48	       0x40	       0x48 */
  XMM11_REG,		/* 0x50	       0x58	       0x50	       0x58 */
  XMM10_REG,		/* 0x60	       0x68	       0x60	       0x68 */
  XMM9_REG,		/* 0x70	       0x78	       0x70	       0x78 */
  XMM8_REG,		/* 0x80	       0x88	       0x80	       0x88 */
  XMM7_REG,		/* 0x90	       0x98	       0x90	       0x98 */
  XMM6_REG,		/* 0xa0	       0xa8	       0xa0	       0xa8 */
  SI_REG,		/* 0xa8	       0xb0	       0xa8	       0xb0 */
  DI_REG,		/* 0xb0	       0xb8	       0xb0	       0xb8 */
  BX_REG,		/* 0xb8	       0xc0	       0xb8	       0xc0 */
  BP_REG,		/* 0xc0	       0xc8	       N/A	       N/A  */
  R12_REG,		/* 0xc8	       0xd0	       0xc0	       0xc8 */
  R13_REG,		/* 0xd0	       0xd8	       0xc8	       0xd0 */
  R14_REG,		/* 0xd8	       0xe0	       0xd0	       0xd8 */
  R15_REG,		/* 0xe0	       0xe8	       0xd8	       0xe0 */
};
131 | ||
/* Instantiate static const values.  Out-of-class definitions are needed
   so the constants can be odr-used (e.g. bound to references).  */
const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
const unsigned xlogue_layout::MIN_REGS;
const unsigned xlogue_layout::MAX_REGS;
const unsigned xlogue_layout::MAX_EXTRA_REGS;
const unsigned xlogue_layout::VARIANT_COUNT;
const unsigned xlogue_layout::STUB_NAME_MAX_LEN;

/* Initialize xlogue_layout::s_stub_names to zero.  The first index
   selects SSE vs. AVX stub names; empty strings mean "not composed yet"
   (see get_stub_name).  */
char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
				[STUB_NAME_MAX_LEN];
143 | ||
/* Instantiates all xlogue_layout instances: one per combination of
   incoming stack misalignment (0 or 8 bytes) and hard-frame-pointer
   use; indexed by enum xlogue_stub_sets (see get_instance).  */
const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
  xlogue_layout (0, false),
  xlogue_layout (8, false),
  xlogue_layout (0, true),
  xlogue_layout (8, true)
};
151 | ||
152 | /* Return an appropriate const instance of xlogue_layout based upon values | |
153 | in cfun->machine and crtl. */ | |
99b1c316 | 154 | const class xlogue_layout & |
2bf6d935 ML |
155 | xlogue_layout::get_instance () |
156 | { | |
157 | enum xlogue_stub_sets stub_set; | |
158 | bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in; | |
159 | ||
160 | if (stack_realign_fp) | |
161 | stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; | |
162 | else if (frame_pointer_needed) | |
163 | stub_set = aligned_plus_8 | |
164 | ? XLOGUE_SET_HFP_ALIGNED_PLUS_8 | |
165 | : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; | |
166 | else | |
167 | stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED; | |
168 | ||
169 | return s_instances[stub_set]; | |
170 | } | |
171 | ||
172 | /* Determine how many clobbered registers can be saved by the stub. | |
173 | Returns the count of registers the stub will save and restore. */ | |
174 | unsigned | |
175 | xlogue_layout::count_stub_managed_regs () | |
176 | { | |
177 | bool hfp = frame_pointer_needed || stack_realign_fp; | |
178 | unsigned i, count; | |
179 | unsigned regno; | |
180 | ||
181 | for (count = i = MIN_REGS; i < MAX_REGS; ++i) | |
182 | { | |
183 | regno = REG_ORDER[i]; | |
184 | if (regno == BP_REG && hfp) | |
185 | continue; | |
186 | if (!ix86_save_reg (regno, false, false)) | |
187 | break; | |
188 | ++count; | |
189 | } | |
190 | return count; | |
191 | } | |
192 | ||
/* Determine if register REGNO is a stub managed register given the
   total COUNT of stub managed registers.  */
bool
xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
{
  bool hfp = frame_pointer_needed || stack_realign_fp;
  unsigned i;

  /* Scan the first COUNT stub-managed entries of REG_ORDER.  When a hard
     frame pointer is used, BP_REG is not stub-managed, so bump COUNT to
     compensate for the skipped slot and keep scanning.  */
  for (i = 0; i < count; ++i)
    {
      gcc_assert (i < MAX_REGS);
      if (REG_ORDER[i] == BP_REG && hfp)
	++count;
      else if (REG_ORDER[i] == regno)
	return true;
    }
  return false;
}
211 | ||
/* Constructor for xlogue_layout.  STACK_ALIGN_OFF_IN is the incoming
   stack misalignment (0 or 8); HFP is true when a hard frame pointer is
   used (in which case BP_REG is not stub-saved and m_nregs is 17).  */
xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
  : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
    m_stack_align_off_in (stack_align_off_in)
{
  HOST_WIDE_INT offset = stack_align_off_in;
  unsigned i, j;

  /* Lay out each register in REG_ORDER: 16 bytes per SSE register,
     8 bytes per GPR, recording offsets relative to STUB_INDEX_OFFSET.  */
  for (i = j = 0; i < MAX_REGS; ++i)
    {
      unsigned regno = REG_ORDER[i];

      if (regno == BP_REG && hfp)
	continue;
      if (SSE_REGNO_P (regno))
	{
	  offset += 16;
	  /* Verify that SSE regs are always aligned.  */
	  gcc_assert (!((stack_align_off_in + offset) & 15));
	}
      else
	offset += 8;

      m_regs[j].regno = regno;
      m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
    }
  gcc_assert (j == m_nregs);
}
240 | ||
/* Return the composed name ("__<isa>_<base>_<nregs>") of stub STUB
   handling MIN_REGS + N_EXTRA_REGS registers, building and caching it in
   s_stub_names on first use.  */
const char *
xlogue_layout::get_stub_name (enum xlogue_stub stub,
			      unsigned n_extra_regs)
{
  const int have_avx = TARGET_AVX;
  char *name = s_stub_names[!!have_avx][stub][n_extra_regs];

  /* Lazy init: s_stub_names is zero-initialized, so an empty string
     means this name has not been composed yet.  */
  if (!*name)
    {
      int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
			  (have_avx ? "avx" : "sse"),
			  STUB_BASE_NAMES[stub],
			  MIN_REGS + n_extra_regs);
      gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
    }

  return name;
}
260 | ||
261 | /* Return rtx of a symbol ref for the entry point (based upon | |
262 | cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */ | |
263 | rtx | |
264 | xlogue_layout::get_stub_rtx (enum xlogue_stub stub) | |
265 | { | |
266 | const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs; | |
267 | gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS); | |
268 | gcc_assert (stub < XLOGUE_STUB_COUNT); | |
269 | gcc_assert (crtl->stack_realign_finalized); | |
270 | ||
271 | return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs)); | |
272 | } | |
273 | ||
274 | unsigned scalar_chain::max_id = 0; | |
275 | ||
72bb85f8 ML |
276 | namespace { |
277 | ||
2bf6d935 ML |
/* Initialize new chain.  SMODE_ is the scalar mode being converted from,
   VMODE_ the vector mode converted to.  Allocates the insn/def bitmaps;
   the work queue is created lazily in build ().  */

scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
{
  smode = smode_;
  vmode = vmode_;

  chain_id = ++max_id;

  if (dump_file)
    fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);

  bitmap_obstack_initialize (NULL);
  insns = BITMAP_ALLOC (NULL);
  defs = BITMAP_ALLOC (NULL);
  defs_conv = BITMAP_ALLOC (NULL);
  queue = NULL;
}
296 | ||
/* Free chain's data: release the bitmaps and the default bitmap
   obstack initialized in the constructor.  */

scalar_chain::~scalar_chain ()
{
  BITMAP_FREE (insns);
  BITMAP_FREE (defs);
  BITMAP_FREE (defs_conv);
  bitmap_obstack_release (NULL);
}
306 | ||
307 | /* Add instruction into chains' queue. */ | |
308 | ||
309 | void | |
310 | scalar_chain::add_to_queue (unsigned insn_uid) | |
311 | { | |
312 | if (bitmap_bit_p (insns, insn_uid) | |
313 | || bitmap_bit_p (queue, insn_uid)) | |
314 | return; | |
315 | ||
316 | if (dump_file) | |
317 | fprintf (dump_file, " Adding insn %d into chain's #%d queue\n", | |
318 | insn_uid, chain_id); | |
319 | bitmap_set_bit (queue, insn_uid); | |
320 | } | |
321 | ||
b5a6addb RB |
/* Construct a chain for general (non-TImode) scalar-to-vector
   conversion; additionally tracks which out-of-chain insns need
   conversion copies and counts the inter-unit moves required.  */
general_scalar_chain::general_scalar_chain (enum machine_mode smode_,
					    enum machine_mode vmode_)
  : scalar_chain (smode_, vmode_)
{
  insns_conv = BITMAP_ALLOC (NULL);
  n_sse_to_integer = 0;
  n_integer_to_sse = 0;
}
330 | ||
/* Release the extra bitmap owned by this chain; the base-class
   destructor frees the rest.  */
general_scalar_chain::~general_scalar_chain ()
{
  BITMAP_FREE (insns_conv);
}
335 | ||
2bf6d935 ML |
/* For DImode conversion, mark register defined by DEF as requiring
   conversion.  */

void
general_scalar_chain::mark_dual_mode_def (df_ref def)
{
  gcc_assert (DF_REF_REG_DEF_P (def));

  /* Record the def/insn pair so we can later efficiently iterate over
     the defs to convert on insns not in the chain.  */
  bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
  if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
    {
      /* Def is outside the chain: an integer->SSE copy is needed, but
	 count it only once per (insn, new reg) combination.  */
      if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
	  && !reg_new)
	return;
      n_integer_to_sse++;
    }
  else
    {
      /* Def is inside the chain: an SSE->integer copy is needed for the
	 outside uses, once per register.  */
      if (!reg_new)
	return;
      n_sse_to_integer++;
    }

  if (dump_file)
    fprintf (dump_file,
	     "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
	     DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
}
366 | ||
/* For TImode conversion, it is unused: TImode chains never leave a def
   needing both modes, so reaching here indicates a bug.  */

void
timode_scalar_chain::mark_dual_mode_def (df_ref)
{
  gcc_unreachable ();
}
374 | ||
/* Check REF's chain to add new insns into a queue
   and find registers requiring conversion.  CANDIDATES is the set of
   insn uids still eligible to join some chain.  */

void
scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
{
  df_link *chain;

  gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))
	      || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)));
  add_to_queue (DF_REF_INSN_UID (ref));

  /* Walk the def-use/use-def links of REF.  */
  for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
    {
      unsigned uid = DF_REF_INSN_UID (chain->ref);

      if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
	continue;

      if (!DF_REF_REG_MEM_P (chain->ref))
	{
	  /* Linked insn already in this chain, or a candidate that can
	     be pulled in: nothing further to mark.  */
	  if (bitmap_bit_p (insns, uid))
	    continue;

	  if (bitmap_bit_p (candidates, uid))
	    {
	      add_to_queue (uid);
	      continue;
	    }
	}

      /* The linked insn is not convertible: the register will need both
	 scalar and vector modes.  For a non-convertible def, mark that
	 def; for a non-convertible use, mark REF's own def.  */
      if (DF_REF_REG_DEF_P (chain->ref))
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (chain->ref);
	}
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
		     DF_REF_REGNO (chain->ref), uid);
	  mark_dual_mode_def (ref);
	}
    }
}
422 | ||
/* Add instruction into a chain.  Records its uid, the register it
   defines (if a pseudo), and analyzes all its defs and register uses to
   grow the chain further.  */

void
scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
{
  if (bitmap_bit_p (insns, insn_uid))
    return;

  if (dump_file)
    fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);

  bitmap_set_bit (insns, insn_uid);

  rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
  rtx def_set = single_set (insn);
  if (def_set && REG_P (SET_DEST (def_set))
      && !HARD_REGISTER_P (SET_DEST (def_set)))
    bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));

  /* ??? The following is quadratic since analyze_register_chain
     iterates over all refs to look for dual-mode regs.  Instead this
     should be done separately for all regs mentioned in the chain once.  */
  df_ref ref;
  for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!HARD_REGISTER_P (DF_REF_REG (ref)))
      analyze_register_chain (candidates, ref);
  for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      analyze_register_chain (candidates, ref);
}
453 | ||
454 | /* Build new chain starting from insn INSN_UID recursively | |
455 | adding all dependent uses and definitions. */ | |
456 | ||
457 | void | |
458 | scalar_chain::build (bitmap candidates, unsigned insn_uid) | |
459 | { | |
460 | queue = BITMAP_ALLOC (NULL); | |
461 | bitmap_set_bit (queue, insn_uid); | |
462 | ||
463 | if (dump_file) | |
464 | fprintf (dump_file, "Building chain #%d...\n", chain_id); | |
465 | ||
466 | while (!bitmap_empty_p (queue)) | |
467 | { | |
468 | insn_uid = bitmap_first_set_bit (queue); | |
469 | bitmap_clear_bit (queue, insn_uid); | |
470 | bitmap_clear_bit (candidates, insn_uid); | |
471 | add_insn (candidates, insn_uid); | |
472 | } | |
473 | ||
474 | if (dump_file) | |
475 | { | |
476 | fprintf (dump_file, "Collected chain #%d...\n", chain_id); | |
477 | fprintf (dump_file, " insns: "); | |
478 | dump_bitmap (dump_file, insns); | |
479 | if (!bitmap_empty_p (defs_conv)) | |
480 | { | |
481 | bitmap_iterator bi; | |
482 | unsigned id; | |
483 | const char *comma = ""; | |
484 | fprintf (dump_file, " defs to convert: "); | |
485 | EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) | |
486 | { | |
487 | fprintf (dump_file, "%sr%d", comma, id); | |
488 | comma = ", "; | |
489 | } | |
490 | fprintf (dump_file, "\n"); | |
491 | } | |
492 | } | |
493 | ||
494 | BITMAP_FREE (queue); | |
495 | } | |
496 | ||
/* Return a cost of building a vector constant
   instead of using a scalar one.  */

int
general_scalar_chain::vector_const_cost (rtx exp)
{
  gcc_assert (CONST_INT_P (exp));

  /* Special SSE constants (all-zeros, all-ones, ...) are materialized
     with a single SSE instruction.  */
  if (standard_sse_constant_p (exp, vmode))
    return ix86_cost->sse_op;
  /* We have separate costs for SImode and DImode, use SImode costs
     for smaller modes.  */
  return ix86_cost->sse_load[smode == DImode ? 1 : 0];
}
511 | ||
/* Compute a gain for chain conversion.  Positive return value means
   converting the whole chain to vector mode is profitable: it sums the
   per-insn scalar-vs-vector cost difference and subtracts the cost of
   the inter-unit moves needed at the chain boundary.  */

int
general_scalar_chain::compute_convert_gain ()
{
  bitmap_iterator bi;
  unsigned insn_uid;
  int gain = 0;
  int cost = 0;

  if (dump_file)
    fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);

  /* SSE costs distinguish between SImode and DImode loads/stores, for
     int costs factor in the number of GPRs involved.  When supporting
     smaller modes than SImode the int load/store costs need to be
     adjusted as well.  */
  unsigned sse_cost_idx = smode == DImode ? 1 : 0;
  /* M is the number of GPR instructions one scalar op expands to:
     2 for DImode on 32-bit targets, otherwise 1.  */
  unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;

  EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    {
      rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
      rtx def_set = single_set (insn);
      rtx src = SET_SRC (def_set);
      rtx dst = SET_DEST (def_set);
      int igain = 0;

      if (REG_P (src) && REG_P (dst))
	igain += 2 * m - ix86_cost->xmm_move;
      else if (REG_P (src) && MEM_P (dst))
	igain
	  += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx];
      else if (MEM_P (src) && REG_P (dst))
	igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx];
      else
	switch (GET_CODE (src))
	  {
	  case ASHIFT:
	  case ASHIFTRT:
	  case LSHIFTRT:
	    /* On 32-bit, a DImode shift is a multi-insn sequence whose
	       cost depends on the shift amount.  */
	    if (m == 2)
	      {
		if (INTVAL (XEXP (src, 1)) >= 32)
		  igain += ix86_cost->add;
		else
		  igain += ix86_cost->shift_const;
	      }

	    igain += ix86_cost->shift_const - ix86_cost->sse_op;

	    if (CONST_INT_P (XEXP (src, 0)))
	      igain -= vector_const_cost (XEXP (src, 0));
	    break;

	  case AND:
	  case IOR:
	  case XOR:
	  case PLUS:
	  case MINUS:
	    igain += m * ix86_cost->add - ix86_cost->sse_op;
	    /* Additional gain for andnot for targets without BMI.  */
	    if (GET_CODE (XEXP (src, 0)) == NOT
		&& !TARGET_BMI)
	      igain += m * ix86_cost->add;

	    if (CONST_INT_P (XEXP (src, 0)))
	      igain -= vector_const_cost (XEXP (src, 0));
	    if (CONST_INT_P (XEXP (src, 1)))
	      igain -= vector_const_cost (XEXP (src, 1));
	    break;

	  case NEG:
	  case NOT:
	    igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);

	    if (GET_CODE (XEXP (src, 0)) != ABS)
	      {
		igain += m * ix86_cost->add;
		break;
	      }
	    /* FALLTHRU */

	  case ABS:
	  case SMAX:
	  case SMIN:
	  case UMAX:
	  case UMIN:
	    /* We do not have any conditional move cost, estimate it as a
	       reg-reg move.  Comparisons are costed as adds.  */
	    igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
	    /* Integer SSE ops are all costed the same.  */
	    igain -= ix86_cost->sse_op;
	    break;

	  case COMPARE:
	    /* Assume comparison cost is the same.  */
	    break;

	  case CONST_INT:
	    if (REG_P (dst))
	      /* DImode can be immediate for TARGET_64BIT and SImode always.  */
	      igain += m * COSTS_N_INSNS (1);
	    else if (MEM_P (dst))
	      igain += (m * ix86_cost->int_store[2]
			- ix86_cost->sse_store[sse_cost_idx]);
	    igain -= vector_const_cost (src);
	    break;

	  default:
	    gcc_unreachable ();
	  }

      if (igain != 0 && dump_file)
	{
	  fprintf (dump_file, "  Instruction gain %d for ", igain);
	  dump_insn_slim (dump_file, insn);
	}
      gain += igain;
    }

  if (dump_file)
    fprintf (dump_file, "  Instruction conversion gain: %d\n", gain);

  /* Cost the integer to sse and sse to integer moves.  */
  cost += n_sse_to_integer * ix86_cost->sse_to_integer;
  /* ??? integer_to_sse but we only have that in the RA cost table.
     Assume sse_to_integer/integer_to_sse are the same which they
     are at the moment.  */
  cost += n_integer_to_sse * ix86_cost->sse_to_integer;

  if (dump_file)
    fprintf (dump_file, "  Registers conversion cost: %d\n", cost);

  gain -= cost;

  if (dump_file)
    fprintf (dump_file, "  Total gain: %d\n", gain);

  return gain;
}
653 | ||
2bf6d935 ML |
/* Insert generated conversion instruction sequence INSNS
   after instruction AFTER.  New BB may be required in case
   instruction has EH region attached.  */

void
scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
{
  if (!control_flow_insn_p (after))
    {
      emit_insn_after (insns, after);
      return;
    }

  /* AFTER ends its block (e.g. can throw); emit the conversion at the
     start of a new block split off the fallthru edge instead.  */
  basic_block bb = BLOCK_FOR_INSN (after);
  edge e = find_fallthru_edge (bb->succs);
  gcc_assert (e);

  basic_block new_bb = split_edge (e);
  emit_insn_after (insns, BB_HEAD (new_bb));
}
674 | ||
72bb85f8 ML |
675 | } // anon namespace |
676 | ||
8ed1d2fa RB |
/* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
   zeroing the upper parts.  */

static rtx
gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
{
  switch (GET_MODE_NUNITS (vmode))
    {
    case 1:
      /* We are not using this case currently.  */
      gcc_unreachable ();
    case 2:
      /* Two-element vector: concat GPR with a zero high element.  */
      return gen_rtx_VEC_CONCAT (vmode, gpr,
				 CONST0_RTX (GET_MODE_INNER (vmode)));
    default:
      /* Wider vectors: merge a broadcast of GPR into a zero vector,
	 keeping only element 0 (merge mask 1).  */
      return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
				CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
    }
}
696 | ||
2bf6d935 ML |
/* Make vector copies for all register REGNO definitions
   and replace its uses in a chain.  INSN is the out-of-chain def of REG;
   the vector pseudo to copy into is looked up in defs_map.  */

void
general_scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
{
  rtx vreg = *defs_map.get (reg);

  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
    {
      /* No direct GPR->XMM moves: bounce through a stack slot.  */
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      if (smode == DImode && !TARGET_64BIT)
	{
	  /* 32-bit DImode: store the two SImode halves separately.  */
	  emit_move_insn (adjust_address (tmp, SImode, 0),
			  gen_rtx_SUBREG (SImode, reg, 0));
	  emit_move_insn (adjust_address (tmp, SImode, 4),
			  gen_rtx_SUBREG (SImode, reg, 4));
	}
      else
	emit_move_insn (copy_rtx (tmp), reg);
      emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			      gen_gpr_to_xmm_move_src (vmode, tmp)));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  /* Load low half with movd, insert high half with pinsrd.  */
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (V4SImode, vreg, 0),
					gen_rtx_SUBREG (SImode, reg, 4),
					GEN_INT (2)));
	}
      else
	{
	  /* Pre-SSE4.1: load both halves as vectors and interleave.  */
	  rtx tmp = gen_reg_rtx (DImode);
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 0)));
	  emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
				      CONST0_RTX (V4SImode),
				      gen_rtx_SUBREG (SImode, reg, 4)));
	  emit_insn (gen_vec_interleave_lowv4si
		     (gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, vreg, 0),
		      gen_rtx_SUBREG (V4SImode, tmp, 0)));
	}
    }
  else
    emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
			    gen_gpr_to_xmm_move_src (vmode, reg)));
  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a vector register r%d for insn %d\n",
	     REGNO (reg), REGNO (vreg), INSN_UID (insn));
}
2bf6d935 | 760 | |
48a31a09 RB |
/* Copy the definition SRC of INSN inside the chain to DST for
   scalar uses outside of the chain.  */

void
general_scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
{
  start_sequence ();
  if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    {
      /* No direct XMM->GPR moves: bounce through a stack slot.  */
      rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
      emit_move_insn (tmp, src);
      if (!TARGET_64BIT && smode == DImode)
	{
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  adjust_address (tmp, SImode, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  adjust_address (tmp, SImode, 4));
	}
      else
	emit_move_insn (dst, copy_rtx (tmp));
    }
  else if (!TARGET_64BIT && smode == DImode)
    {
      if (TARGET_SSE4_1)
	{
	  /* Extract the two SImode halves with vec_select (pextrd).  */
	  rtx tmp = gen_rtx_PARALLEL (VOIDmode,
				      gen_rtvec (1, const0_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 0),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));

	  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
	  emit_insn
	    (gen_rtx_SET
	       (gen_rtx_SUBREG (SImode, dst, 4),
		gen_rtx_VEC_SELECT (SImode,
				    gen_rtx_SUBREG (V4SImode, src, 0),
				    tmp)));
	}
      else
	{
	  /* Pre-SSE4.1: movd the low half, shift right 32 and movd the
	     high half.  */
	  rtx vcopy = gen_reg_rtx (V2DImode);
	  emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	  emit_move_insn (vcopy,
			  gen_rtx_LSHIFTRT (V2DImode,
					    vcopy, GEN_INT (32)));
	  emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
			  gen_rtx_SUBREG (SImode, vcopy, 0));
	}
    }
  else
    emit_move_insn (dst, src);

  rtx_insn *seq = get_insns ();
  end_sequence ();
  emit_conversion_insns (seq, insn);

  if (dump_file)
    fprintf (dump_file,
	     "  Copied r%d to a scalar register r%d for insn %d\n",
	     REGNO (src), REGNO (dst), INSN_UID (insn));
}
828 | ||
/* Convert operand OP in INSN.  We should handle
   memory operands and uninitialized registers.
   All other register uses are converted during
   registers conversion.  */

void
general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
{
  *op = copy_rtx_if_shared (*op);

  if (GET_CODE (*op) == NOT)
    {
      /* Recurse into the NOT operand and retag the NOT itself.  */
      convert_op (&XEXP (*op, 0), insn);
      PUT_MODE (*op, vmode);
    }
  else if (MEM_P (*op))
    {
      /* Preload the memory operand into a fresh vector pseudo.  */
      rtx tmp = gen_reg_rtx (GET_MODE (*op));

      /* Handle movabs.  */
      if (!memory_operand (*op, GET_MODE (*op)))
	{
	  rtx tmp2 = gen_reg_rtx (GET_MODE (*op));

	  emit_insn_before (gen_rtx_SET (tmp2, *op), insn);
	  *op = tmp2;
	}

      emit_insn_before (gen_rtx_SET (gen_rtx_SUBREG (vmode, tmp, 0),
				     gen_gpr_to_xmm_move_src (vmode, *op)),
			insn);
      *op = gen_rtx_SUBREG (vmode, tmp, 0);

      if (dump_file)
	fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
		 INSN_UID (insn), REGNO (tmp));
    }
  else if (REG_P (*op))
    {
      /* Plain register use: just re-view it in the vector mode; the
	 register itself is converted elsewhere.  */
      *op = gen_rtx_SUBREG (vmode, *op, 0);
    }
  else if (CONST_INT_P (*op))
    {
      rtx vec_cst;
      rtx tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);

      /* Prefer all ones vector in case of -1.  */
      if (constm1_operand (*op, GET_MODE (*op)))
	vec_cst = CONSTM1_RTX (vmode);
      else
	{
	  /* Otherwise build {op, 0, ..., 0}.  */
	  unsigned n = GET_MODE_NUNITS (vmode);
	  rtx *v = XALLOCAVEC (rtx, n);
	  v[0] = *op;
	  for (unsigned i = 1; i < n; ++i)
	    v[i] = const0_rtx;
	  vec_cst = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
	}

      /* Non-special constants must be loaded from the constant pool.  */
      if (!standard_sse_constant_p (vec_cst, vmode))
	{
	  start_sequence ();
	  vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  emit_insn_before (seq, insn);
	}

      emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
      *op = tmp;
    }
  else
    {
      /* Anything else must already be a vmode subreg produced by an
	 earlier conversion.  */
      gcc_assert (SUBREG_P (*op));
      gcc_assert (GET_MODE (*op) == vmode);
    }
}
906 | ||
/* Convert INSN to vector mode.  */

void
general_scalar_chain::convert_insn (rtx_insn *insn)
{
  /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
  for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
      {
	/* Look for a use that is either inside a MEM or reached by an
	   insn outside of this chain; such a use still needs the scalar
	   value, so a copy must be materialized.  */
	df_link *use;
	for (use = DF_REF_CHAIN (ref); use; use = use->next)
	  if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
	      && (DF_REF_REG_MEM_P (use->ref)
		  || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
	    break;
	if (use)
	  convert_reg (insn, DF_REF_REG (ref),
		       *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
	else if (MAY_HAVE_DEBUG_BIND_INSNS)
	  {
	    /* If we generated a scalar copy we can leave debug-insns
	       as-is, if not, we have to adjust them.  */
	    auto_vec<rtx_insn *, 5> to_reset_debug_insns;
	    for (use = DF_REF_CHAIN (ref); use; use = use->next)
	      if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
		{
		  rtx_insn *debug_insn = DF_REF_INSN (use->ref);
		  /* If there's a reaching definition outside of the
		     chain we have to reset.  */
		  df_link *def;
		  for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
		    if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
		      break;
		  if (def)
		    to_reset_debug_insns.safe_push (debug_insn);
		  else
		    {
		      /* All reaching defs are in the chain: redirect the
			 debug use to the chain register.  */
		      *DF_REF_REAL_LOC (use->ref)
			= *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
		      df_insn_rescan (debug_insn);
		    }
		}
	    /* Have to do the reset outside of the DF_CHAIN walk to not
	       disrupt it.  */
	    while (!to_reset_debug_insns.is_empty ())
	      {
		rtx_insn *debug_insn = to_reset_debug_insns.pop ();
		INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
		df_insn_rescan_debug_internal (debug_insn);
	      }
	  }
      }

  /* Replace uses in this insn with the defs we use in the chain.  */
  for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    if (!DF_REF_REG_MEM_P (ref))
      if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
	{
	  /* Also update a corresponding REG_DEAD note.  */
	  rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
	  if (note)
	    XEXP (note, 0) = *vreg;
	  *DF_REF_REAL_LOC (ref) = *vreg;
	}

  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);
  rtx subreg;

  if (MEM_P (dst) && !REG_P (src))
    {
      /* There are no scalar integer instructions and therefore
	 temporary register usage is required.  */
      rtx tmp = gen_reg_rtx (smode);
      emit_conversion_insns (gen_move_insn (dst, tmp), insn);
      dst = gen_rtx_SUBREG (vmode, tmp, 0);
    }
  else if (REG_P (dst))
    {
      /* Replace the definition with a SUBREG to the definition we
	 use inside the chain.  */
      rtx *vdef = defs_map.get (dst);
      if (vdef)
	dst = *vdef;
      dst = gen_rtx_SUBREG (vmode, dst, 0);
      /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
	 is a non-REG_P.  So kill those off.  */
      rtx note = find_reg_equal_equiv_note (insn);
      if (note)
	remove_note (insn, note);
    }

  /* Rewrite SRC into the vector mode; the case structure mirrors the
     codes accepted by general_scalar_to_vector_candidate_p.  */
  switch (GET_CODE (src))
    {
    case PLUS:
    case MINUS:
    case IOR:
    case XOR:
    case AND:
    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      /* Binary operations: convert the second operand, then fall
	 through to handle the first operand like the unary codes.  */
      convert_op (&XEXP (src, 1), insn);
      /* FALLTHRU */

    case ABS:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      convert_op (&XEXP (src, 0), insn);
      PUT_MODE (src, vmode);
      break;

    case NEG:
      /* Negation has no direct vector form here; emit 0 - x.  */
      src = XEXP (src, 0);

      if (GET_CODE (src) == ABS)
	{
	  /* (neg (abs x)): materialize the vector ABS first.  */
	  src = XEXP (src, 0);
	  convert_op (&src, insn);
	  subreg = gen_reg_rtx (vmode);
	  emit_insn_before (gen_rtx_SET (subreg,
					 gen_rtx_ABS (vmode, src)), insn);
	  src = subreg;
	}
      else
	convert_op (&src, insn);

      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
      src = gen_rtx_MINUS (vmode, subreg, src);
      break;

    case NOT:
      /* One's complement becomes XOR with an all-ones vector.  */
      src = XEXP (src, 0);
      convert_op (&src, insn);
      subreg = gen_reg_rtx (vmode);
      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
      src = gen_rtx_XOR (vmode, src, subreg);
      break;

    case MEM:
      if (!REG_P (dst))
	convert_op (&src, insn);
      break;

    case REG:
      if (!MEM_P (dst))
	convert_op (&src, insn);
      break;

    case SUBREG:
      gcc_assert (GET_MODE (src) == vmode);
      break;

    case COMPARE:
      /* Double-word zero test (see convertible_comparison_p): replace
	 the IOR of the two halves by a PTEST of the broadcast value.  */
      src = SUBREG_REG (XEXP (XEXP (src, 0), 0));

      gcc_assert (REG_P (src) && GET_MODE (src) == DImode);
      subreg = gen_rtx_SUBREG (V2DImode, src, 0);
      emit_insn_before (gen_vec_interleave_lowv2di
			(copy_rtx_if_shared (subreg),
			 copy_rtx_if_shared (subreg),
			 copy_rtx_if_shared (subreg)),
			insn);
      dst = gen_rtx_REG (CCmode, FLAGS_REG);
      src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (subreg),
					       copy_rtx_if_shared (subreg)),
			    UNSPEC_PTEST);
      break;

    case CONST_INT:
      convert_op (&src, insn);
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  /* The rewritten insn must match an existing pattern.  */
  int patt = recog_memoized (insn);
  if (patt == -1)
    fatal_insn_not_found (insn);
  df_insn_rescan (insn);
}
1100 | ||
/* Fix uses of converted REG in debug insns.  */

void
timode_scalar_chain::fix_debug_reg_uses (rtx reg)
{
  /* Debug insns only matter when var-tracking will run.  */
  if (!flag_var_tracking)
    return;

  df_ref ref, next;
  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    {
      rtx_insn *insn = DF_REF_INSN (ref);
      /* Make sure the next ref is for a different instruction,
	 so that we're not affected by the rescan.  */
      next = DF_REF_NEXT_REG (ref);
      while (next && DF_REF_INSN (next) == insn)
	next = DF_REF_NEXT_REG (next);

      if (DEBUG_INSN_P (insn))
	{
	  /* It may be a debug insn with a TImode variable in
	     register.  */
	  bool changed = false;
	  /* Rewrite each V1TImode use in this debug insn to a TImode
	     subreg so the debug info keeps the scalar view.  */
	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
	    {
	      rtx *loc = DF_REF_LOC (ref);
	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
		{
		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
		  changed = true;
		}
	    }
	  if (changed)
	    df_insn_rescan (insn);
	}
    }
}
1138 | ||
/* Convert INSN from TImode to V1TImode.  */

void
timode_scalar_chain::convert_insn (rtx_insn *insn)
{
  rtx def_set = single_set (insn);
  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  switch (GET_CODE (dst))
    {
    case REG:
      {
	/* Re-mode the destination in place and keep any
	   REG_EQUAL/REG_EQUIV note in sync with the new mode.  */
	rtx tmp = find_reg_equal_equiv_note (insn);
	if (tmp)
	  PUT_MODE (XEXP (tmp, 0), V1TImode);
	PUT_MODE (dst, V1TImode);
	fix_debug_reg_uses (dst);
      }
      break;
    case MEM:
      PUT_MODE (dst, V1TImode);
      break;

    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (src))
    {
    case REG:
      PUT_MODE (src, V1TImode);
      /* Call fix_debug_reg_uses only if SRC is never defined.  */
      if (!DF_REG_DEF_CHAIN (REGNO (src)))
	fix_debug_reg_uses (src);
      break;

    case MEM:
      PUT_MODE (src, V1TImode);
      break;

    case CONST_WIDE_INT:
      if (NONDEBUG_INSN_P (insn))
	{
	  /* Since there are no instructions to store 128-bit constant,
	     temporary register usage is required.  Force the constant
	     into the pool and emit the load sequence before INSN.  */
	  rtx tmp = gen_reg_rtx (V1TImode);
	  start_sequence ();
	  src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src));
	  src = validize_mem (force_const_mem (V1TImode, src));
	  rtx_insn *seq = get_insns ();
	  end_sequence ();
	  if (seq)
	    emit_insn_before (seq, insn);
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    case CONST_INT:
      /* Map the immediate to the matching standard SSE vector
	 constant (1 = all zeros, 2 = all ones, per the case arms).  */
      switch (standard_sse_constant_p (src, TImode))
	{
	case 1:
	  src = CONST0_RTX (GET_MODE (dst));
	  break;
	case 2:
	  src = CONSTM1_RTX (GET_MODE (dst));
	  break;
	default:
	  gcc_unreachable ();
	}
      if (NONDEBUG_INSN_P (insn))
	{
	  rtx tmp = gen_reg_rtx (V1TImode);
	  /* Since there are no instructions to store standard SSE
	     constant, temporary register usage is required.  */
	  emit_conversion_insns (gen_rtx_SET (dst, tmp), insn);
	  dst = tmp;
	}
      break;

    default:
      gcc_unreachable ();
    }

  SET_SRC (def_set) = src;
  SET_DEST (def_set) = dst;

  /* Drop possible dead definitions.  */
  PATTERN (insn) = def_set;

  INSN_CODE (insn) = -1;
  recog_memoized (insn);
  df_insn_rescan (insn);
}
1234 | ||
48a31a09 RB |
1235 | /* Generate copies from defs used by the chain but not defined therein. |
1236 | Also populates defs_map which is used later by convert_insn. */ | |
1237 | ||
2bf6d935 | 1238 | void |
93cf5515 | 1239 | general_scalar_chain::convert_registers () |
2bf6d935 ML |
1240 | { |
1241 | bitmap_iterator bi; | |
1242 | unsigned id; | |
48a31a09 | 1243 | EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) |
d865ed72 RB |
1244 | { |
1245 | rtx chain_reg = gen_reg_rtx (smode); | |
1246 | defs_map.put (regno_reg_rtx[id], chain_reg); | |
1247 | } | |
b5a6addb RB |
1248 | EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi) |
1249 | for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref)) | |
1250 | if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref))) | |
1251 | make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref)); | |
2bf6d935 ML |
1252 | } |
1253 | ||
1254 | /* Convert whole chain creating required register | |
1255 | conversions and copies. */ | |
1256 | ||
1257 | int | |
1258 | scalar_chain::convert () | |
1259 | { | |
1260 | bitmap_iterator bi; | |
1261 | unsigned id; | |
1262 | int converted_insns = 0; | |
1263 | ||
1264 | if (!dbg_cnt (stv_conversion)) | |
1265 | return 0; | |
1266 | ||
1267 | if (dump_file) | |
1268 | fprintf (dump_file, "Converting chain #%d...\n", chain_id); | |
1269 | ||
1270 | convert_registers (); | |
1271 | ||
1272 | EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) | |
1273 | { | |
1274 | convert_insn (DF_INSN_UID_GET (id)->insn); | |
1275 | converted_insns++; | |
1276 | } | |
1277 | ||
1278 | return converted_insns; | |
1279 | } | |
1280 | ||
266f44a9 L |
1281 | /* Return the SET expression if INSN doesn't reference hard register. |
1282 | Return NULL if INSN uses or defines a hard register, excluding | |
1283 | pseudo register pushes, hard register uses in a memory address, | |
1284 | clobbers and flags definitions. */ | |
2bf6d935 | 1285 | |
266f44a9 L |
1286 | static rtx |
1287 | pseudo_reg_set (rtx_insn *insn) | |
2bf6d935 | 1288 | { |
266f44a9 L |
1289 | rtx set = single_set (insn); |
1290 | if (!set) | |
1291 | return NULL; | |
1292 | ||
1293 | /* Check pseudo register push first. */ | |
6643ca0b | 1294 | machine_mode mode = TARGET_64BIT ? TImode : DImode; |
266f44a9 L |
1295 | if (REG_P (SET_SRC (set)) |
1296 | && !HARD_REGISTER_P (SET_SRC (set)) | |
6643ca0b | 1297 | && push_operand (SET_DEST (set), mode)) |
266f44a9 L |
1298 | return set; |
1299 | ||
2bf6d935 ML |
1300 | df_ref ref; |
1301 | FOR_EACH_INSN_DEF (ref, insn) | |
1302 | if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) | |
1303 | && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) | |
1304 | && DF_REF_REGNO (ref) != FLAGS_REG) | |
266f44a9 | 1305 | return NULL; |
2bf6d935 ML |
1306 | |
1307 | FOR_EACH_INSN_USE (ref, insn) | |
1308 | if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) | |
266f44a9 | 1309 | return NULL; |
2bf6d935 | 1310 | |
266f44a9 | 1311 | return set; |
2bf6d935 ML |
1312 | } |
1313 | ||
1314 | /* Check if comparison INSN may be transformed | |
1315 | into vector comparison. Currently we transform | |
1316 | zero checks only which look like: | |
1317 | ||
1318 | (set (reg:CCZ 17 flags) | |
1319 | (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4) | |
1320 | (subreg:SI (reg:DI x) 0)) | |
1321 | (const_int 0 [0]))) */ | |
1322 | ||
1323 | static bool | |
3b45ae63 | 1324 | convertible_comparison_p (rtx_insn *insn, enum machine_mode mode) |
2bf6d935 | 1325 | { |
c839844a UB |
1326 | /* ??? Currently convertible for double-word DImode chain only. */ |
1327 | if (TARGET_64BIT || mode != DImode) | |
1328 | return false; | |
1329 | ||
2bf6d935 ML |
1330 | if (!TARGET_SSE4_1) |
1331 | return false; | |
1332 | ||
1333 | rtx def_set = single_set (insn); | |
1334 | ||
1335 | gcc_assert (def_set); | |
1336 | ||
1337 | rtx src = SET_SRC (def_set); | |
1338 | rtx dst = SET_DEST (def_set); | |
1339 | ||
1340 | gcc_assert (GET_CODE (src) == COMPARE); | |
1341 | ||
1342 | if (GET_CODE (dst) != REG | |
1343 | || REGNO (dst) != FLAGS_REG | |
1344 | || GET_MODE (dst) != CCZmode) | |
1345 | return false; | |
1346 | ||
1347 | rtx op1 = XEXP (src, 0); | |
1348 | rtx op2 = XEXP (src, 1); | |
1349 | ||
1350 | if (op2 != CONST0_RTX (GET_MODE (op2))) | |
1351 | return false; | |
1352 | ||
1353 | if (GET_CODE (op1) != IOR) | |
1354 | return false; | |
1355 | ||
1356 | op2 = XEXP (op1, 1); | |
1357 | op1 = XEXP (op1, 0); | |
1358 | ||
1359 | if (!SUBREG_P (op1) | |
1360 | || !SUBREG_P (op2) | |
c839844a UB |
1361 | || GET_MODE (op1) != SImode |
1362 | || GET_MODE (op2) != SImode | |
2bf6d935 | 1363 | || ((SUBREG_BYTE (op1) != 0 |
c839844a | 1364 | || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode)) |
2bf6d935 | 1365 | && (SUBREG_BYTE (op2) != 0 |
c839844a | 1366 | || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode)))) |
2bf6d935 ML |
1367 | return false; |
1368 | ||
1369 | op1 = SUBREG_REG (op1); | |
1370 | op2 = SUBREG_REG (op2); | |
1371 | ||
1372 | if (op1 != op2 | |
1373 | || !REG_P (op1) | |
c839844a | 1374 | || GET_MODE (op1) != DImode) |
2bf6d935 ML |
1375 | return false; |
1376 | ||
1377 | return true; | |
1378 | } | |
1379 | ||
/* The general version of scalar_to_vector_candidate_p.  */

static bool
general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
{
  /* Only single sets of pseudos (or pseudo pushes) are candidates.  */
  rtx def_set = pseudo_reg_set (insn);

  if (!def_set)
    return false;

  rtx src = SET_SRC (def_set);
  rtx dst = SET_DEST (def_set);

  if (GET_CODE (src) == COMPARE)
    return convertible_comparison_p (insn, mode);

  /* We are interested in "mode" only.  */
  if ((GET_MODE (src) != mode
       && !CONST_INT_P (src))
      || GET_MODE (dst) != mode)
    return false;

  if (!REG_P (dst) && !MEM_P (dst))
    return false;

  /* NOTE: the fall-throughs below are deliberate; binary codes check
     operand 1 here and share the operand-0 checks after the switch.  */
  switch (GET_CODE (src))
    {
    case ASHIFTRT:
      if (!TARGET_AVX512VL)
	return false;
      /* FALLTHRU */

    case ASHIFT:
    case LSHIFTRT:
      /* Shift counts must be immediates within the scalar width.  */
      if (!CONST_INT_P (XEXP (src, 1))
	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
	return false;
      break;

    case SMAX:
    case SMIN:
    case UMAX:
    case UMIN:
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSE4_1))
	return false;
      /* Fallthru.  */

    case AND:
    case IOR:
    case XOR:
    case PLUS:
    case MINUS:
      /* The second operand must be a register, memory or immediate.  */
      if (!REG_P (XEXP (src, 1))
	  && !MEM_P (XEXP (src, 1))
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      if (GET_MODE (XEXP (src, 1)) != mode
	  && !CONST_INT_P (XEXP (src, 1)))
	return false;

      /* Check for andnot case.  */
      if (GET_CODE (src) != AND
	  || GET_CODE (XEXP (src, 0)) != NOT)
	break;

      /* (and (not x) y): re-point SRC at the NOT so the shared checks
	 below examine X.  */
      src = XEXP (src, 0);
      /* FALLTHRU */

    case NOT:
      break;

    case NEG:
      /* Check for nabs case.  */
      if (GET_CODE (XEXP (src, 0)) != ABS)
	break;

      src = XEXP (src, 0);
      /* FALLTHRU */

    case ABS:
      if ((mode == DImode && !TARGET_AVX512VL)
	  || (mode == SImode && !TARGET_SSSE3))
	return false;
      break;

    case REG:
      return true;

    case MEM:
    case CONST_INT:
      return REG_P (dst);

    default:
      return false;
    }

  /* Shared check for the first operand (of SRC as possibly re-pointed
     above): register, memory or immediate of the right mode.  */
  if (!REG_P (XEXP (src, 0))
      && !MEM_P (XEXP (src, 0))
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  if (GET_MODE (XEXP (src, 0)) != mode
      && !CONST_INT_P (XEXP (src, 0)))
    return false;

  return true;
}
1489 | ||
1490 | /* The TImode version of scalar_to_vector_candidate_p. */ | |
1491 | ||
1492 | static bool | |
1493 | timode_scalar_to_vector_candidate_p (rtx_insn *insn) | |
1494 | { | |
266f44a9 | 1495 | rtx def_set = pseudo_reg_set (insn); |
2bf6d935 ML |
1496 | |
1497 | if (!def_set) | |
1498 | return false; | |
1499 | ||
2bf6d935 ML |
1500 | rtx src = SET_SRC (def_set); |
1501 | rtx dst = SET_DEST (def_set); | |
1502 | ||
1503 | /* Only TImode load and store are allowed. */ | |
1504 | if (GET_MODE (dst) != TImode) | |
1505 | return false; | |
1506 | ||
1507 | if (MEM_P (dst)) | |
1508 | { | |
1509 | /* Check for store. Memory must be aligned or unaligned store | |
1510 | is optimal. Only support store from register, standard SSE | |
1511 | constant or CONST_WIDE_INT generated from piecewise store. | |
1512 | ||
1513 | ??? Verify performance impact before enabling CONST_INT for | |
1514 | __int128 store. */ | |
1515 | if (misaligned_operand (dst, TImode) | |
1516 | && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) | |
1517 | return false; | |
1518 | ||
1519 | switch (GET_CODE (src)) | |
1520 | { | |
1521 | default: | |
1522 | return false; | |
1523 | ||
1524 | case REG: | |
1525 | case CONST_WIDE_INT: | |
1526 | return true; | |
1527 | ||
1528 | case CONST_INT: | |
1529 | return standard_sse_constant_p (src, TImode); | |
1530 | } | |
1531 | } | |
1532 | else if (MEM_P (src)) | |
1533 | { | |
1534 | /* Check for load. Memory must be aligned or unaligned load is | |
1535 | optimal. */ | |
1536 | return (REG_P (dst) | |
1537 | && (!misaligned_operand (src, TImode) | |
1538 | || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)); | |
1539 | } | |
1540 | ||
1541 | return false; | |
1542 | } | |
1543 | ||
2bf6d935 ML |
1544 | /* For a register REGNO, scan instructions for its defs and uses. |
1545 | Put REGNO in REGS if a def or use isn't in CANDIDATES. */ | |
1546 | ||
1547 | static void | |
1548 | timode_check_non_convertible_regs (bitmap candidates, bitmap regs, | |
1549 | unsigned int regno) | |
1550 | { | |
1551 | for (df_ref def = DF_REG_DEF_CHAIN (regno); | |
1552 | def; | |
1553 | def = DF_REF_NEXT_REG (def)) | |
1554 | { | |
1555 | if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) | |
1556 | { | |
1557 | if (dump_file) | |
1558 | fprintf (dump_file, | |
1559 | "r%d has non convertible def in insn %d\n", | |
1560 | regno, DF_REF_INSN_UID (def)); | |
1561 | ||
1562 | bitmap_set_bit (regs, regno); | |
1563 | break; | |
1564 | } | |
1565 | } | |
1566 | ||
1567 | for (df_ref ref = DF_REG_USE_CHAIN (regno); | |
1568 | ref; | |
1569 | ref = DF_REF_NEXT_REG (ref)) | |
1570 | { | |
1571 | /* Debug instructions are skipped. */ | |
1572 | if (NONDEBUG_INSN_P (DF_REF_INSN (ref)) | |
1573 | && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) | |
1574 | { | |
1575 | if (dump_file) | |
1576 | fprintf (dump_file, | |
1577 | "r%d has non convertible use in insn %d\n", | |
1578 | regno, DF_REF_INSN_UID (ref)); | |
1579 | ||
1580 | bitmap_set_bit (regs, regno); | |
1581 | break; | |
1582 | } | |
1583 | } | |
1584 | } | |
1585 | ||
1586 | /* The TImode version of remove_non_convertible_regs. */ | |
1587 | ||
1588 | static void | |
1589 | timode_remove_non_convertible_regs (bitmap candidates) | |
1590 | { | |
1591 | bitmap_iterator bi; | |
1592 | unsigned id; | |
1593 | bitmap regs = BITMAP_ALLOC (NULL); | |
1594 | ||
1595 | EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) | |
1596 | { | |
1597 | rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); | |
1598 | rtx dest = SET_DEST (def_set); | |
1599 | rtx src = SET_SRC (def_set); | |
1600 | ||
1601 | if ((!REG_P (dest) | |
1602 | || bitmap_bit_p (regs, REGNO (dest)) | |
1603 | || HARD_REGISTER_P (dest)) | |
1604 | && (!REG_P (src) | |
1605 | || bitmap_bit_p (regs, REGNO (src)) | |
1606 | || HARD_REGISTER_P (src))) | |
1607 | continue; | |
1608 | ||
1609 | if (REG_P (dest)) | |
1610 | timode_check_non_convertible_regs (candidates, regs, | |
1611 | REGNO (dest)); | |
1612 | ||
1613 | if (REG_P (src)) | |
1614 | timode_check_non_convertible_regs (candidates, regs, | |
1615 | REGNO (src)); | |
1616 | } | |
1617 | ||
1618 | EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) | |
1619 | { | |
1620 | for (df_ref def = DF_REG_DEF_CHAIN (id); | |
1621 | def; | |
1622 | def = DF_REF_NEXT_REG (def)) | |
1623 | if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) | |
1624 | { | |
1625 | if (dump_file) | |
1626 | fprintf (dump_file, "Removing insn %d from candidates list\n", | |
1627 | DF_REF_INSN_UID (def)); | |
1628 | ||
1629 | bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); | |
1630 | } | |
1631 | ||
1632 | for (df_ref ref = DF_REG_USE_CHAIN (id); | |
1633 | ref; | |
1634 | ref = DF_REF_NEXT_REG (ref)) | |
1635 | if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) | |
1636 | { | |
1637 | if (dump_file) | |
1638 | fprintf (dump_file, "Removing insn %d from candidates list\n", | |
1639 | DF_REF_INSN_UID (ref)); | |
1640 | ||
1641 | bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref)); | |
1642 | } | |
1643 | } | |
1644 | ||
1645 | BITMAP_FREE (regs); | |
1646 | } | |
1647 | ||
/* Main STV pass function.  Find and convert scalar
   instructions into vector mode when profitable.  */

static unsigned int
convert_scalars_to_vector (bool timode_p)
{
  basic_block bb;
  int converted_insns = 0;

  bitmap_obstack_initialize (NULL);
  /* Candidate sets indexed in parallel with their scalar/vector modes.  */
  const machine_mode cand_mode[3] = { SImode, DImode, TImode };
  const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
  bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
  for (unsigned i = 0; i < 3; ++i)
    bitmap_initialize (&candidates[i], &bitmap_default_obstack);

  calculate_dominance_info (CDI_DOMINATORS);
  /* Chain building below relies on use-def/def-use chains.  */
  df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();

  /* Find all instructions we want to convert into vector mode.  */
  if (dump_file)
    fprintf (dump_file, "Searching for mode conversion candidates...\n");

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (timode_p
	    && timode_scalar_to_vector_candidate_p (insn))
	  {
	    if (dump_file)
	      fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
		       INSN_UID (insn));

	    bitmap_set_bit (&candidates[2], INSN_UID (insn));
	  }
	else if (!timode_p)
	  {
	    /* Check {SI,DI}mode.  An insn lands in at most one set.  */
	    for (unsigned i = 0; i <= 1; ++i)
	      if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
		{
		  if (dump_file)
		    fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
			     INSN_UID (insn), i == 0 ? "SImode" : "DImode");

		  bitmap_set_bit (&candidates[i], INSN_UID (insn));
		  break;
		}
	  }
    }

  if (timode_p)
    timode_remove_non_convertible_regs (&candidates[2]);

  /* Purely diagnostic: report when no candidate set is non-empty.  */
  for (unsigned i = 0; i <= 2; ++i)
    if (!bitmap_empty_p (&candidates[i]))
      break;
    else if (i == 2 && dump_file)
      fprintf (dump_file, "There are no candidates for optimization.\n");

  /* Build chains from the remaining candidates and convert the
     profitable ones; chain->build removes its insns from the set,
     so each while loop terminates.  */
  for (unsigned i = 0; i <= 2; ++i)
    while (!bitmap_empty_p (&candidates[i]))
      {
	unsigned uid = bitmap_first_set_bit (&candidates[i]);
	scalar_chain *chain;

	if (cand_mode[i] == TImode)
	  chain = new timode_scalar_chain;
	else
	  chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);

	/* Find instructions chain we want to convert to vector mode.
	   Check all uses and definitions to estimate all required
	   conversions.  */
	chain->build (&candidates[i], uid);

	if (chain->compute_convert_gain () > 0)
	  converted_insns += chain->convert ();
	else
	  if (dump_file)
	    fprintf (dump_file, "Chain #%d conversion is not profitable\n",
		     chain->chain_id);

	delete chain;
      }

  if (dump_file)
    fprintf (dump_file, "Total insns converted: %d\n", converted_insns);

  for (unsigned i = 0; i <= 2; ++i)
    bitmap_release (&candidates[i]);
  bitmap_obstack_release (NULL);
  df_process_deferred_rescans ();

  /* Conversion means we may have 128bit register spills/fills
     which require aligned stack.  */
  if (converted_insns)
    {
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
      if (crtl->stack_alignment_estimated < 128)
	crtl->stack_alignment_estimated = 128;

      /* Redo the stack-realignment decision with the raised alignment;
	 this runs after the normal computation, hence the explicit
	 processed/tried bookkeeping.  */
      crtl->stack_realign_needed
	= INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
      crtl->stack_realign_tried = crtl->stack_realign_needed;

      crtl->stack_realign_processed = true;

      if (!crtl->drap_reg)
	{
	  rtx drap_rtx = targetm.calls.get_drap_rtx ();

	  /* stack_realign_drap and drap_rtx must match.  */
	  gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));

	  /* Do nothing if NULL is returned,
	     which means DRAP is not needed.  */
	  if (drap_rtx != NULL)
	    {
	      crtl->args.internal_arg_pointer = drap_rtx;

	      /* Call fixup_tail_calls to clean up
		 REG_EQUIV note if DRAP is needed.  */
	      fixup_tail_calls ();
	    }
	}

      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
      if (TARGET_64BIT)
	for (tree parm = DECL_ARGUMENTS (current_function_decl);
	     parm; parm = DECL_CHAIN (parm))
	  {
	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
	      continue;
	    /* Converted TImode argument registers are now V1TImode;
	       expose them to the rest of the compiler as TImode
	       subregs again.  */
	    if (DECL_RTL_SET_P (parm)
		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_RTL (parm);
		if (REG_P (r))
		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
	      }
	    if (DECL_INCOMING_RTL (parm)
		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
	      {
		rtx r = DECL_INCOMING_RTL (parm);
		if (REG_P (r))
		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
	      }
	  }
    }

  return 0;
}
1805 | ||
1806 | static unsigned int | |
1807 | rest_of_handle_insert_vzeroupper (void) | |
1808 | { | |
9a90b311 | 1809 | /* vzeroupper instructions are inserted immediately after reload to |
1810 | account for possible spills from 256bit or 512bit registers. The pass | |
1811 | reuses mode switching infrastructure by re-running mode insertion | |
1812 | pass, so disable entities that have already been processed. */ | |
1813 | for (int i = 0; i < MAX_386_ENTITIES; i++) | |
1814 | ix86_optimize_mode_switching[i] = 0; | |
2bf6d935 | 1815 | |
9a90b311 | 1816 | ix86_optimize_mode_switching[AVX_U128] = 1; |
2bf6d935 | 1817 | |
9a90b311 | 1818 | /* Call optimize_mode_switching. */ |
1819 | g->get_passes ()->execute_pass_mode_switching (); | |
1820 | ||
1821 | df_analyze (); | |
2bf6d935 ML |
1822 | return 0; |
1823 | } | |
1824 | ||
1825 | namespace { | |
1826 | ||
/* Pass metadata for the vzeroupper insertion pass below.  */
const pass_data pass_data_insert_vzeroupper =
{
  RTL_PASS, /* type */
  "vzeroupper", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
1839 | ||
1840 | class pass_insert_vzeroupper : public rtl_opt_pass | |
1841 | { | |
1842 | public: | |
1843 | pass_insert_vzeroupper(gcc::context *ctxt) | |
1844 | : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) | |
1845 | {} | |
1846 | ||
1847 | /* opt_pass methods: */ | |
1848 | virtual bool gate (function *) | |
1849 | { | |
9a90b311 | 1850 | return TARGET_AVX && TARGET_VZEROUPPER |
1851 | && flag_expensive_optimizations && !optimize_size; | |
2bf6d935 ML |
1852 | } |
1853 | ||
1854 | virtual unsigned int execute (function *) | |
1855 | { | |
1856 | return rest_of_handle_insert_vzeroupper (); | |
1857 | } | |
1858 | ||
1859 | }; // class pass_insert_vzeroupper | |
1860 | ||
1861 | const pass_data pass_data_stv = | |
1862 | { | |
1863 | RTL_PASS, /* type */ | |
1864 | "stv", /* name */ | |
1865 | OPTGROUP_NONE, /* optinfo_flags */ | |
1866 | TV_MACH_DEP, /* tv_id */ | |
1867 | 0, /* properties_required */ | |
1868 | 0, /* properties_provided */ | |
1869 | 0, /* properties_destroyed */ | |
1870 | 0, /* todo_flags_start */ | |
1871 | TODO_df_finish, /* todo_flags_finish */ | |
1872 | }; | |
1873 | ||
1874 | class pass_stv : public rtl_opt_pass | |
1875 | { | |
1876 | public: | |
1877 | pass_stv (gcc::context *ctxt) | |
1878 | : rtl_opt_pass (pass_data_stv, ctxt), | |
1879 | timode_p (false) | |
1880 | {} | |
1881 | ||
1882 | /* opt_pass methods: */ | |
1883 | virtual bool gate (function *) | |
1884 | { | |
f386ca41 | 1885 | return ((!timode_p || TARGET_64BIT) |
2bf6d935 ML |
1886 | && TARGET_STV && TARGET_SSE2 && optimize > 1); |
1887 | } | |
1888 | ||
1889 | virtual unsigned int execute (function *) | |
1890 | { | |
f386ca41 | 1891 | return convert_scalars_to_vector (timode_p); |
2bf6d935 ML |
1892 | } |
1893 | ||
1894 | opt_pass *clone () | |
1895 | { | |
1896 | return new pass_stv (m_ctxt); | |
1897 | } | |
1898 | ||
1899 | void set_pass_param (unsigned int n, bool param) | |
1900 | { | |
1901 | gcc_assert (n == 0); | |
1902 | timode_p = param; | |
1903 | } | |
1904 | ||
1905 | private: | |
1906 | bool timode_p; | |
1907 | }; // class pass_stv | |
1908 | ||
1909 | } // anon namespace | |
1910 | ||
/* Factory for the vzeroupper insertion pass, called by the pass
   manager.  */

rtl_opt_pass *
make_pass_insert_vzeroupper (gcc::context *ctxt)
{
  return new pass_insert_vzeroupper (ctxt);
}
1916 | ||
/* Factory for the scalar-to-vector pass, called by the pass
   manager.  */

rtl_opt_pass *
make_pass_stv (gcc::context *ctxt)
{
  return new pass_stv (ctxt);
}
1922 | ||
3dcea658 | 1923 | /* Inserting ENDBR and pseudo patchable-area instructions. */ |
2bf6d935 | 1924 | |
3dcea658 L |
/* Worker for the endbr_and_patchable_area pass.  If NEED_ENDBR, insert
   ENDBR at the function entry, after setjmp-like calls, at switch-table
   targets (under -mcet-switch) and at preserved labels; if
   PATCHABLE_AREA_SIZE is non-zero, insert the pseudo patchable-area insn
   at the function entry.  When profiling with -mfentry, both insertions
   are queued for x86_function_profiler instead of being emitted here.  */

static void
rest_of_insert_endbr_and_patchable_area (bool need_endbr,
					 unsigned int patchable_area_size)
{
  rtx endbr;
  rtx_insn *insn;
  rtx_insn *endbr_insn = NULL;	/* Entry ENDBR, if emitted directly.  */
  basic_block bb;

  if (need_endbr)
    {
      /* Currently emit EB if it's a tracking function, i.e. 'nocf_check'
	 is absent among function attributes.  Later an optimization will
	 be introduced to make analysis if an address of a static function
	 is taken.  A static function whose address is not taken will get
	 a nocf_check attribute.  This will allow to reduce the number of
	 EB.  */
      if (!lookup_attribute ("nocf_check",
			     TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
	  && (!flag_manual_endbr
	      || lookup_attribute ("cf_check",
				   DECL_ATTRIBUTES (cfun->decl)))
	  && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
	      || ix86_cmodel == CM_LARGE
	      || ix86_cmodel == CM_LARGE_PIC
	      || flag_force_indirect_call
	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
		  && DECL_DLLIMPORT_P (cfun->decl))))
	{
	  if (crtl->profile && flag_fentry)
	    {
	      /* Queue ENDBR insertion to x86_function_profiler.
		 NB: Any patchable-area insn will be inserted after
		 ENDBR.  */
	      cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
	    }
	  else
	    {
	      /* Emit ENDBR before the first insn of the first real
		 basic block.  */
	      endbr = gen_nop_endbr ();
	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      rtx_insn *insn = BB_HEAD (bb);
	      endbr_insn = emit_insn_before (endbr, insn);
	    }
	}
    }

  if (patchable_area_size)
    {
      if (crtl->profile && flag_fentry)
	{
	  /* Queue patchable-area insertion to x86_function_profiler.
	     NB: If there is a queued ENDBR, x86_function_profiler
	     will also handle patchable-area.  */
	  if (!cfun->machine->insn_queued_at_entrance)
	    cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
	}
      else
	{
	  /* Second operand records whether the patch area starts at the
	     very function entry (patch_area_entry == 0).  */
	  rtx patchable_area
	    = gen_patchable_area (GEN_INT (patchable_area_size),
				  GEN_INT (crtl->patch_area_entry == 0));
	  if (endbr_insn)
	    emit_insn_after (patchable_area, endbr_insn);
	  else
	    {
	      bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      insn = BB_HEAD (bb);
	      emit_insn_before (patchable_area, insn);
	    }
	}
    }

  if (!need_endbr)
    return;

  /* Scan every insn for the remaining ENDBR insertion points.  */
  bb = 0;
  FOR_EACH_BB_FN (bb, cfun)
    {
      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
	   insn = NEXT_INSN (insn))
	{
	  if (CALL_P (insn))
	    {
	      /* Calls with REG_SETJMP may return a second time via an
		 indirect branch, so the return point needs ENDBR.  */
	      need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
	      if (!need_endbr && !SIBLING_CALL_P (insn))
		{
		  rtx call = get_call_rtx_from (insn);
		  rtx fnaddr = XEXP (call, 0);
		  tree fndecl = NULL_TREE;

		  /* Also generate ENDBRANCH for non-tail call which
		     may return via indirect branch.  */
		  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
		    fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
		  if (fndecl == NULL_TREE)
		    fndecl = MEM_EXPR (fnaddr);
		  if (fndecl
		      && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
		      && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
		    fndecl = NULL_TREE;
		  if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
		    {
		      tree fntype = TREE_TYPE (fndecl);
		      if (lookup_attribute ("indirect_return",
					    TYPE_ATTRIBUTES (fntype)))
			need_endbr = true;
		    }
		}
	      if (!need_endbr)
		continue;
	      /* Generate ENDBRANCH after CALL, which can return more than
		 twice, setjmp-like functions.  */

	      endbr = gen_nop_endbr ();
	      emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
	      continue;
	    }

	  if (JUMP_P (insn) && flag_cet_switch)
	    {
	      rtx target = JUMP_LABEL (insn);
	      if (target == NULL_RTX || ANY_RETURN_P (target))
		continue;

	      /* Check the jump is a switch table.  */
	      rtx_insn *label = as_a<rtx_insn *> (target);
	      rtx_insn *table = next_insn (label);
	      if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
		continue;

	      /* For the indirect jump find out all places it jumps and insert
		 ENDBRANCH there.  It should be done under a special flag to
		 control ENDBRANCH generation for switch stmts.  */
	      edge_iterator ei;
	      edge e;
	      basic_block dest_blk;

	      FOR_EACH_EDGE (e, ei, bb->succs)
		{
		  rtx_insn *insn;

		  dest_blk = e->dest;
		  insn = BB_HEAD (dest_blk);
		  gcc_assert (LABEL_P (insn));
		  endbr = gen_nop_endbr ();
		  emit_insn_after (endbr, insn);
		}
	      continue;
	    }

	  /* Preserved labels may be reached indirectly (e.g. their
	     address is taken), so they need ENDBR too.  */
	  if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
	    {
	      endbr = gen_nop_endbr ();
	      emit_insn_after (endbr, insn);
	      continue;
	    }
	}
    }

  return;
}
2086 | ||
namespace {

/* Pass descriptor for ENDBR / patchable-area insertion.  */

const pass_data pass_data_insert_endbr_and_patchable_area =
{
  RTL_PASS, /* type.  */
  "endbr_and_patchable_area", /* name.  */
  OPTGROUP_NONE, /* optinfo_flags.  */
  TV_MACH_DEP, /* tv_id.  */
  0, /* properties_required.  */
  0, /* properties_provided.  */
  0, /* properties_destroyed.  */
  0, /* todo_flags_start.  */
  0, /* todo_flags_finish.  */
};

/* RTL pass inserting ENDBR and pseudo patchable-area instructions;
   the gate computes and caches the decisions used by execute.  */

class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
{
public:
  pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
  {}

  /* opt_pass methods: */
  /* Gate: run when -fcf-protection=branch is on or the function has a
     patchable area beyond the pre-entry part.  Also records both facts
     for execute.  */
  virtual bool gate (function *)
    {
      need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
      patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
      return need_endbr || patchable_area_size;
    }

  virtual unsigned int execute (function *)
    {
      timevar_push (TV_MACH_DEP);
      rest_of_insert_endbr_and_patchable_area (need_endbr,
					       patchable_area_size);
      timevar_pop (TV_MACH_DEP);
      return 0;
    }

private:
  bool need_endbr;		    /* Cached by gate for execute.  */
  unsigned int patchable_area_size; /* Likewise.  */
}; // class pass_insert_endbr_and_patchable_area

} // anon namespace
2132 | ||
/* Factory for the ENDBR / patchable-area insertion pass, called by the
   pass manager.  */

rtl_opt_pass *
make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
{
  return new pass_insert_endbr_and_patchable_area (ctxt);
}
2138 | ||
2139 | /* At entry of the nearest common dominator for basic blocks with | |
2140 | conversions, generate a single | |
2141 | vxorps %xmmN, %xmmN, %xmmN | |
2142 | for all | |
2143 | vcvtss2sd op, %xmmN, %xmmX | |
2144 | vcvtsd2ss op, %xmmN, %xmmX | |
2145 | vcvtsi2ss op, %xmmN, %xmmX | |
2146 | vcvtsi2sd op, %xmmN, %xmmX | |
2147 | ||
2148 | NB: We want to generate only a single vxorps to cover the whole | |
2149 | function. The LCM algorithm isn't appropriate here since it may | |
2150 | place a vxorps inside the loop. */ | |
2151 | ||
/* Worker for the rpad pass.  Rewrites scalar conversions that only
   partially update an XMM register (avx_partial_xmm_update attribute)
   into vec_dup + vec_merge with an explicitly zeroed register, then
   emits a single vxorps that zeroes that register at the nearest common
   dominator of all converted blocks, outside of any loop.  Returns 0.  */

static unsigned int
remove_partial_avx_dependency (void)
{
  timevar_push (TV_MACH_DEP);

  bitmap_obstack_initialize (NULL);
  /* Blocks containing at least one converted insn.  */
  bitmap convert_bbs = BITMAP_ALLOC (NULL);

  basic_block bb;
  rtx_insn *insn, *set_insn;
  rtx set;
  /* Shared zeroed V4SF register; allocated lazily on first use.  */
  rtx v4sf_const0 = NULL_RTX;

  /* New insns that carry REG_EH_REGION and thus may end basic blocks.  */
  auto_vec<rtx_insn *> control_flow_insns;

  /* We create invalid RTL initially so defer rescans.  */
  df_set_flags (DF_DEFER_INSN_RESCAN);

  FOR_EACH_BB_FN (bb, cfun)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  set = single_set (insn);
	  if (!set)
	    continue;

	  if (get_attr_avx_partial_xmm_update (insn)
	      != AVX_PARTIAL_XMM_UPDATE_TRUE)
	    continue;

	  if (!v4sf_const0)
	    v4sf_const0 = gen_reg_rtx (V4SFmode);

	  /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
	     SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and
	     vec_merge with subreg.  */
	  rtx src = SET_SRC (set);
	  rtx dest = SET_DEST (set);
	  machine_mode dest_mode = GET_MODE (dest);

	  rtx zero;
	  machine_mode dest_vecmode;
	  if (dest_mode == E_SFmode)
	    {
	      dest_vecmode = V4SFmode;
	      zero = v4sf_const0;
	    }
	  else
	    {
	      /* Reuse the V4SF zero register as V2DF via subreg.  */
	      dest_vecmode = V2DFmode;
	      zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
	    }

	  /* Change source to vector mode.  */
	  src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
	  src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
				   GEN_INT (HOST_WIDE_INT_1U));
	  /* Change destination to vector mode.  */
	  rtx vec = gen_reg_rtx (dest_vecmode);
	  /* Generate an XMM vector SET.  */
	  set = gen_rtx_SET (vec, src);
	  set_insn = emit_insn_before (set, insn);
	  df_insn_rescan (set_insn);

	  if (cfun->can_throw_non_call_exceptions)
	    {
	      /* Handle REG_EH_REGION note.  */
	      rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	      if (note)
		{
		  control_flow_insns.safe_push (set_insn);
		  add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
		}
	    }

	  /* The old insn now just extracts the scalar result from the
	     new vector register.  */
	  src = gen_rtx_SUBREG (dest_mode, vec, 0);
	  set = gen_rtx_SET (dest, src);

	  /* Drop possible dead definitions.  */
	  PATTERN (insn) = set;

	  INSN_CODE (insn) = -1;
	  recog_memoized (insn);
	  df_insn_rescan (insn);
	  bitmap_set_bit (convert_bbs, bb->index);
	}
    }

  if (v4sf_const0)
    {
      /* (Re-)discover loops so that bb->loop_father can be used in the
	 analysis below.  */
      calculate_dominance_info (CDI_DOMINATORS);
      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

      /* Generate a vxorps at entry of the nearest dominator for basic
	 blocks with conversions, which is in the fake loop that
	 contains the whole function, so that there is only a single
	 vxorps in the whole function.   */
      bb = nearest_common_dominator_for_set (CDI_DOMINATORS,
					     convert_bbs);
      /* Walk up out of any real loop: only the fake outermost loop has
	 the exit block as latch.  */
      while (bb->loop_father->latch
	     != EXIT_BLOCK_PTR_FOR_FN (cfun))
	bb = get_immediate_dominator (CDI_DOMINATORS,
				      bb->loop_father->header);

      set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));

      /* Find the first non-debug insn of BB, then insert the vxorps
	 before it (or at the block end if there is none).  */
      insn = BB_HEAD (bb);
      while (insn && !NONDEBUG_INSN_P (insn))
	{
	  if (insn == BB_END (bb))
	    {
	      insn = NULL;
	      break;
	    }
	  insn = NEXT_INSN (insn);
	}
      if (insn == BB_HEAD (bb))
	set_insn = emit_insn_before (set, insn);
      else
	set_insn = emit_insn_after (set,
				    insn ? PREV_INSN (insn) : BB_END (bb));
      df_insn_rescan (set_insn);
      loop_optimizer_finalize ();

      if (!control_flow_insns.is_empty ())
	{
	  free_dominance_info (CDI_DOMINATORS);

	  unsigned int i;
	  FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
	    if (control_flow_insn_p (insn))
	      {
		/* Split the block after insn.  There will be a fallthru
		   edge, which is OK so we keep it.  We have to create
		   the exception edges ourselves.  */
		bb = BLOCK_FOR_INSN (insn);
		split_block (bb, insn);
		rtl_make_eh_edge (NULL, bb, BB_END (bb));
	      }
	}
    }

  df_process_deferred_rescans ();
  df_clear_flags (DF_DEFER_INSN_RESCAN);
  bitmap_obstack_release (NULL);
  BITMAP_FREE (convert_bbs);

  timevar_pop (TV_MACH_DEP);
  return 0;
}
2307 | ||
namespace {

/* Pass descriptor for the partial-AVX-dependency removal pass.  */

const pass_data pass_data_remove_partial_avx_dependency =
{
  RTL_PASS, /* type */
  "rpad", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* RTL pass that breaks false dependencies of scalar conversions on the
   previous contents of their destination XMM register.  */

class pass_remove_partial_avx_dependency : public rtl_opt_pass
{
public:
  pass_remove_partial_avx_dependency (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
  {}

  /* opt_pass methods: */
  /* Gate: AVX with SSE scalar math, tuned for the partial-register
     dependency problem, and optimizing this function for speed.  */
  virtual bool gate (function *)
    {
      return (TARGET_AVX
	      && TARGET_SSE_PARTIAL_REG_DEPENDENCY
	      && TARGET_SSE_MATH
	      && optimize
	      && optimize_function_for_speed_p (cfun));
    }

  virtual unsigned int execute (function *)
    {
      return remove_partial_avx_dependency ();
    }
}; // class pass_rpad

} // anon namespace
2347 | ||
/* Factory for the partial-AVX-dependency removal pass, called by the
   pass manager.  */

rtl_opt_pass *
make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
{
  return new pass_remove_partial_avx_dependency (ctxt);
}
2353 | ||
2354 | /* This compares the priority of target features in function DECL1 | |
2355 | and DECL2. It returns positive value if DECL1 is higher priority, | |
2356 | negative value if DECL2 is higher priority and 0 if they are the | |
2357 | same. */ | |
2358 | ||
2359 | int | |
2360 | ix86_compare_version_priority (tree decl1, tree decl2) | |
2361 | { | |
2362 | unsigned int priority1 = get_builtin_code_for_version (decl1, NULL); | |
2363 | unsigned int priority2 = get_builtin_code_for_version (decl2, NULL); | |
2364 | ||
2365 | return (int)priority1 - (int)priority2; | |
2366 | } | |
2367 | ||
2368 | /* V1 and V2 point to function versions with different priorities | |
2369 | based on the target ISA. This function compares their priorities. */ | |
2370 | ||
2371 | static int | |
2372 | feature_compare (const void *v1, const void *v2) | |
2373 | { | |
2374 | typedef struct _function_version_info | |
2375 | { | |
2376 | tree version_decl; | |
2377 | tree predicate_chain; | |
2378 | unsigned int dispatch_priority; | |
2379 | } function_version_info; | |
2380 | ||
2381 | const function_version_info c1 = *(const function_version_info *)v1; | |
2382 | const function_version_info c2 = *(const function_version_info *)v2; | |
2383 | return (c2.dispatch_priority - c1.dispatch_priority); | |
2384 | } | |
2385 | ||
2386 | /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL | |
2387 | to return a pointer to VERSION_DECL if the outcome of the expression | |
2388 | formed by PREDICATE_CHAIN is true. This function will be called during | |
2389 | version dispatch to decide which function version to execute. It returns | |
2390 | the basic block at the end, to which more conditions can be added. */ | |
2391 | ||
/* Append to NEW_BB, inside FUNCTION_DECL, GIMPLE that returns the
   address of VERSION_DECL when every predicate in PREDICATE_CHAIN
   evaluates non-zero (the predicates are combined with MIN_EXPR, so all
   must hold).  A NULL PREDICATE_CHAIN emits an unconditional return of
   VERSION_DECL.  Returns the basic block at the end, to which further
   conditions can be chained.  */

static basic_block
add_condition_to_bb (tree function_decl, tree version_decl,
		     tree predicate_chain, basic_block new_bb)
{
  gimple *return_stmt;
  tree convert_expr, result_var;
  gimple *convert_stmt;
  gimple *call_cond_stmt;
  gimple *if_else_stmt;

  basic_block bb1, bb2, bb3;
  edge e12, e23;

  tree cond_var, and_expr_var = NULL_TREE;
  gimple_seq gseq;

  tree predicate_decl, predicate_arg;

  /* All CFG/GIMPLE construction happens in the dispatcher's cfun.  */
  push_cfun (DECL_STRUCT_FUNCTION (function_decl));

  gcc_assert (new_bb != NULL);
  gseq = bb_seq (new_bb);


  /* result_var = (void *) &version_decl; return result_var;  */
  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
			 build_fold_addr_expr (version_decl));
  result_var = create_tmp_var (ptr_type_node);
  convert_stmt = gimple_build_assign (result_var, convert_expr);
  return_stmt = gimple_build_return (result_var);

  if (predicate_chain == NULL_TREE)
    {
      /* No predicates: the return is unconditional (default version).  */
      gimple_seq_add_stmt (&gseq, convert_stmt);
      gimple_seq_add_stmt (&gseq, return_stmt);
      set_bb_seq (new_bb, gseq);
      gimple_set_bb (convert_stmt, new_bb);
      gimple_set_bb (return_stmt, new_bb);
      pop_cfun ();
      return new_bb;
    }

  /* Emit a call per predicate and AND the integer results together.  */
  while (predicate_chain != NULL)
    {
      cond_var = create_tmp_var (integer_type_node);
      predicate_decl = TREE_PURPOSE (predicate_chain);
      predicate_arg = TREE_VALUE (predicate_chain);
      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
      gimple_call_set_lhs (call_cond_stmt, cond_var);

      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
      gimple_set_bb (call_cond_stmt, new_bb);
      gimple_seq_add_stmt (&gseq, call_cond_stmt);

      predicate_chain = TREE_CHAIN (predicate_chain);

      if (and_expr_var == NULL)
	and_expr_var = cond_var;
      else
	{
	  gimple *assign_stmt;
	  /* Use MIN_EXPR to check if any integer is zero?.
	     and_expr_var = min_expr <cond_var, and_expr_var>  */
	  assign_stmt = gimple_build_assign (and_expr_var,
					     build2 (MIN_EXPR, integer_type_node,
						     cond_var, and_expr_var));

	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
	  gimple_set_bb (assign_stmt, new_bb);
	  gimple_seq_add_stmt (&gseq, assign_stmt);
	}
    }

  /* if (and_expr_var > 0) goto return-block; else goto next cond.  */
  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
				    integer_zero_node,
				    NULL_TREE, NULL_TREE);
  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
  gimple_set_bb (if_else_stmt, new_bb);
  gimple_seq_add_stmt (&gseq, if_else_stmt);

  gimple_seq_add_stmt (&gseq, convert_stmt);
  gimple_seq_add_stmt (&gseq, return_stmt);
  set_bb_seq (new_bb, gseq);

  /* Split the block into bb1 (condition), bb2 (return) and bb3 (the
     fall-through block for the next condition).  */
  bb1 = new_bb;
  e12 = split_block (bb1, if_else_stmt);
  bb2 = e12->dest;
  e12->flags &= ~EDGE_FALLTHRU;
  e12->flags |= EDGE_TRUE_VALUE;

  e23 = split_block (bb2, return_stmt);

  gimple_set_bb (convert_stmt, bb2);
  gimple_set_bb (return_stmt, bb2);

  bb3 = e23->dest;
  make_edge (bb1, bb3, EDGE_FALSE_VALUE);

  /* The return block exits the function rather than falling through.  */
  remove_edge (e23);
  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);

  pop_cfun ();

  return bb3;
}
2496 | ||
2497 | /* This function generates the dispatch function for | |
2498 | multi-versioned functions. DISPATCH_DECL is the function which will | |
2499 | contain the dispatch logic. FNDECLS are the function choices for | |
2500 | dispatch, and is a tree chain. EMPTY_BB is the basic block pointer | |
2501 | in DISPATCH_DECL in which the dispatch code is generated. */ | |
2502 | ||
/* Build the body of the version dispatcher DISPATCH_DECL.  FNDECLS_P is
   really a vec<tree> * whose first element is the default version and
   whose remaining elements are the specialized versions; EMPTY_BB is the
   block in DISPATCH_DECL where the dispatch code is emitted (updated to
   the final block on return).  Emits a __builtin_cpu_init call, then one
   predicate-guarded return per version in descending priority order,
   ending with the default.  Always returns 0.  */

static int
dispatch_function_versions (tree dispatch_decl,
			    void *fndecls_p,
			    basic_block *empty_bb)
{
  tree default_decl;
  gimple *ifunc_cpu_init_stmt;
  gimple_seq gseq;
  int ix;
  tree ele;
  vec<tree> *fndecls;
  unsigned int num_versions = 0;
  unsigned int actual_versions = 0;	/* Versions with a predicate.  */
  unsigned int i;

  /* Layout must match what feature_compare expects.  */
  struct _function_version_info
    {
      tree version_decl;
      tree predicate_chain;
      unsigned int dispatch_priority;
    }*function_version_info;

  gcc_assert (dispatch_decl != NULL
	      && fndecls_p != NULL
	      && empty_bb != NULL);

  /*fndecls_p is actually a vector.  */
  fndecls = static_cast<vec<tree> *> (fndecls_p);

  /* At least one more version other than the default.  */
  num_versions = fndecls->length ();
  gcc_assert (num_versions >= 2);

  function_version_info = (struct _function_version_info *)
    XNEWVEC (struct _function_version_info, (num_versions - 1));

  /* The first version in the vector is the default decl.  */
  default_decl = (*fndecls)[0];

  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));

  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
  ifunc_cpu_init_stmt
    = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
  set_bb_seq (*empty_bb, gseq);

  pop_cfun ();


  /* Collect priority and predicate chain for each non-default version;
     versions without a predicate chain are skipped.  */
  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    {
      tree version_decl = ele;
      tree predicate_chain = NULL_TREE;
      unsigned int priority;
      /* Get attribute string, parse it and find the right predicate decl.
	 The predicate function could be a lengthy combination of many
	 features, like arch-type and various isa-variants.  */
      priority = get_builtin_code_for_version (version_decl,
					       &predicate_chain);

      if (predicate_chain == NULL_TREE)
	continue;

      function_version_info [actual_versions].version_decl = version_decl;
      function_version_info [actual_versions].predicate_chain
	= predicate_chain;
      function_version_info [actual_versions].dispatch_priority = priority;
      actual_versions++;
    }

  /* Sort the versions according to descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In future, allow the user
     to specify a dispatch priority next to the version.  */
  qsort (function_version_info, actual_versions,
	 sizeof (struct _function_version_info), feature_compare);

  for (i = 0; i < actual_versions; ++i)
    *empty_bb = add_condition_to_bb (dispatch_decl,
				     function_version_info[i].version_decl,
				     function_version_info[i].predicate_chain,
				     *empty_bb);

  /* dispatch default version at the end.  */
  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
				   NULL, *empty_bb);

  free (function_version_info);
  return 0;
}
2598 | ||
2599 | /* This function changes the assembler name for functions that are | |
2600 | versions. If DECL is a function version and has a "target" | |
2601 | attribute, it appends the attribute string to its assembler name. */ | |
2602 | ||
/* Mangle the assembler name ID for the versioned function DECL by
   appending "." plus the sorted "target" attribute string.  The default
   version keeps ID unchanged.  Diagnoses gnu_inline versions and
   (unsupported) virtual-function versioning.  Returns the (possibly
   new) identifier.  */

static tree
ix86_mangle_function_version_assembler_name (tree decl, tree id)
{
  tree version_attr;
  const char *orig_name, *version_string;
  char *attr_str, *assembler_name;

  /* gnu_inline bodies are never emitted, so they cannot be versioned.  */
  if (DECL_DECLARED_INLINE_P (decl)
      && lookup_attribute ("gnu_inline",
			   DECL_ATTRIBUTES (decl)))
    error_at (DECL_SOURCE_LOCATION (decl),
	      "function versions cannot be marked as %<gnu_inline%>,"
	      " bodies have to be generated");

  if (DECL_VIRTUAL_P (decl)
      || DECL_VINDEX (decl))
    sorry ("virtual function multiversioning not supported");

  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));

  /* target attribute string cannot be NULL.  */
  gcc_assert (version_attr != NULL_TREE);

  orig_name = IDENTIFIER_POINTER (id);
  version_string
    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));

  /* The default version keeps the unsuffixed name.  */
  if (strcmp (version_string, "default") == 0)
    return id;

  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
  /* "+2": one for the '.' separator, one for the terminating NUL.  */
  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);

  sprintf (assembler_name, "%s.%s", orig_name, attr_str);

  /* Allow assembler name to be modified if already set.  */
  if (DECL_ASSEMBLER_NAME_SET_P (decl))
    SET_DECL_RTL (decl, NULL);

  tree ret = get_identifier (assembler_name);
  XDELETEVEC (attr_str);
  XDELETEVEC (assembler_name);
  return ret;
}
2647 | ||
/* Target hook TARGET_MANGLE_DECL_ASSEMBLER_NAME.  Applies the
   function-version suffix to ID for versioned function decls, then any
   subtarget-specific mangling.  Returns the resulting identifier.  */

tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
  /* For function version, add the target suffix to the assembler name.  */
  if (TREE_CODE (decl) == FUNCTION_DECL
      && DECL_FUNCTION_VERSIONED (decl))
    id = ix86_mangle_function_version_assembler_name (decl, id);
#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
#endif

  return id;
}
2661 | ||
2662 | /* Make a dispatcher declaration for the multi-versioned function DECL. | |
2663 | Calls to DECL function will be replaced with calls to the dispatcher | |
2664 | by the front-end. Returns the decl of the dispatcher function. */ | |
2665 | ||
/* Make a dispatcher declaration for the multi-versioned function DECL
   (passed as void * by the front-end hook).  Reorders the version chain
   so the default version is first, then creates (or reuses) an IFUNC
   dispatcher decl shared by all versions.  Returns the dispatcher decl,
   or NULL when there is no default version; errors out when the target
   lacks IFUNC support.  */

tree
ix86_get_function_versions_dispatcher (void *decl)
{
  tree fn = (tree) decl;
  struct cgraph_node *node = NULL;
  struct cgraph_node *default_node = NULL;
  struct cgraph_function_version_info *node_v = NULL;
  struct cgraph_function_version_info *first_v = NULL;

  tree dispatch_decl = NULL;

  struct cgraph_function_version_info *default_version_info = NULL;

  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));

  node = cgraph_node::get (fn);
  gcc_assert (node != NULL);

  node_v = node->function_version ();
  gcc_assert (node_v != NULL);

  /* Reuse a dispatcher created for another version of this function.  */
  if (node_v->dispatcher_resolver != NULL)
    return node_v->dispatcher_resolver;

  /* Find the default version and make it the first node.  */
  first_v = node_v;
  /* Go to the beginning of the chain.  */
  while (first_v->prev != NULL)
    first_v = first_v->prev;
  default_version_info = first_v;
  while (default_version_info != NULL)
    {
      if (is_function_default_version
	    (default_version_info->this_node->decl))
	break;
      default_version_info = default_version_info->next;
    }

  /* If there is no default node, just return NULL.  */
  if (default_version_info == NULL)
    return NULL;

  /* Make default info the first node.  */
  if (first_v != default_version_info)
    {
      /* Unlink the default node and relink it at the chain head.  */
      default_version_info->prev->next = default_version_info->next;
      if (default_version_info->next)
	default_version_info->next->prev = default_version_info->prev;
      first_v->prev = default_version_info;
      default_version_info->next = first_v;
      default_version_info->prev = NULL;
    }

  default_node = default_version_info->this_node;

#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
  if (targetm.has_ifunc_p ())
    {
      struct cgraph_function_version_info *it_v = NULL;
      struct cgraph_node *dispatcher_node = NULL;
      struct cgraph_function_version_info *dispatcher_version_info = NULL;

      /* Right now, the dispatching is done via ifunc.  */
      dispatch_decl = make_dispatcher_decl (default_node->decl);

      dispatcher_node = cgraph_node::get_create (dispatch_decl);
      gcc_assert (dispatcher_node != NULL);
      dispatcher_node->dispatcher_function = 1;
      dispatcher_version_info
	= dispatcher_node->insert_new_function_version ();
      dispatcher_version_info->next = default_version_info;
      dispatcher_node->definition = 1;

      /* Set the dispatcher for all the versions.  */
      it_v = default_version_info;
      while (it_v != NULL)
	{
	  it_v->dispatcher_resolver = dispatch_decl;
	  it_v = it_v->next;
	}
    }
  else
#endif
    {
      error_at (DECL_SOURCE_LOCATION (default_node->decl),
		"multiversioning needs %<ifunc%> which is not supported "
		"on this target");
    }

  return dispatch_decl;
}
2757 | ||
2758 | /* Make the resolver function decl to dispatch the versions of | |
2759 | a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is | |
2760 | ifunc alias that will point to the created resolver. Create an | |
2761 | empty basic block in the resolver and store the pointer in | |
2762 | EMPTY_BB. Return the decl of the resolver function. */ | |
2763 | ||
2764 | static tree | |
2765 | make_resolver_func (const tree default_decl, | |
2766 | const tree ifunc_alias_decl, | |
2767 | basic_block *empty_bb) | |
2768 | { | |
c2bd2b46 | 2769 | tree decl, type, t; |
2bf6d935 | 2770 | |
c2bd2b46 ML |
2771 | /* Create resolver function name based on default_decl. */ |
2772 | tree decl_name = clone_function_name (default_decl, "resolver"); | |
2773 | const char *resolver_name = IDENTIFIER_POINTER (decl_name); | |
2bf6d935 ML |
2774 | |
2775 | /* The resolver function should return a (void *). */ | |
2776 | type = build_function_type_list (ptr_type_node, NULL_TREE); | |
2777 | ||
2778 | decl = build_fn_decl (resolver_name, type); | |
2bf6d935 ML |
2779 | SET_DECL_ASSEMBLER_NAME (decl, decl_name); |
2780 | ||
2781 | DECL_NAME (decl) = decl_name; | |
2782 | TREE_USED (decl) = 1; | |
2783 | DECL_ARTIFICIAL (decl) = 1; | |
2784 | DECL_IGNORED_P (decl) = 1; | |
2785 | TREE_PUBLIC (decl) = 0; | |
2786 | DECL_UNINLINABLE (decl) = 1; | |
2787 | ||
2788 | /* Resolver is not external, body is generated. */ | |
2789 | DECL_EXTERNAL (decl) = 0; | |
2790 | DECL_EXTERNAL (ifunc_alias_decl) = 0; | |
2791 | ||
2792 | DECL_CONTEXT (decl) = NULL_TREE; | |
2793 | DECL_INITIAL (decl) = make_node (BLOCK); | |
2794 | DECL_STATIC_CONSTRUCTOR (decl) = 0; | |
2795 | ||
2796 | if (DECL_COMDAT_GROUP (default_decl) | |
2797 | || TREE_PUBLIC (default_decl)) | |
2798 | { | |
2799 | /* In this case, each translation unit with a call to this | |
2800 | versioned function will put out a resolver. Ensure it | |
2801 | is comdat to keep just one copy. */ | |
2802 | DECL_COMDAT (decl) = 1; | |
2803 | make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); | |
2804 | } | |
724ec02c ML |
2805 | else |
2806 | TREE_PUBLIC (ifunc_alias_decl) = 0; | |
2807 | ||
2bf6d935 ML |
2808 | /* Build result decl and add to function_decl. */ |
2809 | t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); | |
2810 | DECL_CONTEXT (t) = decl; | |
2811 | DECL_ARTIFICIAL (t) = 1; | |
2812 | DECL_IGNORED_P (t) = 1; | |
2813 | DECL_RESULT (decl) = t; | |
2814 | ||
2815 | gimplify_function_tree (decl); | |
2816 | push_cfun (DECL_STRUCT_FUNCTION (decl)); | |
2817 | *empty_bb = init_lowered_empty_function (decl, false, | |
2818 | profile_count::uninitialized ()); | |
2819 | ||
2820 | cgraph_node::add_new_function (decl, true); | |
2821 | symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl)); | |
2822 | ||
2823 | pop_cfun (); | |
2824 | ||
2825 | gcc_assert (ifunc_alias_decl != NULL); | |
2826 | /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */ | |
2827 | DECL_ATTRIBUTES (ifunc_alias_decl) | |
2828 | = make_attribute ("ifunc", resolver_name, | |
2829 | DECL_ATTRIBUTES (ifunc_alias_decl)); | |
2830 | ||
2831 | /* Create the alias for dispatch to resolver here. */ | |
2832 | cgraph_node::create_same_body_alias (ifunc_alias_decl, decl); | |
2bf6d935 ML |
2833 | return decl; |
2834 | } | |
2835 | ||
2836 | /* Generate the dispatching code body to dispatch multi-versioned function | |
2837 | DECL. The target hook is called to process the "target" attributes and | |
2838 | provide the code to dispatch the right function at run-time. NODE points | |
2839 | to the dispatcher decl whose body will be created. */ | |
2840 | ||
2841 | tree | |
2842 | ix86_generate_version_dispatcher_body (void *node_p) | |
2843 | { | |
2844 | tree resolver_decl; | |
2845 | basic_block empty_bb; | |
2846 | tree default_ver_decl; | |
2847 | struct cgraph_node *versn; | |
2848 | struct cgraph_node *node; | |
2849 | ||
2850 | struct cgraph_function_version_info *node_version_info = NULL; | |
2851 | struct cgraph_function_version_info *versn_info = NULL; | |
2852 | ||
2853 | node = (cgraph_node *)node_p; | |
2854 | ||
2855 | node_version_info = node->function_version (); | |
2856 | gcc_assert (node->dispatcher_function | |
2857 | && node_version_info != NULL); | |
2858 | ||
2859 | if (node_version_info->dispatcher_resolver) | |
2860 | return node_version_info->dispatcher_resolver; | |
2861 | ||
2862 | /* The first version in the chain corresponds to the default version. */ | |
2863 | default_ver_decl = node_version_info->next->this_node->decl; | |
2864 | ||
2865 | /* node is going to be an alias, so remove the finalized bit. */ | |
2866 | node->definition = false; | |
2867 | ||
2868 | resolver_decl = make_resolver_func (default_ver_decl, | |
2869 | node->decl, &empty_bb); | |
2870 | ||
2871 | node_version_info->dispatcher_resolver = resolver_decl; | |
2872 | ||
2873 | push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); | |
2874 | ||
2875 | auto_vec<tree, 2> fn_ver_vec; | |
2876 | ||
2877 | for (versn_info = node_version_info->next; versn_info; | |
2878 | versn_info = versn_info->next) | |
2879 | { | |
2880 | versn = versn_info->this_node; | |
2881 | /* Check for virtual functions here again, as by this time it should | |
2882 | have been determined if this function needs a vtable index or | |
2883 | not. This happens for methods in derived classes that override | |
2884 | virtual methods in base classes but are not explicitly marked as | |
2885 | virtual. */ | |
2886 | if (DECL_VINDEX (versn->decl)) | |
2887 | sorry ("virtual function multiversioning not supported"); | |
2888 | ||
2889 | fn_ver_vec.safe_push (versn->decl); | |
2890 | } | |
2891 | ||
2892 | dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); | |
2893 | cgraph_edge::rebuild_edges (); | |
2894 | pop_cfun (); | |
2895 | return resolver_decl; | |
2896 | } | |
2897 | ||
2898 |