gcc/config/i386/i386.cc
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2023 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "cselib.h"
63 #include "sched-int.h"
64 #include "opts.h"
65 #include "tree-pass.h"
66 #include "context.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "gimple-fold.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
73 #include "builtins.h"
74 #include "rtl-iter.h"
75 #include "tree-iterator.h"
76 #include "dbgcnt.h"
77 #include "case-cfn-macros.h"
78 #include "dojump.h"
79 #include "fold-const-call.h"
80 #include "tree-vrp.h"
81 #include "tree-ssanames.h"
82 #include "selftest.h"
83 #include "selftest-rtl.h"
84 #include "print-rtl.h"
85 #include "intl.h"
86 #include "ifcvt.h"
87 #include "symbol-summary.h"
88 #include "ipa-prop.h"
89 #include "ipa-fnsummary.h"
90 #include "wide-int-bitmask.h"
91 #include "tree-vector-builder.h"
92 #include "debug.h"
93 #include "dwarf2out.h"
94 #include "i386-options.h"
95 #include "i386-builtins.h"
96 #include "i386-expand.h"
97 #include "i386-features.h"
98 #include "function-abi.h"
99 #include "rtl-error.h"
100
101 /* This file should be included last. */
102 #include "target-def.h"
103
104 static rtx legitimize_dllimport_symbol (rtx, bool);
105 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
106 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
107 static void ix86_emit_restore_reg_using_pop (rtx);
108
109
110 #ifndef CHECK_STACK_LIMIT
111 #define CHECK_STACK_LIMIT (-1)
112 #endif
113
114 /* Return the index of the given mode in the multiply and division cost tables. */
115 #define MODE_INDEX(mode) \
116 ((mode) == QImode ? 0 \
117 : (mode) == HImode ? 1 \
118 : (mode) == SImode ? 2 \
119 : (mode) == DImode ? 3 \
120 : 4)
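/* For example, MODE_INDEX (SImode) is 2, so SImode entries sit third in the
   per-mode multiply and division cost arrays; any mode not listed above
   falls through to index 4.  */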
121
122
123 /* Set by -mtune. */
124 const struct processor_costs *ix86_tune_cost = NULL;
125
126 /* Set by -mtune or -Os. */
127 const struct processor_costs *ix86_cost = NULL;
128
129 /* If the average insn count for a single function invocation is
130 lower than this constant, emit fast (but longer) prologue and
131 epilogue code. */
132 #define FAST_PROLOGUE_INSN_COUNT 20
133
134 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
135 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
136 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
137 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
138
139 /* Array of the smallest class containing reg number REGNO, indexed by
140 REGNO. Used by REGNO_REG_CLASS in i386.h. */
141
142 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
143 {
144 /* ax, dx, cx, bx */
145 AREG, DREG, CREG, BREG,
146 /* si, di, bp, sp */
147 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
148 /* FP registers */
149 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
150 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
151 /* arg pointer, flags, fpsr, frame */
152 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
153 /* SSE registers */
154 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
155 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
156 /* MMX registers */
157 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
158 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
159 /* REX registers */
160 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
161 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
162 /* SSE REX registers */
163 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
164 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
165 /* AVX-512 SSE registers */
166 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
169 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
170 /* Mask registers. */
171 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
172 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
173 };
174
175 /* The "default" register map used in 32bit mode. */
176
177 int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
178 {
179 /* general regs */
180 0, 2, 1, 3, 6, 7, 4, 5,
181 /* fp regs */
182 12, 13, 14, 15, 16, 17, 18, 19,
183 /* arg, flags, fpsr, frame */
184 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
185 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
186 /* SSE */
187 21, 22, 23, 24, 25, 26, 27, 28,
188 /* MMX */
189 29, 30, 31, 32, 33, 34, 35, 36,
190 /* extended integer registers */
191 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
192 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193 /* extended sse registers */
194 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
195 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196 /* AVX-512 registers 16-23 */
197 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 /* AVX-512 registers 24-31 */
200 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
201 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
202 /* Mask registers */
203 93, 94, 95, 96, 97, 98, 99, 100
204 };
205
206 /* The "default" register map used in 64bit mode. */
207
208 int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
209 {
210 /* general regs */
211 0, 1, 2, 3, 4, 5, 6, 7,
212 /* fp regs */
213 33, 34, 35, 36, 37, 38, 39, 40,
214 /* arg, flags, fpsr, frame */
215 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
216 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
217 /* SSE */
218 17, 18, 19, 20, 21, 22, 23, 24,
219 /* MMX */
220 41, 42, 43, 44, 45, 46, 47, 48,
221 /* extended integer registers */
222 8, 9, 10, 11, 12, 13, 14, 15,
223 /* extended SSE registers */
224 25, 26, 27, 28, 29, 30, 31, 32,
225 /* AVX-512 registers 16-23 */
226 67, 68, 69, 70, 71, 72, 73, 74,
227 /* AVX-512 registers 24-31 */
228 75, 76, 77, 78, 79, 80, 81, 82,
229 /* Mask registers */
230 118, 119, 120, 121, 122, 123, 124, 125
231 };
232
233 /* Define the register numbers to be used in Dwarf debugging information.
234 The SVR4 reference port C compiler uses the following register numbers
235 in its Dwarf output code:
236 0 for %eax (gcc regno = 0)
237 1 for %ecx (gcc regno = 2)
238 2 for %edx (gcc regno = 1)
239 3 for %ebx (gcc regno = 3)
240 4 for %esp (gcc regno = 7)
241 5 for %ebp (gcc regno = 6)
242 6 for %esi (gcc regno = 4)
243 7 for %edi (gcc regno = 5)
244 The following three DWARF register numbers are never generated by
245 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
246 believed these numbers have these meanings.
247 8 for %eip (no gcc equivalent)
248 9 for %eflags (gcc regno = 17)
249 10 for %trapno (no gcc equivalent)
250 It is not at all clear how we should number the FP stack registers
251 for the x86 architecture. If the version of SDB on x86/svr4 were
252 a bit less brain dead with respect to floating-point then we would
253 have a precedent to follow with respect to DWARF register numbers
254 for x86 FP registers, but the SDB on x86/svr4 was so completely
255 broken with respect to FP registers that it is hardly worth thinking
256 of it as something to strive for compatibility with.
257 The version of x86/svr4 SDB I had does (partially)
258 seem to believe that DWARF register number 11 is associated with
259 the x86 register %st(0), but that's about all. Higher DWARF
260 register numbers don't seem to be associated with anything in
261 particular, and even for DWARF regno 11, SDB only seemed to under-
262 stand that it should say that a variable lives in %st(0) (when
263 asked via an `=' command) if we said it was in DWARF regno 11,
264 but SDB still printed garbage when asked for the value of the
265 variable in question (via a `/' command).
266 (Also note that the labels SDB printed for various FP stack regs
267 when doing an `x' command were all wrong.)
268 Note that these problems generally don't affect the native SVR4
269 C compiler because it doesn't allow the use of -O with -g and
270 because when it is *not* optimizing, it allocates a memory
271 location for each floating-point variable, and the memory
272 location is what gets described in the DWARF AT_location
273 attribute for the variable in question.
274 Regardless of the severe mental illness of the x86/svr4 SDB, we
275 do something sensible here and we use the following DWARF
276 register numbers. Note that these are all stack-top-relative
277 numbers.
278 11 for %st(0) (gcc regno = 8)
279 12 for %st(1) (gcc regno = 9)
280 13 for %st(2) (gcc regno = 10)
281 14 for %st(3) (gcc regno = 11)
282 15 for %st(4) (gcc regno = 12)
283 16 for %st(5) (gcc regno = 13)
284 17 for %st(6) (gcc regno = 14)
285 18 for %st(7) (gcc regno = 15)
286 */
287 int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
288 {
289 /* general regs */
290 0, 2, 1, 3, 6, 7, 5, 4,
291 /* fp regs */
292 11, 12, 13, 14, 15, 16, 17, 18,
293 /* arg, flags, fpsr, frame */
294 IGNORED_DWARF_REGNUM, 9,
295 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
296 /* SSE registers */
297 21, 22, 23, 24, 25, 26, 27, 28,
298 /* MMX registers */
299 29, 30, 31, 32, 33, 34, 35, 36,
300 /* extended integer registers */
301 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
302 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303 /* extended sse registers */
304 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
305 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306 /* AVX-512 registers 16-23 */
307 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
308 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309 /* AVX-512 registers 24-31 */
310 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
311 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
312 /* Mask registers */
313 93, 94, 95, 96, 97, 98, 99, 100
314 };
315
316 /* Define parameter passing and return registers. */
317
318 static int const x86_64_int_parameter_registers[6] =
319 {
320 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
321 };
322
323 static int const x86_64_ms_abi_int_parameter_registers[4] =
324 {
325 CX_REG, DX_REG, R8_REG, R9_REG
326 };
327
328 static int const x86_64_int_return_registers[4] =
329 {
330 AX_REG, DX_REG, DI_REG, SI_REG
331 };
332
333 /* Define the structure for the machine field in struct function. */
334
335 struct GTY(()) stack_local_entry {
336 unsigned short mode;
337 unsigned short n;
338 rtx rtl;
339 struct stack_local_entry *next;
340 };
341
342 /* Which cpu are we scheduling for. */
343 enum attr_cpu ix86_schedule;
344
345 /* Which cpu are we optimizing for. */
346 enum processor_type ix86_tune;
347
348 /* Which instruction set architecture to use. */
349 enum processor_type ix86_arch;
350
351 /* True if processor has SSE prefetch instruction. */
352 unsigned char ix86_prefetch_sse;
353
354 /* Preferred alignment for stack boundary in bits. */
355 unsigned int ix86_preferred_stack_boundary;
356
357 /* Alignment for incoming stack boundary in bits specified at
358 command line. */
359 unsigned int ix86_user_incoming_stack_boundary;
360
361 /* Default alignment for incoming stack boundary in bits. */
362 unsigned int ix86_default_incoming_stack_boundary;
363
364 /* Alignment for incoming stack boundary in bits. */
365 unsigned int ix86_incoming_stack_boundary;
366
367 /* True if there is no direct access to extern symbols. */
368 bool ix86_has_no_direct_extern_access;
369
370 /* Calling abi specific va_list type nodes. */
371 tree sysv_va_list_type_node;
372 tree ms_va_list_type_node;
373
374 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
375 char internal_label_prefix[16];
376 int internal_label_prefix_len;
377
378 /* Fence to use after loop using movnt. */
379 tree x86_mfence;
380
381 /* Register class used for passing a given 64-bit part of the argument.
382 These represent the classes documented by the psABI, with the exception
383 of the SSESF and SSEDF classes, which are basically the SSE class; GCC just
384 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
385 
386 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
387 whenever possible (the upper half is just padding). */
388 enum x86_64_reg_class
389 {
390 X86_64_NO_CLASS,
391 X86_64_INTEGER_CLASS,
392 X86_64_INTEGERSI_CLASS,
393 X86_64_SSE_CLASS,
394 X86_64_SSEHF_CLASS,
395 X86_64_SSESF_CLASS,
396 X86_64_SSEDF_CLASS,
397 X86_64_SSEUP_CLASS,
398 X86_64_X87_CLASS,
399 X86_64_X87UP_CLASS,
400 X86_64_COMPLEX_X87_CLASS,
401 X86_64_MEMORY_CLASS
402 };
403
404 #define MAX_CLASSES 8
405
406 /* Table of constants used by fldpi, fldln2, etc.... */
407 static REAL_VALUE_TYPE ext_80387_constants_table [5];
408 static bool ext_80387_constants_init;
409
410 \f
411 static rtx ix86_function_value (const_tree, const_tree, bool);
412 static bool ix86_function_value_regno_p (const unsigned int);
413 static unsigned int ix86_function_arg_boundary (machine_mode,
414 const_tree);
415 static rtx ix86_static_chain (const_tree, bool);
416 static int ix86_function_regparm (const_tree, const_tree);
417 static void ix86_compute_frame_layout (void);
418 static tree ix86_canonical_va_list_type (tree);
419 static unsigned int split_stack_prologue_scratch_regno (void);
420 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
421
422 static bool ix86_can_inline_p (tree, tree);
423 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
424
425 \f
426 /* Whether -mtune= or -march= were specified */
427 int ix86_tune_defaulted;
428 int ix86_arch_specified;
429 \f
430 /* Return true if a red zone is in use. We can't use the red zone when
431 there are local indirect jumps, like "indirect_jump" or "tablejump",
432 which jump to another place in the function, since the "call" in the
433 indirect thunk pushes the return address onto the stack, destroying
434 the red zone.
435 
436 TODO: If we can reserve the first 2 WORDs in the red zone, one for
437 PUSH and another for CALL, we can allow local indirect jumps with
438 an indirect thunk. */
439
440 bool
441 ix86_using_red_zone (void)
442 {
443 return (TARGET_RED_ZONE
444 && !TARGET_64BIT_MS_ABI
445 && (!cfun->machine->has_local_indirect_jump
446 || cfun->machine->indirect_branch_type == indirect_branch_keep));
447 }
448 \f
449 /* Return true if profiling code should be emitted before the
450 prologue, false otherwise.
451 Note: for x86 with "hotfix" this case is sorried (not supported). */
452 static bool
453 ix86_profile_before_prologue (void)
454 {
455 return flag_fentry != 0;
456 }
457
458 /* Update register usage after having seen the compiler flags. */
459
460 static void
461 ix86_conditional_register_usage (void)
462 {
463 int i, c_mask;
464
465 /* If there are no caller-saved registers, preserve all registers
466 except fixed_regs and the registers used for the function return value,
467 since aggregate_value_p checks call_used_regs[regno] on the return
468 value. */
469 if (cfun && cfun->machine->no_caller_saved_registers)
470 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
471 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
472 call_used_regs[i] = 0;
473
474 /* For 32-bit targets, disable the REX registers. */
475 if (! TARGET_64BIT)
476 {
477 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
478 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
479 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
480 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
481 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
482 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
483 }
484
485 /* See the definition of CALL_USED_REGISTERS in i386.h. */
486 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
487
488 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
489
490 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
491 {
492 /* Set/reset conditionally defined registers from
493 CALL_USED_REGISTERS initializer. */
494 if (call_used_regs[i] > 1)
495 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
496
497 /* Calculate registers of CLOBBERED_REGS register set
498 as call used registers from GENERAL_REGS register set. */
499 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
500 && call_used_regs[i])
501 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
502 }
503
504 /* If MMX is disabled, disable the registers. */
505 if (! TARGET_MMX)
506 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
507
508 /* If SSE is disabled, disable the registers. */
509 if (! TARGET_SSE)
510 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
511
512 /* If the FPU is disabled, disable the registers. */
513 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
514 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
515
516 /* If AVX512F is disabled, disable the registers. */
517 if (! TARGET_AVX512F)
518 {
519 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
520 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
521
522 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
523 }
524 }
525
526 /* Canonicalize a comparison from one we don't have to one we do have. */
527
528 static void
529 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
530 bool op0_preserve_value)
531 {
532 /* The order of operands in x87 ficom compare is forced by combine in
533 simplify_comparison () function. Float operator is treated as RTX_OBJ
534 with a precedence over other operators and is always put in the first
535 place. Swap condition and operands to match ficom instruction. */
536 if (!op0_preserve_value
537 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
538 {
539 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
540
541 /* We are called only for compares that are split to SAHF instruction.
542 Ensure that we have setcc/jcc insn for the swapped condition. */
543 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
544 {
545 std::swap (*op0, *op1);
546 *code = (int) scode;
547 }
548 }
549 }
550 \f
551 \f
552 /* Hook to determine if one function can safely inline another. */
553
554 static bool
555 ix86_can_inline_p (tree caller, tree callee)
556 {
557 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
558 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
559
560 /* Changes to these flags can be tolerated for always_inline functions.
561 Let's hope the user knows what they are doing. */
562 unsigned HOST_WIDE_INT always_inline_safe_mask
563 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
564 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
565 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
566 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
567 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
568 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
569 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
570
571
572 if (!callee_tree)
573 callee_tree = target_option_default_node;
574 if (!caller_tree)
575 caller_tree = target_option_default_node;
576 if (callee_tree == caller_tree)
577 return true;
578
579 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
580 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
581 bool ret = false;
582 bool always_inline
583 = (DECL_DISREGARD_INLINE_LIMITS (callee)
584 && lookup_attribute ("always_inline",
585 DECL_ATTRIBUTES (callee)));
586
587 /* If callee only uses GPRs, ignore MASK_80387. */
588 if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
589 always_inline_safe_mask |= MASK_80387;
590
591 cgraph_node *callee_node = cgraph_node::get (callee);
592 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
593 function can inline an SSE2 function, but an SSE2 function can't inline
594 an SSE4 function. */
595 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
596 != callee_opts->x_ix86_isa_flags)
597 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
598 != callee_opts->x_ix86_isa_flags2))
599 ret = false;
600
601 /* See if we have the same non-isa options. */
602 else if ((!always_inline
603 && caller_opts->x_target_flags != callee_opts->x_target_flags)
604 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
605 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
606 ret = false;
607
608 /* See if arch, tune, etc. are the same. */
609 else if (caller_opts->arch != callee_opts->arch)
610 ret = false;
611
612 else if (!always_inline && caller_opts->tune != callee_opts->tune)
613 ret = false;
614
615 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
616 /* If the callee doesn't use FP expressions, differences in
617 ix86_fpmath can be ignored. We are called from FEs
618 for multi-versioning call optimization, so beware of
619 ipa_fn_summaries not being available. */
620 && (! ipa_fn_summaries
621 || ipa_fn_summaries->get (callee_node) == NULL
622 || ipa_fn_summaries->get (callee_node)->fp_expressions))
623 ret = false;
624
625 else if (!always_inline
626 && caller_opts->branch_cost != callee_opts->branch_cost)
627 ret = false;
628
629 else
630 ret = true;
631
632 return ret;
633 }
634 \f
635 /* Return true if this goes in large data/bss. */
636
637 static bool
638 ix86_in_large_data_p (tree exp)
639 {
640 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
641 return false;
642
643 if (exp == NULL_TREE)
644 return false;
645
646 /* Functions are never large data. */
647 if (TREE_CODE (exp) == FUNCTION_DECL)
648 return false;
649
650 /* Automatic variables are never large data. */
651 if (VAR_P (exp) && !is_global_var (exp))
652 return false;
653
654 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
655 {
656 const char *section = DECL_SECTION_NAME (exp);
657 if (strcmp (section, ".ldata") == 0
658 || strcmp (section, ".lbss") == 0)
659 return true;
660 return false;
661 }
662 else
663 {
664 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
665
666 /* If this is an incomplete type with size 0, then we can't put it
667 in data because it might be too big when completed. Also,
668 int_size_in_bytes returns -1 if the size can vary or is larger than
669 an integer, in which case it is also safer to assume that it goes in
670 large data. */
671 if (size <= 0 || size > ix86_section_threshold)
672 return true;
673 }
674
675 return false;
676 }
677
678 /* i386-specific section flag to mark large sections. */
679 #define SECTION_LARGE SECTION_MACH_DEP
680
681 /* Switch to the appropriate section for output of DECL.
682 DECL is either a `VAR_DECL' node or a constant of some sort.
683 RELOC indicates whether forming the initial value of DECL requires
684 link-time relocations. */
685
686 ATTRIBUTE_UNUSED static section *
687 x86_64_elf_select_section (tree decl, int reloc,
688 unsigned HOST_WIDE_INT align)
689 {
690 if (ix86_in_large_data_p (decl))
691 {
692 const char *sname = NULL;
693 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
694 switch (categorize_decl_for_section (decl, reloc))
695 {
696 case SECCAT_DATA:
697 sname = ".ldata";
698 break;
699 case SECCAT_DATA_REL:
700 sname = ".ldata.rel";
701 break;
702 case SECCAT_DATA_REL_LOCAL:
703 sname = ".ldata.rel.local";
704 break;
705 case SECCAT_DATA_REL_RO:
706 sname = ".ldata.rel.ro";
707 break;
708 case SECCAT_DATA_REL_RO_LOCAL:
709 sname = ".ldata.rel.ro.local";
710 break;
711 case SECCAT_BSS:
712 sname = ".lbss";
713 flags |= SECTION_BSS;
714 break;
715 case SECCAT_RODATA:
716 case SECCAT_RODATA_MERGE_STR:
717 case SECCAT_RODATA_MERGE_STR_INIT:
718 case SECCAT_RODATA_MERGE_CONST:
719 sname = ".lrodata";
720 flags &= ~SECTION_WRITE;
721 break;
722 case SECCAT_SRODATA:
723 case SECCAT_SDATA:
724 case SECCAT_SBSS:
725 gcc_unreachable ();
726 case SECCAT_TEXT:
727 case SECCAT_TDATA:
728 case SECCAT_TBSS:
729 /* We don't split these for the medium model. Place them into
730 default sections and hope for the best. */
731 break;
732 }
733 if (sname)
734 {
735 /* We might get called with string constants, but get_named_section
736 doesn't like them as they are not DECLs. Also, we need to set
737 flags in that case. */
738 if (!DECL_P (decl))
739 return get_section (sname, flags, NULL);
740 return get_named_section (decl, sname, reloc);
741 }
742 }
743 return default_elf_select_section (decl, reloc, align);
744 }
745
746 /* Select a set of attributes for section NAME based on the properties
747 of DECL and whether or not RELOC indicates that DECL's initializer
748 might contain runtime relocations. */
749
750 static unsigned int ATTRIBUTE_UNUSED
751 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
752 {
753 unsigned int flags = default_section_type_flags (decl, name, reloc);
754
755 if (ix86_in_large_data_p (decl))
756 flags |= SECTION_LARGE;
757
758 if (decl == NULL_TREE
759 && (strcmp (name, ".ldata.rel.ro") == 0
760 || strcmp (name, ".ldata.rel.ro.local") == 0))
761 flags |= SECTION_RELRO;
762
763 if (strcmp (name, ".lbss") == 0
764 || startswith (name, ".lbss.")
765 || startswith (name, ".gnu.linkonce.lb."))
766 flags |= SECTION_BSS;
767
768 return flags;
769 }
770
771 /* Build up a unique section name, expressed as a
772 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
773 RELOC indicates whether the initial value of EXP requires
774 link-time relocations. */
775
776 static void ATTRIBUTE_UNUSED
777 x86_64_elf_unique_section (tree decl, int reloc)
778 {
779 if (ix86_in_large_data_p (decl))
780 {
781 const char *prefix = NULL;
782 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
783 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
784
785 switch (categorize_decl_for_section (decl, reloc))
786 {
787 case SECCAT_DATA:
788 case SECCAT_DATA_REL:
789 case SECCAT_DATA_REL_LOCAL:
790 case SECCAT_DATA_REL_RO:
791 case SECCAT_DATA_REL_RO_LOCAL:
792 prefix = one_only ? ".ld" : ".ldata";
793 break;
794 case SECCAT_BSS:
795 prefix = one_only ? ".lb" : ".lbss";
796 break;
797 case SECCAT_RODATA:
798 case SECCAT_RODATA_MERGE_STR:
799 case SECCAT_RODATA_MERGE_STR_INIT:
800 case SECCAT_RODATA_MERGE_CONST:
801 prefix = one_only ? ".lr" : ".lrodata";
802 break;
803 case SECCAT_SRODATA:
804 case SECCAT_SDATA:
805 case SECCAT_SBSS:
806 gcc_unreachable ();
807 case SECCAT_TEXT:
808 case SECCAT_TDATA:
809 case SECCAT_TBSS:
810 /* We don't split these for the medium model. Place them into
811 default sections and hope for the best. */
812 break;
813 }
814 if (prefix)
815 {
816 const char *name, *linkonce;
817 char *string;
818
819 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
820 name = targetm.strip_name_encoding (name);
821
822 /* If we're using one_only, then there needs to be a .gnu.linkonce
823 prefix to the section name. */
824 linkonce = one_only ? ".gnu.linkonce" : "";
825
826 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
827
828 set_decl_section_name (decl, string);
829 return;
830 }
831 }
832 default_unique_section (decl, reloc);
833 }
834
835 #ifdef COMMON_ASM_OP
836
837 #ifndef LARGECOMM_SECTION_ASM_OP
838 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
839 #endif
840
841 /* This says how to output assembler code to declare an
842 uninitialized external-linkage data object.
843 
844 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP directive
845 for large objects. */
846 void
847 x86_elf_aligned_decl_common (FILE *file, tree decl,
848 const char *name, unsigned HOST_WIDE_INT size,
849 unsigned align)
850 {
851 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
852 && size > (unsigned int)ix86_section_threshold)
853 {
854 switch_to_section (get_named_section (decl, ".lbss", 0));
855 fputs (LARGECOMM_SECTION_ASM_OP, file);
856 }
857 else
858 fputs (COMMON_ASM_OP, file);
859 assemble_name (file, name);
860 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
861 size, align / BITS_PER_UNIT);
862 }
863 #endif
864
865 /* Utility function for targets to use in implementing
866 ASM_OUTPUT_ALIGNED_BSS. */
867
868 void
869 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
870 unsigned HOST_WIDE_INT size, unsigned align)
871 {
872 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
873 && size > (unsigned int)ix86_section_threshold)
874 switch_to_section (get_named_section (decl, ".lbss", 0));
875 else
876 switch_to_section (bss_section);
877 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
878 #ifdef ASM_DECLARE_OBJECT_NAME
879 last_assemble_variable_decl = decl;
880 ASM_DECLARE_OBJECT_NAME (file, name, decl);
881 #else
882 /* The standard thing is to just output a label for the object. */
883 ASM_OUTPUT_LABEL (file, name);
884 #endif /* ASM_DECLARE_OBJECT_NAME */
885 ASM_OUTPUT_SKIP (file, size ? size : 1);
886 }
887 \f
888 /* Decide whether we must probe the stack before any space allocation
889 on this target. It's essentially TARGET_STACK_PROBE except when
890 -fstack-check causes the stack to be already probed differently. */
891
892 bool
893 ix86_target_stack_probe (void)
894 {
895 /* Do not probe the stack twice if static stack checking is enabled. */
896 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
897 return false;
898
899 return TARGET_STACK_PROBE;
900 }
901 \f
902 /* Decide whether we can make a sibling call to a function. DECL is the
903 declaration of the function being targeted by the call and EXP is the
904 CALL_EXPR representing the call. */
905
906 static bool
907 ix86_function_ok_for_sibcall (tree decl, tree exp)
908 {
909 tree type, decl_or_type;
910 rtx a, b;
911 bool bind_global = decl && !targetm.binds_local_p (decl);
912
913 if (ix86_function_naked (current_function_decl))
914 return false;
915
916 /* A sibling call isn't OK if there are no caller-saved registers,
917 since all registers must be preserved before return. */
918 if (cfun->machine->no_caller_saved_registers)
919 return false;
920
921 /* If we are generating position-independent code, we cannot sibcall
922 optimize direct calls to global functions, as the PLT requires
923 %ebx be live. (Darwin does not have a PLT.) */
924 if (!TARGET_MACHO
925 && !TARGET_64BIT
926 && flag_pic
927 && flag_plt
928 && bind_global)
929 return false;
930
931 /* If we need to align the outgoing stack, then sibcalling would
932 unalign the stack, which may break the called function. */
933 if (ix86_minimum_incoming_stack_boundary (true)
934 < PREFERRED_STACK_BOUNDARY)
935 return false;
936
937 if (decl)
938 {
939 decl_or_type = decl;
940 type = TREE_TYPE (decl);
941 }
942 else
943 {
944 /* We're looking at the CALL_EXPR, we need the type of the function. */
945 type = CALL_EXPR_FN (exp); /* pointer expression */
946 type = TREE_TYPE (type); /* pointer type */
947 type = TREE_TYPE (type); /* function type */
948 decl_or_type = type;
949 }
950
951 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
952 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
953 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
954 || (REG_PARM_STACK_SPACE (decl_or_type)
955 != REG_PARM_STACK_SPACE (current_function_decl)))
956 {
957 maybe_complain_about_tail_call (exp,
958 "inconsistent size of stack space"
959 " allocated for arguments which are"
960 " passed in registers");
961 return false;
962 }
963
964 /* Check that the return value locations are the same. Like
965 if we are returning floats on the 80387 register stack, we cannot
966 make a sibcall from a function that doesn't return a float to a
967 function that does or, conversely, from a function that does return
968 a float to a function that doesn't; the necessary stack adjustment
969 would not be executed. This is also the place we notice
970 differences in the return value ABI. Note that it is ok for one
971 of the functions to have void return type as long as the return
972 value of the other is passed in a register. */
973 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
974 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
975 cfun->decl, false);
976 if (STACK_REG_P (a) || STACK_REG_P (b))
977 {
978 if (!rtx_equal_p (a, b))
979 return false;
980 }
981 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
982 ;
983 else if (!rtx_equal_p (a, b))
984 return false;
985
986 if (TARGET_64BIT)
987 {
988 /* The SYSV ABI has more call-clobbered registers;
989 disallow sibcalls from MS to SYSV. */
990 if (cfun->machine->call_abi == MS_ABI
991 && ix86_function_type_abi (type) == SYSV_ABI)
992 return false;
993 }
994 else
995 {
996 /* If this call is indirect, we'll need to be able to use a
997 call-clobbered register for the address of the target function.
998 Make sure that all such registers are not used for passing
999 parameters. Note that DLLIMPORT functions and call to global
1000 function via GOT slot are indirect. */
1001 if (!decl
1002 || (bind_global && flag_pic && !flag_plt)
1003 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
1004 || flag_force_indirect_call)
1005 {
1006 /* Check if regparm >= 3 since arg_reg_available is set to
1007 false if regparm == 0. If regparm is 1 or 2, there is
1008 always a call-clobbered register available.
1009
1010 ??? A symbol indirect call doesn't need a call-clobbered
1011 register. But we don't know here whether this is a symbol
1012 indirect call or not. */
1013 if (ix86_function_regparm (type, decl) >= 3
1014 && !cfun->machine->arg_reg_available)
1015 return false;
1016 }
1017 }
1018
1019 if (decl && ix86_use_pseudo_pic_reg ())
1020 {
1021 /* When PIC register is used, it must be restored after ifunc
1022 function returns. */
1023 cgraph_node *node = cgraph_node::get (decl);
1024 if (node && node->ifunc_resolver)
1025 return false;
1026 }
1027
1028 /* Disable sibcall if callee has indirect_return attribute and
1029 caller doesn't since callee will return to the caller's caller
1030 via an indirect jump. */
1031 if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
1032 == (CF_RETURN | CF_BRANCH))
1033 && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
1034 && !lookup_attribute ("indirect_return",
1035 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
1036 return false;
1037
1038 /* Otherwise okay. That also includes certain types of indirect calls. */
1039 return true;
1040 }
1041
1042 /* Determine the calling convention from TYPE. */
1043
1044 unsigned int
1045 ix86_get_callcvt (const_tree type)
1046 {
1047 unsigned int ret = 0;
1048 bool is_stdarg;
1049 tree attrs;
1050
1051 if (TARGET_64BIT)
1052 return IX86_CALLCVT_CDECL;
1053
1054 attrs = TYPE_ATTRIBUTES (type);
1055 if (attrs != NULL_TREE)
1056 {
1057 if (lookup_attribute ("cdecl", attrs))
1058 ret |= IX86_CALLCVT_CDECL;
1059 else if (lookup_attribute ("stdcall", attrs))
1060 ret |= IX86_CALLCVT_STDCALL;
1061 else if (lookup_attribute ("fastcall", attrs))
1062 ret |= IX86_CALLCVT_FASTCALL;
1063 else if (lookup_attribute ("thiscall", attrs))
1064 ret |= IX86_CALLCVT_THISCALL;
1065
1066 /* Regparm isn't allowed for thiscall and fastcall. */
1067 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1068 {
1069 if (lookup_attribute ("regparm", attrs))
1070 ret |= IX86_CALLCVT_REGPARM;
1071 if (lookup_attribute ("sseregparm", attrs))
1072 ret |= IX86_CALLCVT_SSEREGPARM;
1073 }
1074
1075 if (IX86_BASE_CALLCVT(ret) != 0)
1076 return ret;
1077 }
1078
1079 is_stdarg = stdarg_p (type);
1080 if (TARGET_RTD && !is_stdarg)
1081 return IX86_CALLCVT_STDCALL | ret;
1082
1083 if (ret != 0
1084 || is_stdarg
1085 || TREE_CODE (type) != METHOD_TYPE
1086 || ix86_function_type_abi (type) != MS_ABI)
1087 return IX86_CALLCVT_CDECL | ret;
1088
1089 return IX86_CALLCVT_THISCALL;
1090 }
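/* As a hypothetical example: on a 32-bit target without -mrtd, a
   declaration such as

     void __attribute__ ((fastcall)) f (int a, int b);

   makes ix86_get_callcvt return IX86_CALLCVT_FASTCALL, while the same
   prototype without any attribute yields IX86_CALLCVT_CDECL.  */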
1091
1092 /* Return 0 if the attributes for two types are incompatible, 1 if they
1093 are compatible, and 2 if they are nearly compatible (which causes a
1094 warning to be generated). */
1095
1096 static int
1097 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1098 {
1099 unsigned int ccvt1, ccvt2;
1100
1101 if (TREE_CODE (type1) != FUNCTION_TYPE
1102 && TREE_CODE (type1) != METHOD_TYPE)
1103 return 1;
1104
1105 ccvt1 = ix86_get_callcvt (type1);
1106 ccvt2 = ix86_get_callcvt (type2);
1107 if (ccvt1 != ccvt2)
1108 return 0;
1109 if (ix86_function_regparm (type1, NULL)
1110 != ix86_function_regparm (type2, NULL))
1111 return 0;
1112
1113 return 1;
1114 }
1115 \f
1116 /* Return the regparm value for a function with the indicated TYPE and DECL.
1117 DECL may be NULL when calling a function indirectly
1118 or when considering a libcall. */
1119
1120 static int
1121 ix86_function_regparm (const_tree type, const_tree decl)
1122 {
1123 tree attr;
1124 int regparm;
1125 unsigned int ccvt;
1126
1127 if (TARGET_64BIT)
1128 return (ix86_function_type_abi (type) == SYSV_ABI
1129 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1130 ccvt = ix86_get_callcvt (type);
1131 regparm = ix86_regparm;
1132
1133 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1134 {
1135 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1136 if (attr)
1137 {
1138 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1139 return regparm;
1140 }
1141 }
1142 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1143 return 2;
1144 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1145 return 1;
1146
1147 /* Use register calling convention for local functions when possible. */
1148 if (decl
1149 && TREE_CODE (decl) == FUNCTION_DECL)
1150 {
1151 cgraph_node *target = cgraph_node::get (decl);
1152 if (target)
1153 target = target->function_symbol ();
1154
1155 /* Caller and callee must agree on the calling convention, so
1156 checking just "optimize" here would mean that with
1157 __attribute__((optimize (...))) the caller could use the regparm convention
1158 while the callee does not, or vice versa. Instead look at whether the
1159 callee itself is optimized or not. */
1160 if (target && opt_for_fn (target->decl, optimize)
1161 && !(profile_flag && !flag_fentry))
1162 {
1163 if (target->local && target->can_change_signature)
1164 {
1165 int local_regparm, globals = 0, regno;
1166
1167 /* Make sure no regparm register is taken by a
1168 fixed register variable. */
1169 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1170 local_regparm++)
1171 if (fixed_regs[local_regparm])
1172 break;
1173
1174 /* We don't want to use regparm(3) for nested functions as
1175 these use a static chain pointer in the third argument. */
1176 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1177 local_regparm = 2;
1178
1179 /* Save a register for the split stack. */
1180 if (flag_split_stack)
1181 {
1182 if (local_regparm == 3)
1183 local_regparm = 2;
1184 else if (local_regparm == 2
1185 && DECL_STATIC_CHAIN (target->decl))
1186 local_regparm = 1;
1187 }
1188
1189 /* Each fixed register usage increases register pressure,
1190 so fewer registers should be used for argument passing.
1191 This functionality can be overridden by an explicit
1192 regparm value. */
1193 for (regno = AX_REG; regno <= DI_REG; regno++)
1194 if (fixed_regs[regno])
1195 globals++;
1196
1197 local_regparm
1198 = globals < local_regparm ? local_regparm - globals : 0;
1199
1200 if (local_regparm > regparm)
1201 regparm = local_regparm;
1202 }
1203 }
1204 }
1205
1206 return regparm;
1207 }
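/* As a hypothetical example: on a 32-bit target, a function declared

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);

   gets regparm value 3 from the attribute handling above, so its first
   three integer arguments are expected in EAX, EDX and ECX rather than
   on the stack.  */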
1208
1209 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1210 DFmode (2) arguments in SSE registers for a function with the
1211 indicated TYPE and DECL. DECL may be NULL when calling a function
1212 indirectly or when considering a libcall. Return -1 if any FP parameter
1213 should be rejected with an error. This is used in situations where we imply
1214 the SSE calling convention but the function is called from another function
1215 with SSE disabled. Otherwise return 0. */
1216
1217 static int
1218 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1219 {
1220 gcc_assert (!TARGET_64BIT);
1221
1222 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1223 by the sseregparm attribute. */
1224 if (TARGET_SSEREGPARM
1225 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1226 {
1227 if (!TARGET_SSE)
1228 {
1229 if (warn)
1230 {
1231 if (decl)
1232 error ("calling %qD with attribute sseregparm without "
1233 "SSE/SSE2 enabled", decl);
1234 else
1235 error ("calling %qT with attribute sseregparm without "
1236 "SSE/SSE2 enabled", type);
1237 }
1238 return 0;
1239 }
1240
1241 return 2;
1242 }
1243
1244 if (!decl)
1245 return 0;
1246
1247 cgraph_node *target = cgraph_node::get (decl);
1248 if (target)
1249 target = target->function_symbol ();
1250
1251 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1252 (and DFmode for SSE2) arguments in SSE registers. */
1253 if (target
1254 /* TARGET_SSE_MATH */
1255 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1256 && opt_for_fn (target->decl, optimize)
1257 && !(profile_flag && !flag_fentry))
1258 {
1259 if (target->local && target->can_change_signature)
1260 {
1261 /* Refuse to produce wrong code when local function with SSE enabled
1262 is called from SSE disabled function.
1263 FIXME: We need a way to detect these cases cross-ltrans partition
1264 and avoid using SSE calling conventions on local functions called
1265 from function with SSE disabled. For now at least delay the
1266 warning until we know we are going to produce wrong code.
1267 See PR66047 */
1268 if (!TARGET_SSE && warn)
1269 return -1;
1270 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1271 ->x_ix86_isa_flags) ? 2 : 1;
1272 }
1273 }
1274
1275 return 0;
1276 }
1277
1278 /* Return true if EAX is live at the start of the function. Used by
1279 ix86_expand_prologue to determine if we need special help before
1280 calling allocate_stack_worker. */
1281
1282 static bool
1283 ix86_eax_live_at_start_p (void)
1284 {
1285 /* Cheat. Don't bother working forward from ix86_function_regparm
1286 to the function type to whether an actual argument is located in
1287 eax. Instead just look at cfg info, which is still close enough
1288 to correct at this point. This gives false positives for broken
1289 functions that might use uninitialized data that happens to be
1290 allocated in eax, but who cares? */
1291 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1292 }
1293
1294 static bool
1295 ix86_keep_aggregate_return_pointer (tree fntype)
1296 {
1297 tree attr;
1298
1299 if (!TARGET_64BIT)
1300 {
1301 attr = lookup_attribute ("callee_pop_aggregate_return",
1302 TYPE_ATTRIBUTES (fntype));
1303 if (attr)
1304 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1305
1306 /* For 32-bit MS-ABI the default is to keep aggregate
1307 return pointer. */
1308 if (ix86_function_type_abi (fntype) == MS_ABI)
1309 return true;
1310 }
1311 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1312 }
1313
1314 /* Value is the number of bytes of arguments automatically
1315 popped when returning from a subroutine call.
1316 FUNDECL is the declaration node of the function (as a tree),
1317 FUNTYPE is the data type of the function (as a tree),
1318 or for a library call it is an identifier node for the subroutine name.
1319 SIZE is the number of bytes of arguments passed on the stack.
1320
1321 On the 80386, the RTD insn may be used to pop them if the number
1322 of args is fixed, but if the number is variable then the caller
1323 must pop them all. RTD can't be used for library calls now
1324 because the library is compiled with the Unix compiler.
1325 Use of RTD is a selectable option, since it is incompatible with
1326 standard Unix calling sequences. If the option is not selected,
1327 the caller must always pop the args.
1328
1329 The attribute stdcall is equivalent to RTD on a per module basis. */
1330
1331 static poly_int64
1332 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1333 {
1334 unsigned int ccvt;
1335
1336 /* None of the 64-bit ABIs pop arguments. */
1337 if (TARGET_64BIT)
1338 return 0;
1339
1340 ccvt = ix86_get_callcvt (funtype);
1341
1342 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1343 | IX86_CALLCVT_THISCALL)) != 0
1344 && ! stdarg_p (funtype))
1345 return size;
1346
1347 /* Lose any fake structure return argument if it is passed on the stack. */
1348 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1349 && !ix86_keep_aggregate_return_pointer (funtype))
1350 {
1351 int nregs = ix86_function_regparm (funtype, fundecl);
1352 if (nregs == 0)
1353 return GET_MODE_SIZE (Pmode);
1354 }
1355
1356 return 0;
1357 }
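/* As a hypothetical example: for a 32-bit stdcall function such as

     void __attribute__ ((stdcall)) f (int a, int b);

   ix86_return_pops_args returns SIZE (8 bytes here), so the callee pops
   its own arguments with "ret $8", whereas a plain cdecl function returns
   0 and leaves the argument cleanup to the caller.  */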
1358
1359 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1360
1361 static bool
1362 ix86_legitimate_combined_insn (rtx_insn *insn)
1363 {
1364 int i;
1365
1366 /* Check operand constraints in case hard registers were propagated
1367 into insn pattern. This check prevents combine pass from
1368 generating insn patterns with invalid hard register operands.
1369 These invalid insns can eventually confuse reload to error out
1370 with a spill failure. See also PRs 46829 and 46843. */
1371
1372 gcc_assert (INSN_CODE (insn) >= 0);
1373
1374 extract_insn (insn);
1375 preprocess_constraints (insn);
1376
1377 int n_operands = recog_data.n_operands;
1378 int n_alternatives = recog_data.n_alternatives;
1379 for (i = 0; i < n_operands; i++)
1380 {
1381 rtx op = recog_data.operand[i];
1382 machine_mode mode = GET_MODE (op);
1383 const operand_alternative *op_alt;
1384 int offset = 0;
1385 bool win;
1386 int j;
1387
1388 /* A unary operator may be accepted by the predicate, but it
1389 is irrelevant for matching constraints. */
1390 if (UNARY_P (op))
1391 op = XEXP (op, 0);
1392
1393 if (SUBREG_P (op))
1394 {
1395 if (REG_P (SUBREG_REG (op))
1396 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1397 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1398 GET_MODE (SUBREG_REG (op)),
1399 SUBREG_BYTE (op),
1400 GET_MODE (op));
1401 op = SUBREG_REG (op);
1402 }
1403
1404 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1405 continue;
1406
1407 op_alt = recog_op_alt;
1408
1409 /* Operand has no constraints, anything is OK. */
1410 win = !n_alternatives;
1411
1412 alternative_mask preferred = get_preferred_alternatives (insn);
1413 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1414 {
1415 if (!TEST_BIT (preferred, j))
1416 continue;
1417 if (op_alt[i].anything_ok
1418 || (op_alt[i].matches != -1
1419 && operands_match_p
1420 (recog_data.operand[i],
1421 recog_data.operand[op_alt[i].matches]))
1422 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1423 {
1424 win = true;
1425 break;
1426 }
1427 }
1428
1429 if (!win)
1430 return false;
1431 }
1432
1433 return true;
1434 }
1435 \f
1436 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1437
1438 static unsigned HOST_WIDE_INT
1439 ix86_asan_shadow_offset (void)
1440 {
1441 return SUBTARGET_SHADOW_OFFSET;
1442 }
1443 \f
1444 /* Argument support functions. */
1445
1446 /* Return true when register REGNO may be used to pass function parameters. */
1447 bool
1448 ix86_function_arg_regno_p (int regno)
1449 {
1450 int i;
1451 enum calling_abi call_abi;
1452 const int *parm_regs;
1453
1454 if (TARGET_SSE && SSE_REGNO_P (regno)
1455 && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1456 return true;
1457
1458 if (!TARGET_64BIT)
1459 return (regno < REGPARM_MAX
1460 || (TARGET_MMX && MMX_REGNO_P (regno)
1461 && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1462
1463 /* TODO: The function should depend on the current function's ABI, but
1464 builtins.cc would need updating then. Therefore we use the
1465 default ABI. */
1466 call_abi = ix86_cfun_abi ();
1467
1468 /* RAX is used as hidden argument to va_arg functions. */
1469 if (call_abi == SYSV_ABI && regno == AX_REG)
1470 return true;
1471
1472 if (call_abi == MS_ABI)
1473 parm_regs = x86_64_ms_abi_int_parameter_registers;
1474 else
1475 parm_regs = x86_64_int_parameter_registers;
1476
1477 for (i = 0; i < (call_abi == MS_ABI
1478 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1479 if (regno == parm_regs[i])
1480 return true;
1481 return false;
1482 }
1483
1484 /* Return true if we do not know how to pass ARG solely in registers. */
1485
1486 static bool
1487 ix86_must_pass_in_stack (const function_arg_info &arg)
1488 {
1489 if (must_pass_in_stack_var_size_or_pad (arg))
1490 return true;
1491
1492 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1493 The layout_type routine is crafty and tries to trick us into passing
1494 currently unsupported vector types on the stack by using TImode. */
1495 return (!TARGET_64BIT && arg.mode == TImode
1496 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1497 }
1498
1499 /* Return the size, in bytes, of the area reserved for arguments passed
1500 in registers for the function represented by FNDECL, depending on the
1501 ABI used. */
1502 int
1503 ix86_reg_parm_stack_space (const_tree fndecl)
1504 {
1505 enum calling_abi call_abi = SYSV_ABI;
1506 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1507 call_abi = ix86_function_abi (fndecl);
1508 else
1509 call_abi = ix86_function_type_abi (fndecl);
1510 if (TARGET_64BIT && call_abi == MS_ABI)
1511 return 32;
1512 return 0;
1513 }
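/* The 32 bytes returned above for the 64-bit MS ABI correspond to the four
   8-byte "home" slots that the caller reserves for the register arguments
   passed in RCX, RDX, R8 and R9.  */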
1514
1515 /* We add this as a workaround in order to use the libc_has_function
1516 hook in i386.md. */
1517 bool
1518 ix86_libc_has_function (enum function_class fn_class)
1519 {
1520 return targetm.libc_has_function (fn_class, NULL_TREE);
1521 }
1522
1523 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE,
1524 specifying the call ABI used. */
1525 enum calling_abi
1526 ix86_function_type_abi (const_tree fntype)
1527 {
1528 enum calling_abi abi = ix86_abi;
1529
1530 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1531 return abi;
1532
1533 if (abi == SYSV_ABI
1534 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1535 {
1536 static int warned;
1537 if (TARGET_X32 && !warned)
1538 {
1539 error ("X32 does not support %<ms_abi%> attribute");
1540 warned = 1;
1541 }
1542
1543 abi = MS_ABI;
1544 }
1545 else if (abi == MS_ABI
1546 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1547 abi = SYSV_ABI;
1548
1549 return abi;
1550 }
1551
1552 enum calling_abi
1553 ix86_function_abi (const_tree fndecl)
1554 {
1555 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1556 }
1557
1558 /* Return SYSV_ABI or MS_ABI, depending on cfun,
1559 specifying the call ABI used. */
1560 enum calling_abi
1561 ix86_cfun_abi (void)
1562 {
1563 return cfun ? cfun->machine->call_abi : ix86_abi;
1564 }
1565
1566 bool
1567 ix86_function_ms_hook_prologue (const_tree fn)
1568 {
1569 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1570 {
1571 if (decl_function_context (fn) != NULL_TREE)
1572 error_at (DECL_SOURCE_LOCATION (fn),
1573 "%<ms_hook_prologue%> attribute is not compatible "
1574 "with nested function");
1575 else
1576 return true;
1577 }
1578 return false;
1579 }
1580
1581 bool
1582 ix86_function_naked (const_tree fn)
1583 {
1584 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1585 return true;
1586
1587 return false;
1588 }
1589
1590 /* Write the extra assembler code needed to declare a function properly. */
1591
1592 void
1593 ix86_asm_output_function_label (FILE *out_file, const char *fname,
1594 tree decl)
1595 {
1596 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1597
1598 if (cfun)
1599 cfun->machine->function_label_emitted = true;
1600
1601 if (is_ms_hook)
1602 {
1603 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1604 unsigned int filler_cc = 0xcccccccc;
1605
1606 for (i = 0; i < filler_count; i += 4)
1607 fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
1608 }
1609
1610 #ifdef SUBTARGET_ASM_UNWIND_INIT
1611 SUBTARGET_ASM_UNWIND_INIT (out_file);
1612 #endif
1613
1614 ASM_OUTPUT_LABEL (out_file, fname);
1615
1616 /* Output magic byte marker, if hot-patch attribute is set. */
1617 if (is_ms_hook)
1618 {
1619 if (TARGET_64BIT)
1620 {
1621 /* leaq [%rsp + 0], %rsp */
1622 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1623 out_file);
1624 }
1625 else
1626 {
1627 /* movl.s %edi, %edi
1628 push %ebp
1629 movl.s %esp, %ebp */
1630 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
1631 }
1632 }
1633 }
1634
1635 /* Implementation of the call ABI switching target hook. The call
1636 register sets specific to FNDECL are set up here. See also
1637 ix86_conditional_register_usage for more details. */
1638 void
1639 ix86_call_abi_override (const_tree fndecl)
1640 {
1641 cfun->machine->call_abi = ix86_function_abi (fndecl);
1642 }
1643
1644 /* Return true if a pseudo register should be created and used to hold
1645 the GOT address for PIC code. */
1646 bool
1647 ix86_use_pseudo_pic_reg (void)
1648 {
1649 if ((TARGET_64BIT
1650 && (ix86_cmodel == CM_SMALL_PIC
1651 || TARGET_PECOFF))
1652 || !flag_pic)
1653 return false;
1654 return true;
1655 }
1656
1657 /* Initialize large model PIC register. */
1658
1659 static void
1660 ix86_init_large_pic_reg (unsigned int tmp_regno)
1661 {
1662 rtx_code_label *label;
1663 rtx tmp_reg;
1664
1665 gcc_assert (Pmode == DImode);
1666 label = gen_label_rtx ();
1667 emit_label (label);
1668 LABEL_PRESERVE_P (label) = 1;
1669 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1670 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1671 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1672 label));
1673 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1674 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1675 const char *name = LABEL_NAME (label);
1676 PUT_CODE (label, NOTE);
1677 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1678 NOTE_DELETED_LABEL_NAME (label) = name;
1679 }
1680
1681 /* Create and initialize PIC register if required. */
1682 static void
1683 ix86_init_pic_reg (void)
1684 {
1685 edge entry_edge;
1686 rtx_insn *seq;
1687
1688 if (!ix86_use_pseudo_pic_reg ())
1689 return;
1690
1691 start_sequence ();
1692
1693 if (TARGET_64BIT)
1694 {
1695 if (ix86_cmodel == CM_LARGE_PIC)
1696 ix86_init_large_pic_reg (R11_REG);
1697 else
1698 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1699 }
1700 else
1701 {
1702 /* If there is a future mcount call in the function, it is more profitable
1703 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1704 rtx reg = crtl->profile
1705 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1706 : pic_offset_table_rtx;
1707 rtx_insn *insn = emit_insn (gen_set_got (reg));
1708 RTX_FRAME_RELATED_P (insn) = 1;
1709 if (crtl->profile)
1710 emit_move_insn (pic_offset_table_rtx, reg);
1711 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1712 }
1713
1714 seq = get_insns ();
1715 end_sequence ();
1716
1717 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1718 insert_insn_on_edge (seq, entry_edge);
1719 commit_one_edge_insertion (entry_edge);
1720 }
1721
1722 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1723 for a call to a function whose data type is FNTYPE.
1724 For a library call, FNTYPE is 0. */
1725
1726 void
1727 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1728 tree fntype, /* tree ptr for function decl */
1729 rtx libname, /* SYMBOL_REF of library name or 0 */
1730 tree fndecl,
1731 int caller)
1732 {
1733 struct cgraph_node *local_info_node = NULL;
1734 struct cgraph_node *target = NULL;
1735
1736 /* Set silent_p to false to raise an error for invalid calls when
1737 expanding function body. */
1738 cfun->machine->silent_p = false;
1739
1740 memset (cum, 0, sizeof (*cum));
1741
1742 if (fndecl)
1743 {
1744 target = cgraph_node::get (fndecl);
1745 if (target)
1746 {
1747 target = target->function_symbol ();
1748 local_info_node = cgraph_node::local_info_node (target->decl);
1749 cum->call_abi = ix86_function_abi (target->decl);
1750 }
1751 else
1752 cum->call_abi = ix86_function_abi (fndecl);
1753 }
1754 else
1755 cum->call_abi = ix86_function_type_abi (fntype);
1756
1757 cum->caller = caller;
1758
1759 /* Set up the number of registers to use for passing arguments. */
1760 cum->nregs = ix86_regparm;
1761 if (TARGET_64BIT)
1762 {
1763 cum->nregs = (cum->call_abi == SYSV_ABI
1764 ? X86_64_REGPARM_MAX
1765 : X86_64_MS_REGPARM_MAX);
1766 }
1767 if (TARGET_SSE)
1768 {
1769 cum->sse_nregs = SSE_REGPARM_MAX;
1770 if (TARGET_64BIT)
1771 {
1772 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1773 ? X86_64_SSE_REGPARM_MAX
1774 : X86_64_MS_SSE_REGPARM_MAX);
1775 }
1776 }
1777 if (TARGET_MMX)
1778 cum->mmx_nregs = MMX_REGPARM_MAX;
1779 cum->warn_avx512f = true;
1780 cum->warn_avx = true;
1781 cum->warn_sse = true;
1782 cum->warn_mmx = true;
1783
1784 /* Because the type might mismatch between caller and callee, we need to
1785    use the actual type of the function for local calls.
1786    FIXME: cgraph_analyze can be told to actually record if the function uses
1787    va_start, so for local functions maybe_vaarg can be made more aggressive,
1788    helping K&R code.
1789    FIXME: once the type system is fixed, we won't need this code anymore. */
1790 if (local_info_node && local_info_node->local
1791 && local_info_node->can_change_signature)
1792 fntype = TREE_TYPE (target->decl);
1793 cum->stdarg = stdarg_p (fntype);
1794 cum->maybe_vaarg = (fntype
1795 ? (!prototype_p (fntype) || stdarg_p (fntype))
1796 : !libname);
1797
1798 cum->decl = fndecl;
1799
1800 cum->warn_empty = !warn_abi || cum->stdarg;
1801 if (!cum->warn_empty && fntype)
1802 {
1803 function_args_iterator iter;
1804 tree argtype;
1805 bool seen_empty_type = false;
1806 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1807 {
1808 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1809 break;
1810 if (TYPE_EMPTY_P (argtype))
1811 seen_empty_type = true;
1812 else if (seen_empty_type)
1813 {
1814 cum->warn_empty = true;
1815 break;
1816 }
1817 }
1818 }
1819
1820 if (!TARGET_64BIT)
1821 {
1822 /* If there are variable arguments, then we won't pass anything
1823 in registers in 32-bit mode. */
1824 if (stdarg_p (fntype))
1825 {
1826 cum->nregs = 0;
1827 /* Since in 32-bit mode variable arguments are always passed on the
1828    stack, there is a scratch register available for an indirect
1829    sibcall. */
1830 cfun->machine->arg_reg_available = true;
1831 cum->sse_nregs = 0;
1832 cum->mmx_nregs = 0;
1833 cum->warn_avx512f = false;
1834 cum->warn_avx = false;
1835 cum->warn_sse = false;
1836 cum->warn_mmx = false;
1837 return;
1838 }
1839
1840 /* Use ecx and edx registers if function has fastcall attribute,
1841 else look for regparm information. */
1842 if (fntype)
1843 {
1844 unsigned int ccvt = ix86_get_callcvt (fntype);
1845 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1846 {
1847 cum->nregs = 1;
1848 cum->fastcall = 1; /* Same first register as in fastcall. */
1849 }
1850 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1851 {
1852 cum->nregs = 2;
1853 cum->fastcall = 1;
1854 }
1855 else
1856 cum->nregs = ix86_function_regparm (fntype, fndecl);
1857 }
1858
1859 /* Set up the number of SSE registers used for passing SFmode
1860 and DFmode arguments. Warn for mismatching ABI. */
1861 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1862 }
1863
1864 cfun->machine->arg_reg_available = (cum->nregs > 0);
1865 }
1866
1867 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1868 But in the case of vector types, it is some vector mode.
1869
1870 When we have only some of our vector isa extensions enabled, then there
1871 are some modes for which vector_mode_supported_p is false. For these
1872 modes, the generic vector support in gcc will choose some non-vector mode
1873 in order to implement the type. By computing the natural mode, we'll
1874 select the proper ABI location for the operand and not depend on whatever
1875 the middle-end decides to do with these vector types.
1876
1877 The middle-end can't deal with vector types larger than 16 bytes. In
1878 this case, we return the original mode and warn about the ABI change if
1879 CUM isn't NULL.
1880 
1881 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
1882 available for the function return value. */
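/* As an illustration (not an additional rule): a 32-byte vector type
   such as
     typedef int v8si __attribute__ ((vector_size (32)));
   compiled without AVX enabled has no supported vector mode, so its
   TYPE_MODE is returned unchanged and, when CUM is non-null (or for a
   return value), the -Wpsabi "changes the ABI" note below is emitted.  */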
1883
1884 static machine_mode
1885 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1886 bool in_return)
1887 {
1888 machine_mode mode = TYPE_MODE (type);
1889
1890 if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
1891 {
1892 HOST_WIDE_INT size = int_size_in_bytes (type);
1893 if ((size == 8 || size == 16 || size == 32 || size == 64)
1894 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1895 && TYPE_VECTOR_SUBPARTS (type) > 1)
1896 {
1897 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1898
1899 /* There are no XFmode vector modes ... */
1900 if (innermode == XFmode)
1901 return mode;
1902
1903 /* ... and no decimal float vector modes. */
1904 if (DECIMAL_FLOAT_MODE_P (innermode))
1905 return mode;
1906
1907 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
1908 mode = MIN_MODE_VECTOR_FLOAT;
1909 else
1910 mode = MIN_MODE_VECTOR_INT;
1911
1912 /* Get the mode which has this inner mode and number of units. */
1913 FOR_EACH_MODE_FROM (mode, mode)
1914 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1915 && GET_MODE_INNER (mode) == innermode)
1916 {
1917 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1918 {
1919 static bool warnedavx512f;
1920 static bool warnedavx512f_ret;
1921
1922 if (cum && cum->warn_avx512f && !warnedavx512f)
1923 {
1924 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1925 "without AVX512F enabled changes the ABI"))
1926 warnedavx512f = true;
1927 }
1928 else if (in_return && !warnedavx512f_ret)
1929 {
1930 if (warning (OPT_Wpsabi, "AVX512F vector return "
1931 "without AVX512F enabled changes the ABI"))
1932 warnedavx512f_ret = true;
1933 }
1934
1935 return TYPE_MODE (type);
1936 }
1937 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1938 {
1939 static bool warnedavx;
1940 static bool warnedavx_ret;
1941
1942 if (cum && cum->warn_avx && !warnedavx)
1943 {
1944 if (warning (OPT_Wpsabi, "AVX vector argument "
1945 "without AVX enabled changes the ABI"))
1946 warnedavx = true;
1947 }
1948 else if (in_return && !warnedavx_ret)
1949 {
1950 if (warning (OPT_Wpsabi, "AVX vector return "
1951 "without AVX enabled changes the ABI"))
1952 warnedavx_ret = true;
1953 }
1954
1955 return TYPE_MODE (type);
1956 }
1957 else if (((size == 8 && TARGET_64BIT) || size == 16)
1958 && !TARGET_SSE
1959 && !TARGET_IAMCU)
1960 {
1961 static bool warnedsse;
1962 static bool warnedsse_ret;
1963
1964 if (cum && cum->warn_sse && !warnedsse)
1965 {
1966 if (warning (OPT_Wpsabi, "SSE vector argument "
1967 "without SSE enabled changes the ABI"))
1968 warnedsse = true;
1969 }
1970 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1971 {
1972 if (warning (OPT_Wpsabi, "SSE vector return "
1973 "without SSE enabled changes the ABI"))
1974 warnedsse_ret = true;
1975 }
1976 }
1977 else if ((size == 8 && !TARGET_64BIT)
1978 && (!cfun
1979 || cfun->machine->func_type == TYPE_NORMAL)
1980 && !TARGET_MMX
1981 && !TARGET_IAMCU)
1982 {
1983 static bool warnedmmx;
1984 static bool warnedmmx_ret;
1985
1986 if (cum && cum->warn_mmx && !warnedmmx)
1987 {
1988 if (warning (OPT_Wpsabi, "MMX vector argument "
1989 "without MMX enabled changes the ABI"))
1990 warnedmmx = true;
1991 }
1992 else if (in_return && !warnedmmx_ret)
1993 {
1994 if (warning (OPT_Wpsabi, "MMX vector return "
1995 "without MMX enabled changes the ABI"))
1996 warnedmmx_ret = true;
1997 }
1998 }
1999 return mode;
2000 }
2001
2002 gcc_unreachable ();
2003 }
2004 }
2005
2006 return mode;
2007 }
2008
2009 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
2010 this may not agree with the mode that the type system has chosen for the
2011 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
2012 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
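/* For example (a sketch; xmm0 stands in for whatever register REGNO names),
   a BLKmode argument whose natural mode is V2SFmode would be wrapped as
     (parallel:BLK [(expr_list (reg:V2SF xmm0) (const_int 0))])
   so the caller still sees an object of the original BLKmode.  */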
2013
2014 static rtx
2015 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
2016 unsigned int regno)
2017 {
2018 rtx tmp;
2019
2020 if (orig_mode != BLKmode)
2021 tmp = gen_rtx_REG (orig_mode, regno);
2022 else
2023 {
2024 tmp = gen_rtx_REG (mode, regno);
2025 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2026 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2027 }
2028
2029 return tmp;
2030 }
2031
2032 /* x86-64 register passing implementation. See the x86-64 psABI for details.
2033    The goal of this code is to classify each eightbyte of the incoming
2034    argument by register class and assign registers accordingly. */
2035
2036 /* Return the union class of CLASS1 and CLASS2.
2037 See the x86-64 PS ABI for details. */
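/* An illustrative example of how the rules below combine (not taken
   from the psABI text): merging X86_64_SSESF_CLASS with
   X86_64_INTEGERSI_CLASS yields X86_64_INTEGERSI_CLASS (rule #4),
   while merging X86_64_SSE_CLASS with X86_64_X87_CLASS yields
   X86_64_MEMORY_CLASS (rule #5).  */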
2038
2039 static enum x86_64_reg_class
2040 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2041 {
2042 /* Rule #1: If both classes are equal, this is the resulting class. */
2043 if (class1 == class2)
2044 return class1;
2045
2046 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2047 the other class. */
2048 if (class1 == X86_64_NO_CLASS)
2049 return class2;
2050 if (class2 == X86_64_NO_CLASS)
2051 return class1;
2052
2053 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2054 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2055 return X86_64_MEMORY_CLASS;
2056
2057 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2058 if ((class1 == X86_64_INTEGERSI_CLASS
2059 && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
2060 || (class2 == X86_64_INTEGERSI_CLASS
2061 && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
2062 return X86_64_INTEGERSI_CLASS;
2063 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2064 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2065 return X86_64_INTEGER_CLASS;
2066
2067 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2068 MEMORY is used. */
2069 if (class1 == X86_64_X87_CLASS
2070 || class1 == X86_64_X87UP_CLASS
2071 || class1 == X86_64_COMPLEX_X87_CLASS
2072 || class2 == X86_64_X87_CLASS
2073 || class2 == X86_64_X87UP_CLASS
2074 || class2 == X86_64_COMPLEX_X87_CLASS)
2075 return X86_64_MEMORY_CLASS;
2076
2077 /* Rule #6: Otherwise class SSE is used. */
2078 return X86_64_SSE_CLASS;
2079 }
2080
2081 /* Classify the argument of type TYPE and mode MODE.
2082 CLASSES will be filled by the register class used to pass each word
2083 of the operand. The number of words is returned. In case the parameter
2084 should be passed in memory, 0 is returned. As a special case for zero
2085 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2086
2087 BIT_OFFSET is used internally for handling records; it specifies the
2088 offset in bits, modulo 512, to avoid overflow cases.
2089
2090 See the x86-64 PS ABI for details.
2091 */
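/* A small worked example (illustrative only):
     struct s { double d; long l; };
   occupies two eightbytes.  The first eightbyte holds only the double
   and is classified X86_64_SSEDF_CLASS; the second holds only the long
   and is classified X86_64_INTEGER_CLASS.  The function returns 2, and
   the struct is then passed in one SSE register and one integer
   register.  */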
2092
2093 static int
2094 classify_argument (machine_mode mode, const_tree type,
2095 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
2096 int &zero_width_bitfields)
2097 {
2098 HOST_WIDE_INT bytes
2099 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2100 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2101
2102 /* Variable sized entities are always passed/returned in memory. */
2103 if (bytes < 0)
2104 return 0;
2105
2106 if (mode != VOIDmode)
2107 {
2108 /* The value of "named" doesn't matter. */
2109 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2110 if (targetm.calls.must_pass_in_stack (arg))
2111 return 0;
2112 }
2113
2114 if (type && AGGREGATE_TYPE_P (type))
2115 {
2116 int i;
2117 tree field;
2118 enum x86_64_reg_class subclasses[MAX_CLASSES];
2119
2120 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2121 if (bytes > 64)
2122 return 0;
2123
2124 for (i = 0; i < words; i++)
2125 classes[i] = X86_64_NO_CLASS;
2126
2127 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2128    signal the memory class, so handle this as a special case. */
2129 if (!words)
2130 {
2131 classes[0] = X86_64_NO_CLASS;
2132 return 1;
2133 }
2134
2135 /* Classify each field of record and merge classes. */
2136 switch (TREE_CODE (type))
2137 {
2138 case RECORD_TYPE:
2139 /* And now merge the fields of structure. */
2140 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2141 {
2142 if (TREE_CODE (field) == FIELD_DECL)
2143 {
2144 int num;
2145
2146 if (TREE_TYPE (field) == error_mark_node)
2147 continue;
2148
2149 /* Bitfields are always classified as integer. Handle them
2150 early, since later code would consider them to be
2151 misaligned integers. */
2152 if (DECL_BIT_FIELD (field))
2153 {
2154 if (integer_zerop (DECL_SIZE (field)))
2155 {
2156 if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
2157 continue;
2158 if (zero_width_bitfields != 2)
2159 {
2160 zero_width_bitfields = 1;
2161 continue;
2162 }
2163 }
2164 for (i = (int_bit_position (field)
2165 + (bit_offset % 64)) / 8 / 8;
2166 i < ((int_bit_position (field) + (bit_offset % 64))
2167 + tree_to_shwi (DECL_SIZE (field))
2168 + 63) / 8 / 8; i++)
2169 classes[i]
2170 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2171 }
2172 else
2173 {
2174 int pos;
2175
2176 type = TREE_TYPE (field);
2177
2178 /* Flexible array member is ignored. */
2179 if (TYPE_MODE (type) == BLKmode
2180 && TREE_CODE (type) == ARRAY_TYPE
2181 && TYPE_SIZE (type) == NULL_TREE
2182 && TYPE_DOMAIN (type) != NULL_TREE
2183 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2184 == NULL_TREE))
2185 {
2186 static bool warned;
2187
2188 if (!warned && warn_psabi)
2189 {
2190 warned = true;
2191 inform (input_location,
2192 "the ABI of passing struct with"
2193 " a flexible array member has"
2194 " changed in GCC 4.4");
2195 }
2196 continue;
2197 }
2198 num = classify_argument (TYPE_MODE (type), type,
2199 subclasses,
2200 (int_bit_position (field)
2201 + bit_offset) % 512,
2202 zero_width_bitfields);
2203 if (!num)
2204 return 0;
2205 pos = (int_bit_position (field)
2206 + (bit_offset % 64)) / 8 / 8;
2207 for (i = 0; i < num && (i + pos) < words; i++)
2208 classes[i + pos]
2209 = merge_classes (subclasses[i], classes[i + pos]);
2210 }
2211 }
2212 }
2213 break;
2214
2215 case ARRAY_TYPE:
2216 /* Arrays are handled as small records. */
2217 {
2218 int num;
2219 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2220 TREE_TYPE (type), subclasses, bit_offset,
2221 zero_width_bitfields);
2222 if (!num)
2223 return 0;
2224
2225 /* The partial classes are now full classes. */
2226 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2227 subclasses[0] = X86_64_SSE_CLASS;
2228 if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
2229 subclasses[0] = X86_64_SSE_CLASS;
2230 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2231 && !((bit_offset % 64) == 0 && bytes == 4))
2232 subclasses[0] = X86_64_INTEGER_CLASS;
2233
2234 for (i = 0; i < words; i++)
2235 classes[i] = subclasses[i % num];
2236
2237 break;
2238 }
2239 case UNION_TYPE:
2240 case QUAL_UNION_TYPE:
2241 /* Unions are similar to RECORD_TYPE but offset
2242    is always 0. */
2243 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2244 {
2245 if (TREE_CODE (field) == FIELD_DECL)
2246 {
2247 int num;
2248
2249 if (TREE_TYPE (field) == error_mark_node)
2250 continue;
2251
2252 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2253 TREE_TYPE (field), subclasses,
2254 bit_offset, zero_width_bitfields);
2255 if (!num)
2256 return 0;
2257 for (i = 0; i < num && i < words; i++)
2258 classes[i] = merge_classes (subclasses[i], classes[i]);
2259 }
2260 }
2261 break;
2262
2263 default:
2264 gcc_unreachable ();
2265 }
2266
2267 if (words > 2)
2268 {
2269 /* When the size is > 16 bytes, if the first class isn't
2270    X86_64_SSE_CLASS or any of the remaining classes isn't
2271    X86_64_SSEUP_CLASS, everything should be passed in
2272    memory. */
2273 if (classes[0] != X86_64_SSE_CLASS)
2274 return 0;
2275
2276 for (i = 1; i < words; i++)
2277 if (classes[i] != X86_64_SSEUP_CLASS)
2278 return 0;
2279 }
2280
2281 /* Final merger cleanup. */
2282 for (i = 0; i < words; i++)
2283 {
2284 /* If one class is MEMORY, everything should be passed in
2285 memory. */
2286 if (classes[i] == X86_64_MEMORY_CLASS)
2287 return 0;
2288
2289 /* The X86_64_SSEUP_CLASS should always be preceded by
2290    X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2291 if (classes[i] == X86_64_SSEUP_CLASS
2292 && classes[i - 1] != X86_64_SSE_CLASS
2293 && classes[i - 1] != X86_64_SSEUP_CLASS)
2294 {
2295 /* The first one should never be X86_64_SSEUP_CLASS. */
2296 gcc_assert (i != 0);
2297 classes[i] = X86_64_SSE_CLASS;
2298 }
2299
2300 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2301 everything should be passed in memory. */
2302 if (classes[i] == X86_64_X87UP_CLASS
2303 && (classes[i - 1] != X86_64_X87_CLASS))
2304 {
2305 static bool warned;
2306
2307 /* The first one should never be X86_64_X87UP_CLASS. */
2308 gcc_assert (i != 0);
2309 if (!warned && warn_psabi)
2310 {
2311 warned = true;
2312 inform (input_location,
2313 "the ABI of passing union with %<long double%>"
2314 " has changed in GCC 4.4");
2315 }
2316 return 0;
2317 }
2318 }
2319 return words;
2320 }
2321
2322 /* Compute the alignment needed. We align all types to their natural
2323    boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2324 if (mode != VOIDmode && mode != BLKmode)
2325 {
2326 int mode_alignment = GET_MODE_BITSIZE (mode);
2327
2328 if (mode == XFmode)
2329 mode_alignment = 128;
2330 else if (mode == XCmode)
2331 mode_alignment = 256;
2332 if (COMPLEX_MODE_P (mode))
2333 mode_alignment /= 2;
2334 /* Misaligned fields are always returned in memory. */
2335 if (bit_offset % mode_alignment)
2336 return 0;
2337 }
2338
2339 /* For V1xx modes, just use the base mode. */
2340 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2341 && GET_MODE_UNIT_SIZE (mode) == bytes)
2342 mode = GET_MODE_INNER (mode);
2343
2344 /* Classification of atomic types. */
2345 switch (mode)
2346 {
2347 case E_SDmode:
2348 case E_DDmode:
2349 classes[0] = X86_64_SSE_CLASS;
2350 return 1;
2351 case E_TDmode:
2352 classes[0] = X86_64_SSE_CLASS;
2353 classes[1] = X86_64_SSEUP_CLASS;
2354 return 2;
2355 case E_DImode:
2356 case E_SImode:
2357 case E_HImode:
2358 case E_QImode:
2359 case E_CSImode:
2360 case E_CHImode:
2361 case E_CQImode:
2362 {
2363 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2364
2365 /* Analyze last 128 bits only. */
2366 size = (size - 1) & 0x7f;
2367
2368 if (size < 32)
2369 {
2370 classes[0] = X86_64_INTEGERSI_CLASS;
2371 return 1;
2372 }
2373 else if (size < 64)
2374 {
2375 classes[0] = X86_64_INTEGER_CLASS;
2376 return 1;
2377 }
2378 else if (size < 64+32)
2379 {
2380 classes[0] = X86_64_INTEGER_CLASS;
2381 classes[1] = X86_64_INTEGERSI_CLASS;
2382 return 2;
2383 }
2384 else if (size < 64+64)
2385 {
2386 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2387 return 2;
2388 }
2389 else
2390 gcc_unreachable ();
2391 }
2392 case E_CDImode:
2393 case E_TImode:
2394 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2395 return 2;
2396 case E_COImode:
2397 case E_OImode:
2398 /* OImode shouldn't be used directly. */
2399 gcc_unreachable ();
2400 case E_CTImode:
2401 return 0;
2402 case E_HFmode:
2403 case E_BFmode:
2404 if (!(bit_offset % 64))
2405 classes[0] = X86_64_SSEHF_CLASS;
2406 else
2407 classes[0] = X86_64_SSE_CLASS;
2408 return 1;
2409 case E_SFmode:
2410 if (!(bit_offset % 64))
2411 classes[0] = X86_64_SSESF_CLASS;
2412 else
2413 classes[0] = X86_64_SSE_CLASS;
2414 return 1;
2415 case E_DFmode:
2416 classes[0] = X86_64_SSEDF_CLASS;
2417 return 1;
2418 case E_XFmode:
2419 classes[0] = X86_64_X87_CLASS;
2420 classes[1] = X86_64_X87UP_CLASS;
2421 return 2;
2422 case E_TFmode:
2423 classes[0] = X86_64_SSE_CLASS;
2424 classes[1] = X86_64_SSEUP_CLASS;
2425 return 2;
2426 case E_HCmode:
2427 case E_BCmode:
2428 classes[0] = X86_64_SSE_CLASS;
2429 if (!(bit_offset % 64))
2430 return 1;
2431 else
2432 {
2433 classes[1] = X86_64_SSEHF_CLASS;
2434 return 2;
2435 }
2436 case E_SCmode:
2437 classes[0] = X86_64_SSE_CLASS;
2438 if (!(bit_offset % 64))
2439 return 1;
2440 else
2441 {
2442 static bool warned;
2443
2444 if (!warned && warn_psabi)
2445 {
2446 warned = true;
2447 inform (input_location,
2448 "the ABI of passing structure with %<complex float%>"
2449 " member has changed in GCC 4.4");
2450 }
2451 classes[1] = X86_64_SSESF_CLASS;
2452 return 2;
2453 }
2454 case E_DCmode:
2455 classes[0] = X86_64_SSEDF_CLASS;
2456 classes[1] = X86_64_SSEDF_CLASS;
2457 return 2;
2458 case E_XCmode:
2459 classes[0] = X86_64_COMPLEX_X87_CLASS;
2460 return 1;
2461 case E_TCmode:
2462 /* This mode is larger than 16 bytes. */
2463 return 0;
2464 case E_V8SFmode:
2465 case E_V8SImode:
2466 case E_V32QImode:
2467 case E_V16HFmode:
2468 case E_V16BFmode:
2469 case E_V16HImode:
2470 case E_V4DFmode:
2471 case E_V4DImode:
2472 classes[0] = X86_64_SSE_CLASS;
2473 classes[1] = X86_64_SSEUP_CLASS;
2474 classes[2] = X86_64_SSEUP_CLASS;
2475 classes[3] = X86_64_SSEUP_CLASS;
2476 return 4;
2477 case E_V8DFmode:
2478 case E_V16SFmode:
2479 case E_V32HFmode:
2480 case E_V32BFmode:
2481 case E_V8DImode:
2482 case E_V16SImode:
2483 case E_V32HImode:
2484 case E_V64QImode:
2485 classes[0] = X86_64_SSE_CLASS;
2486 classes[1] = X86_64_SSEUP_CLASS;
2487 classes[2] = X86_64_SSEUP_CLASS;
2488 classes[3] = X86_64_SSEUP_CLASS;
2489 classes[4] = X86_64_SSEUP_CLASS;
2490 classes[5] = X86_64_SSEUP_CLASS;
2491 classes[6] = X86_64_SSEUP_CLASS;
2492 classes[7] = X86_64_SSEUP_CLASS;
2493 return 8;
2494 case E_V4SFmode:
2495 case E_V4SImode:
2496 case E_V16QImode:
2497 case E_V8HImode:
2498 case E_V8HFmode:
2499 case E_V8BFmode:
2500 case E_V2DFmode:
2501 case E_V2DImode:
2502 classes[0] = X86_64_SSE_CLASS;
2503 classes[1] = X86_64_SSEUP_CLASS;
2504 return 2;
2505 case E_V1TImode:
2506 case E_V1DImode:
2507 case E_V2SFmode:
2508 case E_V2SImode:
2509 case E_V4HImode:
2510 case E_V4HFmode:
2511 case E_V4BFmode:
2512 case E_V2HFmode:
2513 case E_V2BFmode:
2514 case E_V8QImode:
2515 classes[0] = X86_64_SSE_CLASS;
2516 return 1;
2517 case E_BLKmode:
2518 case E_VOIDmode:
2519 return 0;
2520 default:
2521 gcc_assert (VECTOR_MODE_P (mode));
2522
2523 if (bytes > 16)
2524 return 0;
2525
2526 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2527
2528 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2529 classes[0] = X86_64_INTEGERSI_CLASS;
2530 else
2531 classes[0] = X86_64_INTEGER_CLASS;
2532 classes[1] = X86_64_INTEGER_CLASS;
2533 return 1 + (bytes > 8);
2534 }
2535 }
2536
2537 /* Wrapper around classify_argument with the extra zero_width_bitfields
2538 argument, to diagnose GCC 12.1 ABI differences for C. */
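/* An illustrative case (assuming C code, not C++): for
     struct s { float x; int : 0; float y; };
   GCC 12.0 and earlier classified the eightbyte as INTEGER because of
   the zero-width bit-field, so the struct was passed in a general
   register, while GCC 12.1 ignores the bit-field and classifies it as
   SSE.  The wrapper below re-runs the classification both ways and
   emits the -Wpsabi note when the results differ.  */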
2539
2540 static int
2541 classify_argument (machine_mode mode, const_tree type,
2542 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2543 {
2544 int zero_width_bitfields = 0;
2545 static bool warned = false;
2546 int n = classify_argument (mode, type, classes, bit_offset,
2547 zero_width_bitfields);
2548 if (!zero_width_bitfields || warned || !warn_psabi)
2549 return n;
2550 enum x86_64_reg_class alt_classes[MAX_CLASSES];
2551 zero_width_bitfields = 2;
2552 if (classify_argument (mode, type, alt_classes, bit_offset,
2553 zero_width_bitfields) != n)
2554 zero_width_bitfields = 3;
2555 else
2556 for (int i = 0; i < n; i++)
2557 if (classes[i] != alt_classes[i])
2558 {
2559 zero_width_bitfields = 3;
2560 break;
2561 }
2562 if (zero_width_bitfields == 3)
2563 {
2564 warned = true;
2565 const char *url
2566 = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
2567
2568 inform (input_location,
2569 "the ABI of passing C structures with zero-width bit-fields"
2570 " has changed in GCC %{12.1%}", url);
2571 }
2572 return n;
2573 }
2574
2575 /* Examine the argument and set the number of registers required in each
2576    class. Return true iff the parameter should be passed in memory. */
2577
2578 static bool
2579 examine_argument (machine_mode mode, const_tree type, int in_return,
2580 int *int_nregs, int *sse_nregs)
2581 {
2582 enum x86_64_reg_class regclass[MAX_CLASSES];
2583 int n = classify_argument (mode, type, regclass, 0);
2584
2585 *int_nregs = 0;
2586 *sse_nregs = 0;
2587
2588 if (!n)
2589 return true;
2590 for (n--; n >= 0; n--)
2591 switch (regclass[n])
2592 {
2593 case X86_64_INTEGER_CLASS:
2594 case X86_64_INTEGERSI_CLASS:
2595 (*int_nregs)++;
2596 break;
2597 case X86_64_SSE_CLASS:
2598 case X86_64_SSEHF_CLASS:
2599 case X86_64_SSESF_CLASS:
2600 case X86_64_SSEDF_CLASS:
2601 (*sse_nregs)++;
2602 break;
2603 case X86_64_NO_CLASS:
2604 case X86_64_SSEUP_CLASS:
2605 break;
2606 case X86_64_X87_CLASS:
2607 case X86_64_X87UP_CLASS:
2608 case X86_64_COMPLEX_X87_CLASS:
2609 if (!in_return)
2610 return true;
2611 break;
2612 case X86_64_MEMORY_CLASS:
2613 gcc_unreachable ();
2614 }
2615
2616 return false;
2617 }
2618
2619 /* Construct container for the argument used by GCC interface. See
2620 FUNCTION_ARG for the detailed description. */
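/* A sketch of the RTL this can produce (illustrative; register names
   assume this is the first argument): for struct s { double d; long l; }
   classified as { SSEDF, INTEGER }, the container is roughly
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])
   i.e. the double travels in an SSE register at offset 0 and the long
   in an integer register at offset 8.  */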
2621
2622 static rtx
2623 construct_container (machine_mode mode, machine_mode orig_mode,
2624 const_tree type, int in_return, int nintregs, int nsseregs,
2625 const int *intreg, int sse_regno)
2626 {
2627 /* The following variables hold the static issued_error state. */
2628 static bool issued_sse_arg_error;
2629 static bool issued_sse_ret_error;
2630 static bool issued_x87_ret_error;
2631
2632 machine_mode tmpmode;
2633 int bytes
2634 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2635 enum x86_64_reg_class regclass[MAX_CLASSES];
2636 int n;
2637 int i;
2638 int nexps = 0;
2639 int needed_sseregs, needed_intregs;
2640 rtx exp[MAX_CLASSES];
2641 rtx ret;
2642
2643 n = classify_argument (mode, type, regclass, 0);
2644 if (!n)
2645 return NULL;
2646 if (examine_argument (mode, type, in_return, &needed_intregs,
2647 &needed_sseregs))
2648 return NULL;
2649 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2650 return NULL;
2651
2652 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2653 some less clueful developer tries to use floating-point anyway. */
2654 if (needed_sseregs && !TARGET_SSE)
2655 {
2656 /* Return early if we shouldn't raise an error for invalid
2657 calls. */
2658 if (cfun != NULL && cfun->machine->silent_p)
2659 return NULL;
2660 if (in_return)
2661 {
2662 if (!issued_sse_ret_error)
2663 {
2664 error ("SSE register return with SSE disabled");
2665 issued_sse_ret_error = true;
2666 }
2667 }
2668 else if (!issued_sse_arg_error)
2669 {
2670 error ("SSE register argument with SSE disabled");
2671 issued_sse_arg_error = true;
2672 }
2673 return NULL;
2674 }
2675
2676 /* Likewise, error if the ABI requires us to return values in the
2677 x87 registers and the user specified -mno-80387. */
2678 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2679 for (i = 0; i < n; i++)
2680 if (regclass[i] == X86_64_X87_CLASS
2681 || regclass[i] == X86_64_X87UP_CLASS
2682 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2683 {
2684 /* Return early if we shouldn't raise an error for invalid
2685 calls. */
2686 if (cfun != NULL && cfun->machine->silent_p)
2687 return NULL;
2688 if (!issued_x87_ret_error)
2689 {
2690 error ("x87 register return with x87 disabled");
2691 issued_x87_ret_error = true;
2692 }
2693 return NULL;
2694 }
2695
2696 /* First construct the simple cases. Avoid SCmode, since we want to use
2697    a single register to pass this type. */
2698 if (n == 1 && mode != SCmode && mode != HCmode)
2699 switch (regclass[0])
2700 {
2701 case X86_64_INTEGER_CLASS:
2702 case X86_64_INTEGERSI_CLASS:
2703 return gen_rtx_REG (mode, intreg[0]);
2704 case X86_64_SSE_CLASS:
2705 case X86_64_SSEHF_CLASS:
2706 case X86_64_SSESF_CLASS:
2707 case X86_64_SSEDF_CLASS:
2708 if (mode != BLKmode)
2709 return gen_reg_or_parallel (mode, orig_mode,
2710 GET_SSE_REGNO (sse_regno));
2711 break;
2712 case X86_64_X87_CLASS:
2713 case X86_64_COMPLEX_X87_CLASS:
2714 return gen_rtx_REG (mode, FIRST_STACK_REG);
2715 case X86_64_NO_CLASS:
2716 /* Zero sized array, struct or class. */
2717 return NULL;
2718 default:
2719 gcc_unreachable ();
2720 }
2721 if (n == 2
2722 && regclass[0] == X86_64_SSE_CLASS
2723 && regclass[1] == X86_64_SSEUP_CLASS
2724 && mode != BLKmode)
2725 return gen_reg_or_parallel (mode, orig_mode,
2726 GET_SSE_REGNO (sse_regno));
2727 if (n == 4
2728 && regclass[0] == X86_64_SSE_CLASS
2729 && regclass[1] == X86_64_SSEUP_CLASS
2730 && regclass[2] == X86_64_SSEUP_CLASS
2731 && regclass[3] == X86_64_SSEUP_CLASS
2732 && mode != BLKmode)
2733 return gen_reg_or_parallel (mode, orig_mode,
2734 GET_SSE_REGNO (sse_regno));
2735 if (n == 8
2736 && regclass[0] == X86_64_SSE_CLASS
2737 && regclass[1] == X86_64_SSEUP_CLASS
2738 && regclass[2] == X86_64_SSEUP_CLASS
2739 && regclass[3] == X86_64_SSEUP_CLASS
2740 && regclass[4] == X86_64_SSEUP_CLASS
2741 && regclass[5] == X86_64_SSEUP_CLASS
2742 && regclass[6] == X86_64_SSEUP_CLASS
2743 && regclass[7] == X86_64_SSEUP_CLASS
2744 && mode != BLKmode)
2745 return gen_reg_or_parallel (mode, orig_mode,
2746 GET_SSE_REGNO (sse_regno));
2747 if (n == 2
2748 && regclass[0] == X86_64_X87_CLASS
2749 && regclass[1] == X86_64_X87UP_CLASS)
2750 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2751
2752 if (n == 2
2753 && regclass[0] == X86_64_INTEGER_CLASS
2754 && regclass[1] == X86_64_INTEGER_CLASS
2755 && (mode == CDImode || mode == TImode || mode == BLKmode)
2756 && intreg[0] + 1 == intreg[1])
2757 {
2758 if (mode == BLKmode)
2759 {
2760 /* Use TImode for BLKmode values in 2 integer registers. */
2761 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2762 gen_rtx_REG (TImode, intreg[0]),
2763 GEN_INT (0));
2764 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2765 XVECEXP (ret, 0, 0) = exp[0];
2766 return ret;
2767 }
2768 else
2769 return gen_rtx_REG (mode, intreg[0]);
2770 }
2771
2772 /* Otherwise figure out the entries of the PARALLEL. */
2773 for (i = 0; i < n; i++)
2774 {
2775 int pos;
2776
2777 switch (regclass[i])
2778 {
2779 case X86_64_NO_CLASS:
2780 break;
2781 case X86_64_INTEGER_CLASS:
2782 case X86_64_INTEGERSI_CLASS:
2783 /* Merge TImodes on aligned occasions here too. */
2784 if (i * 8 + 8 > bytes)
2785 {
2786 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2787 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2788 /* We've requested 24 bytes, which we
2789    don't have a mode for. Use DImode. */
2790 tmpmode = DImode;
2791 }
2792 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2793 tmpmode = SImode;
2794 else
2795 tmpmode = DImode;
2796 exp [nexps++]
2797 = gen_rtx_EXPR_LIST (VOIDmode,
2798 gen_rtx_REG (tmpmode, *intreg),
2799 GEN_INT (i*8));
2800 intreg++;
2801 break;
2802 case X86_64_SSEHF_CLASS:
2803 tmpmode = (mode == BFmode ? BFmode : HFmode);
2804 exp [nexps++]
2805 = gen_rtx_EXPR_LIST (VOIDmode,
2806 gen_rtx_REG (tmpmode,
2807 GET_SSE_REGNO (sse_regno)),
2808 GEN_INT (i*8));
2809 sse_regno++;
2810 break;
2811 case X86_64_SSESF_CLASS:
2812 exp [nexps++]
2813 = gen_rtx_EXPR_LIST (VOIDmode,
2814 gen_rtx_REG (SFmode,
2815 GET_SSE_REGNO (sse_regno)),
2816 GEN_INT (i*8));
2817 sse_regno++;
2818 break;
2819 case X86_64_SSEDF_CLASS:
2820 exp [nexps++]
2821 = gen_rtx_EXPR_LIST (VOIDmode,
2822 gen_rtx_REG (DFmode,
2823 GET_SSE_REGNO (sse_regno)),
2824 GEN_INT (i*8));
2825 sse_regno++;
2826 break;
2827 case X86_64_SSE_CLASS:
2828 pos = i;
2829 switch (n)
2830 {
2831 case 1:
2832 tmpmode = DImode;
2833 break;
2834 case 2:
2835 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2836 {
2837 tmpmode = TImode;
2838 i++;
2839 }
2840 else
2841 tmpmode = DImode;
2842 break;
2843 case 4:
2844 gcc_assert (i == 0
2845 && regclass[1] == X86_64_SSEUP_CLASS
2846 && regclass[2] == X86_64_SSEUP_CLASS
2847 && regclass[3] == X86_64_SSEUP_CLASS);
2848 tmpmode = OImode;
2849 i += 3;
2850 break;
2851 case 8:
2852 gcc_assert (i == 0
2853 && regclass[1] == X86_64_SSEUP_CLASS
2854 && regclass[2] == X86_64_SSEUP_CLASS
2855 && regclass[3] == X86_64_SSEUP_CLASS
2856 && regclass[4] == X86_64_SSEUP_CLASS
2857 && regclass[5] == X86_64_SSEUP_CLASS
2858 && regclass[6] == X86_64_SSEUP_CLASS
2859 && regclass[7] == X86_64_SSEUP_CLASS);
2860 tmpmode = XImode;
2861 i += 7;
2862 break;
2863 default:
2864 gcc_unreachable ();
2865 }
2866 exp [nexps++]
2867 = gen_rtx_EXPR_LIST (VOIDmode,
2868 gen_rtx_REG (tmpmode,
2869 GET_SSE_REGNO (sse_regno)),
2870 GEN_INT (pos*8));
2871 sse_regno++;
2872 break;
2873 default:
2874 gcc_unreachable ();
2875 }
2876 }
2877
2878 /* Empty aligned struct, union or class. */
2879 if (nexps == 0)
2880 return NULL;
2881
2882 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2883 for (i = 0; i < nexps; i++)
2884 XVECEXP (ret, 0, i) = exp [i];
2885 return ret;
2886 }
2887
2888 /* Update the data in CUM to advance over an argument of mode MODE
2889 and data type TYPE. (TYPE is null for libcalls where that information
2890 may not be available.)
2891
2892 Return the number of integer registers advanced over. */
2893
2894 static int
2895 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2896 const_tree type, HOST_WIDE_INT bytes,
2897 HOST_WIDE_INT words)
2898 {
2899 int res = 0;
2900 bool error_p = false;
2901
2902 if (TARGET_IAMCU)
2903 {
2904 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2905 bytes in registers. */
2906 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2907 goto pass_in_reg;
2908 return res;
2909 }
2910
2911 switch (mode)
2912 {
2913 default:
2914 break;
2915
2916 case E_BLKmode:
2917 if (bytes < 0)
2918 break;
2919 /* FALLTHRU */
2920
2921 case E_DImode:
2922 case E_SImode:
2923 case E_HImode:
2924 case E_QImode:
2925 pass_in_reg:
2926 cum->words += words;
2927 cum->nregs -= words;
2928 cum->regno += words;
2929 if (cum->nregs >= 0)
2930 res = words;
2931 if (cum->nregs <= 0)
2932 {
2933 cum->nregs = 0;
2934 cfun->machine->arg_reg_available = false;
2935 cum->regno = 0;
2936 }
2937 break;
2938
2939 case E_OImode:
2940 /* OImode shouldn't be used directly. */
2941 gcc_unreachable ();
2942
2943 case E_DFmode:
2944 if (cum->float_in_sse == -1)
2945 error_p = true;
2946 if (cum->float_in_sse < 2)
2947 break;
2948 /* FALLTHRU */
2949 case E_SFmode:
2950 if (cum->float_in_sse == -1)
2951 error_p = true;
2952 if (cum->float_in_sse < 1)
2953 break;
2954 /* FALLTHRU */
2955
2956 case E_V16HFmode:
2957 case E_V16BFmode:
2958 case E_V8SFmode:
2959 case E_V8SImode:
2960 case E_V64QImode:
2961 case E_V32HImode:
2962 case E_V16SImode:
2963 case E_V8DImode:
2964 case E_V32HFmode:
2965 case E_V32BFmode:
2966 case E_V16SFmode:
2967 case E_V8DFmode:
2968 case E_V32QImode:
2969 case E_V16HImode:
2970 case E_V4DFmode:
2971 case E_V4DImode:
2972 case E_TImode:
2973 case E_V16QImode:
2974 case E_V8HImode:
2975 case E_V4SImode:
2976 case E_V2DImode:
2977 case E_V8HFmode:
2978 case E_V8BFmode:
2979 case E_V4SFmode:
2980 case E_V2DFmode:
2981 if (!type || !AGGREGATE_TYPE_P (type))
2982 {
2983 cum->sse_words += words;
2984 cum->sse_nregs -= 1;
2985 cum->sse_regno += 1;
2986 if (cum->sse_nregs <= 0)
2987 {
2988 cum->sse_nregs = 0;
2989 cum->sse_regno = 0;
2990 }
2991 }
2992 break;
2993
2994 case E_V8QImode:
2995 case E_V4HImode:
2996 case E_V4HFmode:
2997 case E_V4BFmode:
2998 case E_V2SImode:
2999 case E_V2SFmode:
3000 case E_V1TImode:
3001 case E_V1DImode:
3002 if (!type || !AGGREGATE_TYPE_P (type))
3003 {
3004 cum->mmx_words += words;
3005 cum->mmx_nregs -= 1;
3006 cum->mmx_regno += 1;
3007 if (cum->mmx_nregs <= 0)
3008 {
3009 cum->mmx_nregs = 0;
3010 cum->mmx_regno = 0;
3011 }
3012 }
3013 break;
3014 }
3015 if (error_p)
3016 {
3017 cum->float_in_sse = 0;
3018 error ("calling %qD with SSE calling convention without "
3019 "SSE/SSE2 enabled", cum->decl);
3020 sorry ("this is a GCC bug that can be worked around by adding "
3021 "attribute used to function called");
3022 }
3023
3024 return res;
3025 }
3026
3027 static int
3028 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
3029 const_tree type, HOST_WIDE_INT words, bool named)
3030 {
3031 int int_nregs, sse_nregs;
3032
3033 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
3034 if (!named && (VALID_AVX512F_REG_MODE (mode)
3035 || VALID_AVX256_REG_MODE (mode)))
3036 return 0;
3037
3038 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
3039 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3040 {
3041 cum->nregs -= int_nregs;
3042 cum->sse_nregs -= sse_nregs;
3043 cum->regno += int_nregs;
3044 cum->sse_regno += sse_nregs;
3045 return int_nregs;
3046 }
3047 else
3048 {
3049 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
3050 cum->words = ROUND_UP (cum->words, align);
3051 cum->words += words;
3052 return 0;
3053 }
3054 }
3055
3056 static int
3057 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3058 HOST_WIDE_INT words)
3059 {
3060 /* Otherwise, this should be passed indirectly. */
3061 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3062
3063 cum->words += words;
3064 if (cum->nregs > 0)
3065 {
3066 cum->nregs -= 1;
3067 cum->regno += 1;
3068 return 1;
3069 }
3070 return 0;
3071 }
3072
3073 /* Update the data in CUM to advance over argument ARG. */
3074
3075 static void
3076 ix86_function_arg_advance (cumulative_args_t cum_v,
3077 const function_arg_info &arg)
3078 {
3079 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3080 machine_mode mode = arg.mode;
3081 HOST_WIDE_INT bytes, words;
3082 int nregs;
3083
3084 /* The argument of interrupt handler is a special case and is
3085 handled in ix86_function_arg. */
3086 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3087 return;
3088
3089 bytes = arg.promoted_size_in_bytes ();
3090 words = CEIL (bytes, UNITS_PER_WORD);
3091
3092 if (arg.type)
3093 mode = type_natural_mode (arg.type, NULL, false);
3094
3095 if (TARGET_64BIT)
3096 {
3097 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3098
3099 if (call_abi == MS_ABI)
3100 nregs = function_arg_advance_ms_64 (cum, bytes, words);
3101 else
3102 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
3103 arg.named);
3104 }
3105 else
3106 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
3107
3108 if (!nregs)
3109 {
3110 /* Track if there are outgoing arguments on stack. */
3111 if (cum->caller)
3112 cfun->machine->outgoing_args_on_stack = true;
3113 }
3114 }
3115
3116 /* Define where to put the arguments to a function.
3117 Value is zero to push the argument on the stack,
3118 or a hard register in which to store the argument.
3119
3120 MODE is the argument's machine mode.
3121 TYPE is the data type of the argument (as a tree).
3122 This is null for libcalls where that information may
3123 not be available.
3124 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3125 the preceding args and about the function being called.
3126 NAMED is nonzero if this argument is a named parameter
3127 (otherwise it is an extra parameter matching an ellipsis). */
3128
3129 static rtx
3130 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
3131 machine_mode orig_mode, const_tree type,
3132 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3133 {
3134 bool error_p = false;
3135
3136 /* Avoid the AL settings for the Unix64 ABI. */
3137 if (mode == VOIDmode)
3138 return constm1_rtx;
3139
3140 if (TARGET_IAMCU)
3141 {
3142 /* Intel MCU psABI passes scalars and aggregates no larger than 8
3143 bytes in registers. */
3144 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3145 goto pass_in_reg;
3146 return NULL_RTX;
3147 }
3148
3149 switch (mode)
3150 {
3151 default:
3152 break;
3153
3154 case E_BLKmode:
3155 if (bytes < 0)
3156 break;
3157 /* FALLTHRU */
3158 case E_DImode:
3159 case E_SImode:
3160 case E_HImode:
3161 case E_QImode:
3162 pass_in_reg:
3163 if (words <= cum->nregs)
3164 {
3165 int regno = cum->regno;
3166
3167 /* Fastcall allocates the first two DWORD (SImode) or
3168    smaller arguments to ECX and EDX if the argument isn't an
3169    aggregate type. */
3170 if (cum->fastcall)
3171 {
3172 if (mode == BLKmode
3173 || mode == DImode
3174 || (type && AGGREGATE_TYPE_P (type)))
3175 break;
3176
3177 /* ECX, not EAX, is the first allocated register. */
3178 if (regno == AX_REG)
3179 regno = CX_REG;
3180 }
3181 return gen_rtx_REG (mode, regno);
3182 }
3183 break;
3184
3185 case E_DFmode:
3186 if (cum->float_in_sse == -1)
3187 error_p = true;
3188 if (cum->float_in_sse < 2)
3189 break;
3190 /* FALLTHRU */
3191 case E_SFmode:
3192 if (cum->float_in_sse == -1)
3193 error_p = true;
3194 if (cum->float_in_sse < 1)
3195 break;
3196 /* FALLTHRU */
3197 case E_TImode:
3198 /* In 32bit, we pass TImode in xmm registers. */
3199 case E_V16QImode:
3200 case E_V8HImode:
3201 case E_V4SImode:
3202 case E_V2DImode:
3203 case E_V8HFmode:
3204 case E_V8BFmode:
3205 case E_V4SFmode:
3206 case E_V2DFmode:
3207 if (!type || !AGGREGATE_TYPE_P (type))
3208 {
3209 if (cum->sse_nregs)
3210 return gen_reg_or_parallel (mode, orig_mode,
3211 cum->sse_regno + FIRST_SSE_REG);
3212 }
3213 break;
3214
3215 case E_OImode:
3216 case E_XImode:
3217 /* OImode and XImode shouldn't be used directly. */
3218 gcc_unreachable ();
3219
3220 case E_V64QImode:
3221 case E_V32HImode:
3222 case E_V16SImode:
3223 case E_V8DImode:
3224 case E_V32HFmode:
3225 case E_V32BFmode:
3226 case E_V16SFmode:
3227 case E_V8DFmode:
3228 case E_V16HFmode:
3229 case E_V16BFmode:
3230 case E_V8SFmode:
3231 case E_V8SImode:
3232 case E_V32QImode:
3233 case E_V16HImode:
3234 case E_V4DFmode:
3235 case E_V4DImode:
3236 if (!type || !AGGREGATE_TYPE_P (type))
3237 {
3238 if (cum->sse_nregs)
3239 return gen_reg_or_parallel (mode, orig_mode,
3240 cum->sse_regno + FIRST_SSE_REG);
3241 }
3242 break;
3243
3244 case E_V8QImode:
3245 case E_V4HImode:
3246 case E_V4HFmode:
3247 case E_V4BFmode:
3248 case E_V2SImode:
3249 case E_V2SFmode:
3250 case E_V1TImode:
3251 case E_V1DImode:
3252 if (!type || !AGGREGATE_TYPE_P (type))
3253 {
3254 if (cum->mmx_nregs)
3255 return gen_reg_or_parallel (mode, orig_mode,
3256 cum->mmx_regno + FIRST_MMX_REG);
3257 }
3258 break;
3259 }
3260 if (error_p)
3261 {
3262 cum->float_in_sse = 0;
3263 error ("calling %qD with SSE calling convention without "
3264 "SSE/SSE2 enabled", cum->decl);
3265 sorry ("this is a GCC bug that can be worked around by adding "
3266 "attribute used to function called");
3267 }
3268
3269 return NULL_RTX;
3270 }
3271
3272 static rtx
3273 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3274 machine_mode orig_mode, const_tree type, bool named)
3275 {
3276 /* Handle a hidden AL argument containing number of registers
3277 for varargs x86-64 functions. */
3278 if (mode == VOIDmode)
3279 return GEN_INT (cum->maybe_vaarg
3280 ? (cum->sse_nregs < 0
3281 ? X86_64_SSE_REGPARM_MAX
3282 : cum->sse_regno)
3283 : -1);
3284
3285 switch (mode)
3286 {
3287 default:
3288 break;
3289
3290 case E_V16HFmode:
3291 case E_V16BFmode:
3292 case E_V8SFmode:
3293 case E_V8SImode:
3294 case E_V32QImode:
3295 case E_V16HImode:
3296 case E_V4DFmode:
3297 case E_V4DImode:
3298 case E_V32HFmode:
3299 case E_V32BFmode:
3300 case E_V16SFmode:
3301 case E_V16SImode:
3302 case E_V64QImode:
3303 case E_V32HImode:
3304 case E_V8DFmode:
3305 case E_V8DImode:
3306 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3307 if (!named)
3308 return NULL;
3309 break;
3310 }
3311
3312 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3313 cum->sse_nregs,
3314 &x86_64_int_parameter_registers [cum->regno],
3315 cum->sse_regno);
3316 }
3317
3318 static rtx
3319 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3320 machine_mode orig_mode, bool named, const_tree type,
3321 HOST_WIDE_INT bytes)
3322 {
3323 unsigned int regno;
3324
3325 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
3326    We use the value -2 to specify that the current function call is MS ABI. */
3327 if (mode == VOIDmode)
3328 return GEN_INT (-2);
3329
3330 /* If we've run out of registers, it goes on the stack. */
3331 if (cum->nregs == 0)
3332 return NULL_RTX;
3333
3334 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3335
3336 /* Only floating point modes are passed in anything but integer regs. */
3337 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3338 {
3339 if (named)
3340 {
3341 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3342 regno = cum->regno + FIRST_SSE_REG;
3343 }
3344 else
3345 {
3346 rtx t1, t2;
3347
3348 /* Unnamed floating parameters are passed in both the
3349 SSE and integer registers. */
3350 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3351 t2 = gen_rtx_REG (mode, regno);
3352 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3353 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3354 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3355 }
3356 }
3357 /* Handle aggregate types passed in registers. */
3358 if (orig_mode == BLKmode)
3359 {
3360 if (bytes > 0 && bytes <= 8)
3361 mode = (bytes > 4 ? DImode : SImode);
3362 if (mode == BLKmode)
3363 mode = DImode;
3364 }
3365
3366 return gen_reg_or_parallel (mode, orig_mode, regno);
3367 }
3368
3369 /* Return where to put the arguments to a function.
3370 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3371
3372 ARG describes the argument while CUM gives information about the
3373 preceding args and about the function being called. */
3374
3375 static rtx
3376 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3377 {
3378 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3379 machine_mode mode = arg.mode;
3380 HOST_WIDE_INT bytes, words;
3381 rtx reg;
3382
3383 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3384 {
3385 gcc_assert (arg.type != NULL_TREE);
3386 if (POINTER_TYPE_P (arg.type))
3387 {
3388 /* This is the pointer argument. */
3389 gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
3390 /* It is at -WORD(AP) in the current frame in interrupt and
3391 exception handlers. */
3392 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3393 }
3394 else
3395 {
3396 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3397 && TREE_CODE (arg.type) == INTEGER_TYPE
3398 && TYPE_MODE (arg.type) == word_mode);
3399 /* The error code is the word-mode integer argument at
3400 -2 * WORD(AP) in the current frame of the exception
3401 handler. */
3402 reg = gen_rtx_MEM (word_mode,
3403 plus_constant (Pmode,
3404 arg_pointer_rtx,
3405 -2 * UNITS_PER_WORD));
3406 }
3407 return reg;
3408 }
3409
3410 bytes = arg.promoted_size_in_bytes ();
3411 words = CEIL (bytes, UNITS_PER_WORD);
3412
3413 /* To simplify the code below, represent vector types with a vector mode
3414 even if MMX/SSE are not active. */
3415 if (arg.type && VECTOR_TYPE_P (arg.type))
3416 mode = type_natural_mode (arg.type, cum, false);
3417
3418 if (TARGET_64BIT)
3419 {
3420 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3421
3422 if (call_abi == MS_ABI)
3423 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3424 arg.type, bytes);
3425 else
3426 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3427 }
3428 else
3429 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3430
3431 /* Track if there are outgoing arguments on stack. */
3432 if (reg == NULL_RTX && cum->caller)
3433 cfun->machine->outgoing_args_on_stack = true;
3434
3435 return reg;
3436 }
3437
3438 /* A C expression that indicates when an argument must be passed by
3439 reference. If nonzero for an argument, a copy of that argument is
3440 made in memory and a pointer to the argument is passed instead of
3441 the argument itself. The pointer is passed in whatever way is
3442 appropriate for passing a pointer to that type. */
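/* For instance (illustrative), under the Windows x64 convention handled
   below a 12-byte struct is passed by reference, because its size is not
   1, 2, 4 or 8 bytes, whereas an 8-byte struct is passed by value in a
   register.  */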
3443
3444 static bool
3445 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3446 {
3447 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3448
3449 if (TARGET_64BIT)
3450 {
3451 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3452
3453 /* See Windows x64 Software Convention. */
3454 if (call_abi == MS_ABI)
3455 {
3456 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3457
3458 if (tree type = arg.type)
3459 {
3460 /* Arrays are passed by reference. */
3461 if (TREE_CODE (type) == ARRAY_TYPE)
3462 return true;
3463
3464 if (RECORD_OR_UNION_TYPE_P (type))
3465 {
3466 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3467 are passed by reference. */
3468 msize = int_size_in_bytes (type);
3469 }
3470 }
3471
3472 /* __m128 is passed by reference. */
3473 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3474 }
3475 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3476 return true;
3477 }
3478
3479 return false;
3480 }
3481
3482 /* Return true when TYPE should be 128bit aligned for 32bit argument
3483 passing ABI. XXX: This function is obsolete and is only used for
3484 checking psABI compatibility with previous versions of GCC. */
3485
3486 static bool
3487 ix86_compat_aligned_value_p (const_tree type)
3488 {
3489 machine_mode mode = TYPE_MODE (type);
3490 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3491 || mode == TDmode
3492 || mode == TFmode
3493 || mode == TCmode)
3494 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3495 return true;
3496 if (TYPE_ALIGN (type) < 128)
3497 return false;
3498
3499 if (AGGREGATE_TYPE_P (type))
3500 {
3501 /* Walk the aggregates recursively. */
3502 switch (TREE_CODE (type))
3503 {
3504 case RECORD_TYPE:
3505 case UNION_TYPE:
3506 case QUAL_UNION_TYPE:
3507 {
3508 tree field;
3509
3510 /* Walk all the structure fields. */
3511 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3512 {
3513 if (TREE_CODE (field) == FIELD_DECL
3514 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3515 return true;
3516 }
3517 break;
3518 }
3519
3520 case ARRAY_TYPE:
3521 /* Just for use if some languages pass arrays by value. */
3522 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3523 return true;
3524 break;
3525
3526 default:
3527 gcc_unreachable ();
3528 }
3529 }
3530 return false;
3531 }
3532
3533 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3534 XXX: This function is obsolete and is only used for checking psABI
3535 compatibility with previous versions of GCC. */
3536
3537 static unsigned int
3538 ix86_compat_function_arg_boundary (machine_mode mode,
3539 const_tree type, unsigned int align)
3540 {
3541 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3542 natural boundaries. */
3543 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3544 {
3545 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3546 make an exception for SSE modes since these require 128bit
3547 alignment.
3548
3549 The handling here differs from field_alignment. ICC aligns MMX
3550 arguments to 4 byte boundaries, while structure fields are aligned
3551 to 8 byte boundaries. */
3552 if (!type)
3553 {
3554 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3555 align = PARM_BOUNDARY;
3556 }
3557 else
3558 {
3559 if (!ix86_compat_aligned_value_p (type))
3560 align = PARM_BOUNDARY;
3561 }
3562 }
3563 if (align > BIGGEST_ALIGNMENT)
3564 align = BIGGEST_ALIGNMENT;
3565 return align;
3566 }
3567
3568 /* Return true when TYPE should be 128bit aligned for 32bit argument
3569 passing ABI. */
3570
3571 static bool
3572 ix86_contains_aligned_value_p (const_tree type)
3573 {
3574 machine_mode mode = TYPE_MODE (type);
3575
3576 if (mode == XFmode || mode == XCmode)
3577 return false;
3578
3579 if (TYPE_ALIGN (type) < 128)
3580 return false;
3581
3582 if (AGGREGATE_TYPE_P (type))
3583 {
3584 /* Walk the aggregates recursively. */
3585 switch (TREE_CODE (type))
3586 {
3587 case RECORD_TYPE:
3588 case UNION_TYPE:
3589 case QUAL_UNION_TYPE:
3590 {
3591 tree field;
3592
3593 /* Walk all the structure fields. */
3594 for (field = TYPE_FIELDS (type);
3595 field;
3596 field = DECL_CHAIN (field))
3597 {
3598 if (TREE_CODE (field) == FIELD_DECL
3599 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3600 return true;
3601 }
3602 break;
3603 }
3604
3605 case ARRAY_TYPE:
3606 /* Just for use if some languages pass arrays by value. */
3607 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3608 return true;
3609 break;
3610
3611 default:
3612 gcc_unreachable ();
3613 }
3614 }
3615 else
3616 return TYPE_ALIGN (type) >= 128;
3617
3618 return false;
3619 }
3620
3621 /* Gives the alignment boundary, in bits, of an argument with the
3622 specified mode and type. */
3623
3624 static unsigned int
3625 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3626 {
3627 unsigned int align;
3628 if (type)
3629 {
3630 /* Since the main variant type is used for the call, convert TYPE
3631    to its main variant. */
3632 type = TYPE_MAIN_VARIANT (type);
3633 align = TYPE_ALIGN (type);
3634 if (TYPE_EMPTY_P (type))
3635 return PARM_BOUNDARY;
3636 }
3637 else
3638 align = GET_MODE_ALIGNMENT (mode);
3639 if (align < PARM_BOUNDARY)
3640 align = PARM_BOUNDARY;
3641 else
3642 {
3643 static bool warned;
3644 unsigned int saved_align = align;
3645
3646 if (!TARGET_64BIT)
3647 {
3648 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3649 if (!type)
3650 {
3651 if (mode == XFmode || mode == XCmode)
3652 align = PARM_BOUNDARY;
3653 }
3654 else if (!ix86_contains_aligned_value_p (type))
3655 align = PARM_BOUNDARY;
3656
3657 if (align < 128)
3658 align = PARM_BOUNDARY;
3659 }
3660
3661 if (warn_psabi
3662 && !warned
3663 && align != ix86_compat_function_arg_boundary (mode, type,
3664 saved_align))
3665 {
3666 warned = true;
3667 inform (input_location,
3668 "the ABI for passing parameters with %d-byte"
3669 " alignment has changed in GCC 4.6",
3670 align / BITS_PER_UNIT);
3671 }
3672 }
3673
3674 return align;
3675 }
3676
3677 /* Return true if N is a possible register number of function value. */
3678
3679 static bool
3680 ix86_function_value_regno_p (const unsigned int regno)
3681 {
3682 switch (regno)
3683 {
3684 case AX_REG:
3685 return true;
3686 case DX_REG:
3687 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3688 case DI_REG:
3689 case SI_REG:
3690 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3691
3692 /* Complex values are returned in %st(0)/%st(1) pair. */
3693 case ST0_REG:
3694 case ST1_REG:
3695 /* TODO: The function should depend on current function ABI but
3696 builtins.cc would need updating then. Therefore we use the
3697 default ABI. */
3698 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3699 return false;
3700 return TARGET_FLOAT_RETURNS_IN_80387;
3701
3702 /* Complex values are returned in %xmm0/%xmm1 pair. */
3703 case XMM0_REG:
3704 case XMM1_REG:
3705 return TARGET_SSE;
3706
3707 case MM0_REG:
3708 if (TARGET_MACHO || TARGET_64BIT)
3709 return false;
3710 return TARGET_MMX;
3711 }
3712
3713 return false;
3714 }
3715
3716 /* Check whether the register REGNO should be zeroed on X86.
3717    When ALL_SSE_ZEROED is true, all SSE registers have already been
3718    zeroed together, so there is no need to zero them again.
3719    When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3720
3721 static bool
3722 zero_call_used_regno_p (const unsigned int regno,
3723 bool all_sse_zeroed,
3724 bool need_zero_mmx)
3725 {
3726 return GENERAL_REGNO_P (regno)
3727 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3728 || MASK_REGNO_P (regno)
3729 || (need_zero_mmx && MMX_REGNO_P (regno));
3730 }
3731
3732 /* Return the machine_mode that is used to zero register REGNO. */
3733
3734 static machine_mode
3735 zero_call_used_regno_mode (const unsigned int regno)
3736 {
3737 /* NB: We only need to zero the lower 32 bits for integer registers
3738    and the lower 128 bits for vector registers since the destination is
3739    zero-extended to the full register width. */
3740 if (GENERAL_REGNO_P (regno))
3741 return SImode;
3742 else if (SSE_REGNO_P (regno))
3743 return V4SFmode;
3744 else if (MASK_REGNO_P (regno))
3745 return HImode;
3746 else if (MMX_REGNO_P (regno))
3747 return V2SImode;
3748 else
3749 gcc_unreachable ();
3750 }
3751
3752 /* Generate an rtx to zero all vector registers together if possible;
3753    otherwise, return NULL. */
3754
3755 static rtx
3756 zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3757 {
3758 if (!TARGET_AVX)
3759 return NULL;
3760
3761 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3762 if ((LEGACY_SSE_REGNO_P (regno)
3763 || (TARGET_64BIT
3764 && (REX_SSE_REGNO_P (regno)
3765 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3766 && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3767 return NULL;
3768
3769 return gen_avx_vzeroall ();
3770 }
3771
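/* For reference (illustrative, not compiler source): the single insn
   returned here expands to

       vzeroall        # zeroes %ymm0 .. %ymm15 in one instruction

   which is why it is only used when every vector register that would
   otherwise be cleared individually is present in NEED_ZEROED_HARDREGS. */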
3772 /* Generate insns to zero all st registers together.
3773 Return the number of st registers that are zeroed (zero if none are).
3774 Assuming the number of st registers to be zeroed is num_of_st,
3775 we emit the following sequence to zero them together:
3776 fldz; \
3777 fldz; \
3778 ...
3779 fldz; \
3780 fstp %%st(0); \
3781 fstp %%st(0); \
3782 ...
3783 fstp %%st(0);
3784 i.e., num_of_st fldz insns followed by num_of_st fstp insns to clear
3785 the stack and mark the stack slots empty.
3786
3787 How num_of_st is computed:
3788 There is no direct mapping from stack registers to hard register
3789 numbers. If one stack register needs to be cleared, we don't know
3790 where in the stack the value remains. So, if any stack register
3791 needs to be cleared, the whole stack should be cleared. However,
3792 x87 stack registers that hold the return value should be excluded.
3793 x87 returns in the top register (the top two for complex values),
3794 so num_of_st is 7 (or 6) when the function returns in x87, and 8
3795 otherwise. Return the value of num_of_st. */
3796
3797
3798 static int
3799 zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3800 {
3801
3802 /* If the FPU is disabled, no need to zero all st registers. */
3803 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3804 return 0;
3805
3806 unsigned int num_of_st = 0;
3807 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3808 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3809 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3810 {
3811 num_of_st++;
3812 break;
3813 }
3814
3815 if (num_of_st == 0)
3816 return 0;
3817
3818 bool return_with_x87 = false;
3819 return_with_x87 = (crtl->return_rtx
3820 && (STACK_REG_P (crtl->return_rtx)));
3821
3822 bool complex_return = false;
3823 complex_return = (crtl->return_rtx
3824 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3825
3826 if (return_with_x87)
3827 if (complex_return)
3828 num_of_st = 6;
3829 else
3830 num_of_st = 7;
3831 else
3832 num_of_st = 8;
3833
3834 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
3835 for (unsigned int i = 0; i < num_of_st; i++)
3836 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3837
3838 for (unsigned int i = 0; i < num_of_st; i++)
3839 {
3840 rtx insn;
3841 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3842 add_reg_note (insn, REG_DEAD, st_reg);
3843 }
3844 return num_of_st;
3845 }
3846
3847
3848 /* When the routine exits in MMX mode, if any ST register needs
3849 to be zeroed, we should clear all MMX registers except the
3850 RET_MMX_REGNO that holds the return value. */
3851 static bool
3852 zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3853 unsigned int ret_mmx_regno)
3854 {
3855 bool need_zero_all_mm = false;
3856 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3857 if (STACK_REGNO_P (regno)
3858 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3859 {
3860 need_zero_all_mm = true;
3861 break;
3862 }
3863
3864 if (!need_zero_all_mm)
3865 return false;
3866
3867 machine_mode mode = V2SImode;
3868 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3869 if (regno != ret_mmx_regno)
3870 {
3871 rtx reg = gen_rtx_REG (mode, regno);
3872 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3873 }
3874 return true;
3875 }
3876
3877 /* TARGET_ZERO_CALL_USED_REGS. */
3878 /* Generate a sequence of instructions that zero registers specified by
3879 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3880 zeroed. */
3881 static HARD_REG_SET
3882 ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3883 {
3884 HARD_REG_SET zeroed_hardregs;
3885 bool all_sse_zeroed = false;
3886 int all_st_zeroed_num = 0;
3887 bool all_mm_zeroed = false;
3888
3889 CLEAR_HARD_REG_SET (zeroed_hardregs);
3890
3891 /* First, see whether we can zero all vector registers together. */
3892 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
3893 if (zero_all_vec_insn)
3894 {
3895 emit_insn (zero_all_vec_insn);
3896 all_sse_zeroed = true;
3897 }
3898
3899 /* MMX and x87 stack registers share the same register set; we follow
3900 these rules to clear them:
3901 MMX exit mode x87 exit mode
3902 -------------|----------------------|---------------
3903 uses x87 reg | clear all MMX | clear all x87
3904 uses MMX reg | clear individual MMX | clear all x87
3905 x87 + MMX | clear all MMX | clear all x87
3906
3907 First, decide which mode (MMX mode or x87 mode) the function
3908 exits with. */
3909
3910 bool exit_with_mmx_mode = (crtl->return_rtx
3911 && (MMX_REG_P (crtl->return_rtx)));
3912
3913 if (!exit_with_mmx_mode)
3914 /* x87 exit mode, we should zero all st registers together. */
3915 {
3916 all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
3917
3918 if (all_st_zeroed_num > 0)
3919 for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
3920 /* x87 stack registers that hold the return value should be excluded.
3921 x87 returns in the top (two for complex values) register. */
3922 if (all_st_zeroed_num == 8
3923 || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
3924 || (all_st_zeroed_num == 6
3925 && (regno == (REGNO (crtl->return_rtx) + 1)))))
3926 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3927 }
3928 else
3929 /* MMX exit mode, check whether we can zero all mm registers. */
3930 {
3931 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
3932 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
3933 exit_mmx_regno);
3934 if (all_mm_zeroed)
3935 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3936 if (regno != exit_mmx_regno)
3937 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3938 }
3939
3940 /* Now, generate instructions to zero all the other registers. */
3941
3942 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3943 {
3944 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3945 continue;
3946 if (!zero_call_used_regno_p (regno, all_sse_zeroed,
3947 exit_with_mmx_mode && !all_mm_zeroed))
3948 continue;
3949
3950 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3951
3952 machine_mode mode = zero_call_used_regno_mode (regno);
3953
3954 rtx reg = gen_rtx_REG (mode, regno);
3955 rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
3956
3957 switch (mode)
3958 {
3959 case E_SImode:
3960 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
3961 {
3962 rtx clob = gen_rtx_CLOBBER (VOIDmode,
3963 gen_rtx_REG (CCmode,
3964 FLAGS_REG));
3965 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
3966 tmp,
3967 clob));
3968 }
3969 /* FALLTHRU. */
3970
3971 case E_V4SFmode:
3972 case E_HImode:
3973 case E_V2SImode:
3974 emit_insn (tmp);
3975 break;
3976
3977 default:
3978 gcc_unreachable ();
3979 }
3980 }
3981 return zeroed_hardregs;
3982 }
3983
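/* Usage illustration (user code as an assumption, not part of this file):
   the hook above implements -fzero-call-used-regs and the corresponding
   function attribute, e.g.

       __attribute__ ((zero_call_used_regs ("used-gpr")))
       int handler (int x)
       {
         return x + 1;
       }

   where the call-used general-purpose registers actually used by HANDLER
   are cleared just before it returns. */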
3984 /* Define how to find the value returned by a function.
3985 VALTYPE is the data type of the value (as a tree).
3986 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3987 otherwise, FUNC is 0. */
3988
3989 static rtx
3990 function_value_32 (machine_mode orig_mode, machine_mode mode,
3991 const_tree fntype, const_tree fn)
3992 {
3993 unsigned int regno;
3994
3995 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3996 we normally prevent this case when MMX is not available. However,
3997 some ABIs may require the result to be returned like DImode. */
3998 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3999 regno = FIRST_MMX_REG;
4000
4001 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4002 we prevent this case when SSE is not available. However, some ABIs
4003 may require the result to be returned like integer TImode. */
4004 else if (mode == TImode
4005 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4006 regno = FIRST_SSE_REG;
4007
4008 /* 32-byte vector modes in %ymm0. */
4009 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
4010 regno = FIRST_SSE_REG;
4011
4012 /* 64-byte vector modes in %zmm0. */
4013 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
4014 regno = FIRST_SSE_REG;
4015
4016 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4017 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4018 regno = FIRST_FLOAT_REG;
4019 else
4020 /* Most things go in %eax. */
4021 regno = AX_REG;
4022
4023 /* Return __bf16/_Float16/_Complex _Float16 in an SSE register. */
4024 if (mode == HFmode || mode == BFmode)
4025 regno = FIRST_SSE_REG;
4026 if (mode == HCmode)
4027 {
4028 rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
4029 XVECEXP (ret, 0, 0)
4030 = gen_rtx_EXPR_LIST (VOIDmode,
4031 gen_rtx_REG (SImode, FIRST_SSE_REG),
4032 GEN_INT (0));
4033 return ret;
4034 }
4035
4036 /* Override the FP return register with %xmm0 for local functions when
4037 SSE math is enabled or for functions with the sseregparm attribute. */
4038 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4039 {
4040 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4041 if (sse_level == -1)
4042 {
4043 error ("calling %qD with SSE calling convention without "
4044 "SSE/SSE2 enabled", fn);
4045 sorry ("this is a GCC bug that can be worked around by adding "
4046 "attribute used to function called");
4047 }
4048 else if ((sse_level >= 1 && mode == SFmode)
4049 || (sse_level == 2 && mode == DFmode))
4050 regno = FIRST_SSE_REG;
4051 }
4052
4053 /* OImode shouldn't be used directly. */
4054 gcc_assert (mode != OImode);
4055
4056 return gen_rtx_REG (orig_mode, regno);
4057 }
4058
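/* Illustration (a sketch of typical cases, not compiler source): with the
   default ia32 options the function above yields

       int      f1 (void);   -> %eax
       double   f2 (void);   -> %st(0)
       __m128   f3 (void);   -> %xmm0   (requires SSE)
       _Float16 f4 (void);   -> %xmm0   (returned in an SSE register)

   subject to the overrides above, e.g. SSE math or the sseregparm
   attribute may move SFmode/DFmode values into %xmm0. */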
4059 static rtx
4060 function_value_64 (machine_mode orig_mode, machine_mode mode,
4061 const_tree valtype)
4062 {
4063 rtx ret;
4064
4065 /* Handle libcalls, which don't provide a type node. */
4066 if (valtype == NULL)
4067 {
4068 unsigned int regno;
4069
4070 switch (mode)
4071 {
4072 case E_BFmode:
4073 case E_HFmode:
4074 case E_HCmode:
4075 case E_SFmode:
4076 case E_SCmode:
4077 case E_DFmode:
4078 case E_DCmode:
4079 case E_TFmode:
4080 case E_SDmode:
4081 case E_DDmode:
4082 case E_TDmode:
4083 regno = FIRST_SSE_REG;
4084 break;
4085 case E_XFmode:
4086 case E_XCmode:
4087 regno = FIRST_FLOAT_REG;
4088 break;
4089 case E_TCmode:
4090 return NULL;
4091 default:
4092 regno = AX_REG;
4093 }
4094
4095 return gen_rtx_REG (mode, regno);
4096 }
4097 else if (POINTER_TYPE_P (valtype))
4098 {
4099 /* Pointers are always returned in word_mode. */
4100 mode = word_mode;
4101 }
4102
4103 ret = construct_container (mode, orig_mode, valtype, 1,
4104 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4105 x86_64_int_return_registers, 0);
4106
4107 /* For zero-sized structures, construct_container returns NULL, but we
4108 need to keep the rest of the compiler happy by returning a meaningful value. */
4109 if (!ret)
4110 ret = gen_rtx_REG (orig_mode, AX_REG);
4111
4112 return ret;
4113 }
4114
4115 static rtx
4116 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
4117 const_tree fntype, const_tree fn, const_tree valtype)
4118 {
4119 unsigned int regno;
4120
4121 /* Floating point return values go in %st(0)
4122 (unless -mno-fp-ret-in-387 is given or the value is an aggregate of up to 8 bytes). */
4123 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
4124 && (GET_MODE_SIZE (mode) > 8
4125 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
4126 {
4127 regno = FIRST_FLOAT_REG;
4128 return gen_rtx_REG (orig_mode, regno);
4129 }
4130 else
4131 return function_value_32 (orig_mode, mode, fntype, fn);
4132 }
4133
4134 static rtx
4135 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
4136 const_tree valtype)
4137 {
4138 unsigned int regno = AX_REG;
4139
4140 if (TARGET_SSE)
4141 {
4142 switch (GET_MODE_SIZE (mode))
4143 {
4144 case 16:
4145 if (valtype != NULL_TREE
4146 && !VECTOR_INTEGER_TYPE_P (valtype)
4148 && !INTEGRAL_TYPE_P (valtype)
4149 && !VECTOR_FLOAT_TYPE_P (valtype))
4150 break;
4151 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4152 && !COMPLEX_MODE_P (mode))
4153 regno = FIRST_SSE_REG;
4154 break;
4155 case 8:
4156 case 4:
4157 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4158 break;
4159 if (mode == SFmode || mode == DFmode)
4160 regno = FIRST_SSE_REG;
4161 break;
4162 default:
4163 break;
4164 }
4165 }
4166 return gen_rtx_REG (orig_mode, regno);
4167 }
4168
4169 static rtx
4170 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4171 machine_mode orig_mode, machine_mode mode)
4172 {
4173 const_tree fn, fntype;
4174
4175 fn = NULL_TREE;
4176 if (fntype_or_decl && DECL_P (fntype_or_decl))
4177 fn = fntype_or_decl;
4178 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4179
4180 if (ix86_function_type_abi (fntype) == MS_ABI)
4181 {
4182 if (TARGET_64BIT)
4183 return function_value_ms_64 (orig_mode, mode, valtype);
4184 else
4185 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4186 }
4187 else if (TARGET_64BIT)
4188 return function_value_64 (orig_mode, mode, valtype);
4189 else
4190 return function_value_32 (orig_mode, mode, fntype, fn);
4191 }
4192
4193 static rtx
4194 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4195 {
4196 machine_mode mode, orig_mode;
4197
4198 orig_mode = TYPE_MODE (valtype);
4199 mode = type_natural_mode (valtype, NULL, true);
4200 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4201 }
4202
4203 /* Pointer function arguments and return values are promoted to
4204 word_mode for normal functions. */
4205
4206 static machine_mode
4207 ix86_promote_function_mode (const_tree type, machine_mode mode,
4208 int *punsignedp, const_tree fntype,
4209 int for_return)
4210 {
4211 if (cfun->machine->func_type == TYPE_NORMAL
4212 && type != NULL_TREE
4213 && POINTER_TYPE_P (type))
4214 {
4215 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4216 return word_mode;
4217 }
4218 return default_promote_function_mode (type, mode, punsignedp, fntype,
4219 for_return);
4220 }
4221
4222 /* Return true if a structure, union or array with MODE containing FIELD
4223 should be accessed using BLKmode. */
4224
4225 static bool
4226 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4227 {
4228 /* Union with XFmode must be in BLKmode. */
4229 return (mode == XFmode
4230 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4231 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4232 }
4233
4234 rtx
4235 ix86_libcall_value (machine_mode mode)
4236 {
4237 return ix86_function_value_1 (NULL, NULL, mode, mode);
4238 }
4239
4240 /* Return true iff type is returned in memory. */
4241
4242 static bool
4243 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4244 {
4245 const machine_mode mode = type_natural_mode (type, NULL, true);
4246 HOST_WIDE_INT size;
4247
4248 if (TARGET_64BIT)
4249 {
4250 if (ix86_function_type_abi (fntype) == MS_ABI)
4251 {
4252 size = int_size_in_bytes (type);
4253
4254 /* __m128 is returned in xmm0. */
4255 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4256 || INTEGRAL_TYPE_P (type)
4257 || VECTOR_FLOAT_TYPE_P (type))
4258 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4259 && !COMPLEX_MODE_P (mode)
4260 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4261 return false;
4262
4263 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4264 return size != 1 && size != 2 && size != 4 && size != 8;
4265 }
4266 else
4267 {
4268 int needed_intregs, needed_sseregs;
4269
4270 return examine_argument (mode, type, 1,
4271 &needed_intregs, &needed_sseregs);
4272 }
4273 }
4274 else
4275 {
4276 size = int_size_in_bytes (type);
4277
4278 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4279 bytes in registers. */
4280 if (TARGET_IAMCU)
4281 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4282
4283 if (mode == BLKmode)
4284 return true;
4285
4286 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4287 return false;
4288
4289 if (VECTOR_MODE_P (mode) || mode == TImode)
4290 {
4291 /* User-created vectors small enough to fit in EAX. */
4292 if (size < 8)
4293 return false;
4294
4295 /* Unless the ABI prescribes otherwise,
4296 MMX/3DNow! values are returned in MM0 if available. */
4297
4298 if (size == 8)
4299 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4300
4301 /* SSE values are returned in XMM0 if available. */
4302 if (size == 16)
4303 return !TARGET_SSE;
4304
4305 /* AVX values are returned in YMM0 if available. */
4306 if (size == 32)
4307 return !TARGET_AVX;
4308
4309 /* AVX512F values are returned in ZMM0 if available. */
4310 if (size == 64)
4311 return !TARGET_AVX512F;
4312 }
4313
4314 if (mode == XFmode)
4315 return false;
4316
4317 if (size > 12)
4318 return true;
4319
4320 /* OImode shouldn't be used directly. */
4321 gcc_assert (mode != OImode);
4322
4323 return false;
4324 }
4325 }
4326
4327 /* Implement TARGET_PUSH_ARGUMENT. */
4328
4329 static bool
4330 ix86_push_argument (unsigned int npush)
4331 {
4332 /* If SSE2 is available, use a vector move to put large arguments onto
4333 the stack. NB: In 32-bit mode, use an 8-byte vector move. */
4334 return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
4335 && TARGET_PUSH_ARGS
4336 && !ACCUMULATE_OUTGOING_ARGS);
4337 }
4338
4339 \f
4340 /* Create the va_list data type. */
4341
4342 static tree
4343 ix86_build_builtin_va_list_64 (void)
4344 {
4345 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4346
4347 record = lang_hooks.types.make_type (RECORD_TYPE);
4348 type_decl = build_decl (BUILTINS_LOCATION,
4349 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4350
4351 f_gpr = build_decl (BUILTINS_LOCATION,
4352 FIELD_DECL, get_identifier ("gp_offset"),
4353 unsigned_type_node);
4354 f_fpr = build_decl (BUILTINS_LOCATION,
4355 FIELD_DECL, get_identifier ("fp_offset"),
4356 unsigned_type_node);
4357 f_ovf = build_decl (BUILTINS_LOCATION,
4358 FIELD_DECL, get_identifier ("overflow_arg_area"),
4359 ptr_type_node);
4360 f_sav = build_decl (BUILTINS_LOCATION,
4361 FIELD_DECL, get_identifier ("reg_save_area"),
4362 ptr_type_node);
4363
4364 va_list_gpr_counter_field = f_gpr;
4365 va_list_fpr_counter_field = f_fpr;
4366
4367 DECL_FIELD_CONTEXT (f_gpr) = record;
4368 DECL_FIELD_CONTEXT (f_fpr) = record;
4369 DECL_FIELD_CONTEXT (f_ovf) = record;
4370 DECL_FIELD_CONTEXT (f_sav) = record;
4371
4372 TYPE_STUB_DECL (record) = type_decl;
4373 TYPE_NAME (record) = type_decl;
4374 TYPE_FIELDS (record) = f_gpr;
4375 DECL_CHAIN (f_gpr) = f_fpr;
4376 DECL_CHAIN (f_fpr) = f_ovf;
4377 DECL_CHAIN (f_ovf) = f_sav;
4378
4379 layout_type (record);
4380
4381 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4382 NULL_TREE, TYPE_ATTRIBUTES (record));
4383
4384 /* The correct type is an array type of one element. */
4385 return build_array_type (record, build_index_type (size_zero_node));
4386 }
4387
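/* For reference (a sketch, not compiler source): the record built above
   corresponds to the SysV x86-64 va_list layout, roughly

       typedef struct {
         unsigned int gp_offset;        // offset into reg_save_area for GPRs
         unsigned int fp_offset;        // offset into reg_save_area for SSE regs
         void *overflow_arg_area;       // next stack-passed argument
         void *reg_save_area;           // start of the register save area
       } __va_list_tag;
       typedef __va_list_tag __builtin_va_list[1];

   i.e. an array of one record, as returned at the end of the function. */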
4388 /* Set up the builtin va_list data type and, for 64-bit, the additional
4389 calling-convention-specific va_list data types. */
4390
4391 static tree
4392 ix86_build_builtin_va_list (void)
4393 {
4394 if (TARGET_64BIT)
4395 {
4396 /* Initialize ABI specific va_list builtin types.
4397
4398 In lto1, we can encounter two va_list types:
4399 - one as a result of the type-merge across TUs, and
4400 - the one constructed here.
4401 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4402 a type identity check in canonical_va_list_type based on
4403 TYPE_MAIN_VARIANT (which we used to have) will not work.
4404 Instead, we tag each va_list_type_node with its unique attribute, and
4405 look for the attribute in the type identity check in
4406 canonical_va_list_type.
4407
4408 Tagging sysv_va_list_type_node directly with the attribute is
4409 problematic since it's an array of one record, which will degrade into a
4410 pointer to a record when used as a parameter (see build_va_arg comments for
4411 an example), dropping the attribute in the process. So we tag the
4412 record instead. */
4413
4414 /* For SYSV_ABI we use an array of one record. */
4415 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4416
4417 /* For MS_ABI we use a plain pointer to the argument area. */
4418 tree char_ptr_type = build_pointer_type (char_type_node);
4419 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4420 TYPE_ATTRIBUTES (char_ptr_type));
4421 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4422
4423 return ((ix86_abi == MS_ABI)
4424 ? ms_va_list_type_node
4425 : sysv_va_list_type_node);
4426 }
4427 else
4428 {
4429 /* For i386 we use a plain pointer to the argument area. */
4430 return build_pointer_type (char_type_node);
4431 }
4432 }
4433
4434 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4435
4436 static void
4437 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4438 {
4439 rtx save_area, mem;
4440 alias_set_type set;
4441 int i, max;
4442
4443 /* GPR size of varargs save area. */
4444 if (cfun->va_list_gpr_size)
4445 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4446 else
4447 ix86_varargs_gpr_size = 0;
4448
4449 /* FPR size of varargs save area. We don't need it if we don't pass
4450 anything in SSE registers. */
4451 if (TARGET_SSE && cfun->va_list_fpr_size)
4452 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4453 else
4454 ix86_varargs_fpr_size = 0;
4455
4456 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4457 return;
4458
4459 save_area = frame_pointer_rtx;
4460 set = get_varargs_alias_set ();
4461
4462 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4463 if (max > X86_64_REGPARM_MAX)
4464 max = X86_64_REGPARM_MAX;
4465
4466 for (i = cum->regno; i < max; i++)
4467 {
4468 mem = gen_rtx_MEM (word_mode,
4469 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4470 MEM_NOTRAP_P (mem) = 1;
4471 set_mem_alias_set (mem, set);
4472 emit_move_insn (mem,
4473 gen_rtx_REG (word_mode,
4474 x86_64_int_parameter_registers[i]));
4475 }
4476
4477 if (ix86_varargs_fpr_size)
4478 {
4479 machine_mode smode;
4480 rtx_code_label *label;
4481 rtx test;
4482
4483 /* Now emit code to save SSE registers. The AX parameter contains the
4484 number of SSE parameter registers used to call this function, though
4485 all we actually check here is the zero/non-zero status. */
4486
4487 label = gen_label_rtx ();
4488 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4489 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4490 label));
4491
4492 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4493 we used movdqa (i.e. TImode) instead? Perhaps even better would
4494 be if we could determine the real mode of the data, via a hook
4495 into pass_stdarg. Ignore all that for now. */
4496 smode = V4SFmode;
4497 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4498 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4499
4500 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4501 if (max > X86_64_SSE_REGPARM_MAX)
4502 max = X86_64_SSE_REGPARM_MAX;
4503
4504 for (i = cum->sse_regno; i < max; ++i)
4505 {
4506 mem = plus_constant (Pmode, save_area,
4507 i * 16 + ix86_varargs_gpr_size);
4508 mem = gen_rtx_MEM (smode, mem);
4509 MEM_NOTRAP_P (mem) = 1;
4510 set_mem_alias_set (mem, set);
4511 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4512
4513 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4514 }
4515
4516 emit_label (label);
4517 }
4518 }
4519
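/* Layout sketch (illustrative, based on the code above): when both parts
   are needed, the register save area addressed from the frame pointer holds

       offset   0 ..  47   the six integer argument registers
                           (rdi, rsi, rdx, rcx, r8, r9), 8 bytes each
       offset  48 .. 175   the eight SSE argument registers
                           (xmm0 .. xmm7), 16 bytes each

   and the SSE part is only stored when %al was non-zero at function entry,
   hence the conditional branch around it. */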
4520 static void
4521 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4522 {
4523 alias_set_type set = get_varargs_alias_set ();
4524 int i;
4525
4526 /* Reset to zero, as there might have been a sysv va_arg used
4527 before. */
4528 ix86_varargs_gpr_size = 0;
4529 ix86_varargs_fpr_size = 0;
4530
4531 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4532 {
4533 rtx reg, mem;
4534
4535 mem = gen_rtx_MEM (Pmode,
4536 plus_constant (Pmode, virtual_incoming_args_rtx,
4537 i * UNITS_PER_WORD));
4538 MEM_NOTRAP_P (mem) = 1;
4539 set_mem_alias_set (mem, set);
4540
4541 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4542 emit_move_insn (mem, reg);
4543 }
4544 }
4545
4546 static void
4547 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4548 const function_arg_info &arg,
4549 int *, int no_rtl)
4550 {
4551 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4552 CUMULATIVE_ARGS next_cum;
4553 tree fntype;
4554
4555 /* This argument doesn't appear to be used any more, which is good
4556 because the old code here didn't suppress rtl generation. */
4557 gcc_assert (!no_rtl);
4558
4559 if (!TARGET_64BIT)
4560 return;
4561
4562 fntype = TREE_TYPE (current_function_decl);
4563
4564 /* For varargs, we do not want to skip the dummy va_dcl argument.
4565 For stdargs, we do want to skip the last named argument. */
4566 next_cum = *cum;
4567 if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
4568 && stdarg_p (fntype))
4569 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4570
4571 if (cum->call_abi == MS_ABI)
4572 setup_incoming_varargs_ms_64 (&next_cum);
4573 else
4574 setup_incoming_varargs_64 (&next_cum);
4575 }
4576
4577 /* Check whether TYPE is the char * kind of va_list. */
4578
4579 static bool
4580 is_va_list_char_pointer (tree type)
4581 {
4582 tree canonic;
4583
4584 /* For 32-bit it is always true. */
4585 if (!TARGET_64BIT)
4586 return true;
4587 canonic = ix86_canonical_va_list_type (type);
4588 return (canonic == ms_va_list_type_node
4589 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4590 }
4591
4592 /* Implement va_start. */
4593
4594 static void
4595 ix86_va_start (tree valist, rtx nextarg)
4596 {
4597 HOST_WIDE_INT words, n_gpr, n_fpr;
4598 tree f_gpr, f_fpr, f_ovf, f_sav;
4599 tree gpr, fpr, ovf, sav, t;
4600 tree type;
4601 rtx ovf_rtx;
4602
4603 if (flag_split_stack
4604 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4605 {
4606 unsigned int scratch_regno;
4607
4608 /* When we are splitting the stack, we can't refer to the stack
4609 arguments using internal_arg_pointer, because they may be on
4610 the old stack. The split stack prologue will arrange to
4611 leave a pointer to the old stack arguments in a scratch
4612 register, which we here copy to a pseudo-register. The split
4613 stack prologue can't set the pseudo-register directly because
4614 it (the prologue) runs before any registers have been saved. */
4615
4616 scratch_regno = split_stack_prologue_scratch_regno ();
4617 if (scratch_regno != INVALID_REGNUM)
4618 {
4619 rtx reg;
4620 rtx_insn *seq;
4621
4622 reg = gen_reg_rtx (Pmode);
4623 cfun->machine->split_stack_varargs_pointer = reg;
4624
4625 start_sequence ();
4626 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4627 seq = get_insns ();
4628 end_sequence ();
4629
4630 push_topmost_sequence ();
4631 emit_insn_after (seq, entry_of_function ());
4632 pop_topmost_sequence ();
4633 }
4634 }
4635
4636 /* Only the 64-bit target needs something special. */
4637 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4638 {
4639 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4640 std_expand_builtin_va_start (valist, nextarg);
4641 else
4642 {
4643 rtx va_r, next;
4644
4645 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4646 next = expand_binop (ptr_mode, add_optab,
4647 cfun->machine->split_stack_varargs_pointer,
4648 crtl->args.arg_offset_rtx,
4649 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4650 convert_move (va_r, next, 0);
4651 }
4652 return;
4653 }
4654
4655 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4656 f_fpr = DECL_CHAIN (f_gpr);
4657 f_ovf = DECL_CHAIN (f_fpr);
4658 f_sav = DECL_CHAIN (f_ovf);
4659
4660 valist = build_simple_mem_ref (valist);
4661 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4662 /* The following should be folded into the MEM_REF offset. */
4663 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4664 f_gpr, NULL_TREE);
4665 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4666 f_fpr, NULL_TREE);
4667 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4668 f_ovf, NULL_TREE);
4669 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4670 f_sav, NULL_TREE);
4671
4672 /* Count number of gp and fp argument registers used. */
4673 words = crtl->args.info.words;
4674 n_gpr = crtl->args.info.regno;
4675 n_fpr = crtl->args.info.sse_regno;
4676
4677 if (cfun->va_list_gpr_size)
4678 {
4679 type = TREE_TYPE (gpr);
4680 t = build2 (MODIFY_EXPR, type,
4681 gpr, build_int_cst (type, n_gpr * 8));
4682 TREE_SIDE_EFFECTS (t) = 1;
4683 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4684 }
4685
4686 if (TARGET_SSE && cfun->va_list_fpr_size)
4687 {
4688 type = TREE_TYPE (fpr);
4689 t = build2 (MODIFY_EXPR, type, fpr,
4690 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4691 TREE_SIDE_EFFECTS (t) = 1;
4692 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4693 }
4694
4695 /* Find the overflow area. */
4696 type = TREE_TYPE (ovf);
4697 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4698 ovf_rtx = crtl->args.internal_arg_pointer;
4699 else
4700 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4701 t = make_tree (type, ovf_rtx);
4702 if (words != 0)
4703 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4704
4705 t = build2 (MODIFY_EXPR, type, ovf, t);
4706 TREE_SIDE_EFFECTS (t) = 1;
4707 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4708
4709 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4710 {
4711 /* Find the register save area.
4712 The function prologue saves it right above the stack frame. */
4713 type = TREE_TYPE (sav);
4714 t = make_tree (type, frame_pointer_rtx);
4715 if (!ix86_varargs_gpr_size)
4716 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4717
4718 t = build2 (MODIFY_EXPR, type, sav, t);
4719 TREE_SIDE_EFFECTS (t) = 1;
4720 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4721 }
4722 }
4723
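/* Illustration (a sketch, not compiler source): for

       int sum (int n, ...)

   where the single named argument consumes one GPR, va_start leaves the
   va_list roughly as

       gp_offset         = 8                  // one GPR slot already used
       fp_offset         = 48 + 0 * 16        // past the six GPR slots
       overflow_arg_area = incoming arg ptr   // first stack-passed vararg
       reg_save_area     = start of the register save area laid out above */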
4724 /* Implement va_arg. */
4725
4726 static tree
4727 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4728 gimple_seq *post_p)
4729 {
4730 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4731 tree f_gpr, f_fpr, f_ovf, f_sav;
4732 tree gpr, fpr, ovf, sav, t;
4733 int size, rsize;
4734 tree lab_false, lab_over = NULL_TREE;
4735 tree addr, t2;
4736 rtx container;
4737 int indirect_p = 0;
4738 tree ptrtype;
4739 machine_mode nat_mode;
4740 unsigned int arg_boundary;
4741 unsigned int type_align;
4742
4743 /* Only the 64-bit target needs something special. */
4744 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4745 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4746
4747 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4748 f_fpr = DECL_CHAIN (f_gpr);
4749 f_ovf = DECL_CHAIN (f_fpr);
4750 f_sav = DECL_CHAIN (f_ovf);
4751
4752 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4753 valist, f_gpr, NULL_TREE);
4754
4755 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4756 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4757 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4758
4759 indirect_p = pass_va_arg_by_reference (type);
4760 if (indirect_p)
4761 type = build_pointer_type (type);
4762 size = arg_int_size_in_bytes (type);
4763 rsize = CEIL (size, UNITS_PER_WORD);
4764
4765 nat_mode = type_natural_mode (type, NULL, false);
4766 switch (nat_mode)
4767 {
4768 case E_V16HFmode:
4769 case E_V16BFmode:
4770 case E_V8SFmode:
4771 case E_V8SImode:
4772 case E_V32QImode:
4773 case E_V16HImode:
4774 case E_V4DFmode:
4775 case E_V4DImode:
4776 case E_V32HFmode:
4777 case E_V32BFmode:
4778 case E_V16SFmode:
4779 case E_V16SImode:
4780 case E_V64QImode:
4781 case E_V32HImode:
4782 case E_V8DFmode:
4783 case E_V8DImode:
4784 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
4785 if (!TARGET_64BIT_MS_ABI)
4786 {
4787 container = NULL;
4788 break;
4789 }
4790 /* FALLTHRU */
4791
4792 default:
4793 container = construct_container (nat_mode, TYPE_MODE (type),
4794 type, 0, X86_64_REGPARM_MAX,
4795 X86_64_SSE_REGPARM_MAX, intreg,
4796 0);
4797 break;
4798 }
4799
4800 /* Pull the value out of the saved registers. */
4801
4802 addr = create_tmp_var (ptr_type_node, "addr");
4803 type_align = TYPE_ALIGN (type);
4804
4805 if (container)
4806 {
4807 int needed_intregs, needed_sseregs;
4808 bool need_temp;
4809 tree int_addr, sse_addr;
4810
4811 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4812 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4813
4814 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4815
4816 need_temp = (!REG_P (container)
4817 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4818 || TYPE_ALIGN (type) > 128));
4819
4820 /* When passing a structure, verify that it is a consecutive block
4821 in the register save area. If not, we need to do moves. */
4822 if (!need_temp && !REG_P (container))
4823 {
4824 /* Verify that all registers are strictly consecutive. */
4825 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4826 {
4827 int i;
4828
4829 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4830 {
4831 rtx slot = XVECEXP (container, 0, i);
4832 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4833 || INTVAL (XEXP (slot, 1)) != i * 16)
4834 need_temp = true;
4835 }
4836 }
4837 else
4838 {
4839 int i;
4840
4841 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4842 {
4843 rtx slot = XVECEXP (container, 0, i);
4844 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4845 || INTVAL (XEXP (slot, 1)) != i * 8)
4846 need_temp = true;
4847 }
4848 }
4849 }
4850 if (!need_temp)
4851 {
4852 int_addr = addr;
4853 sse_addr = addr;
4854 }
4855 else
4856 {
4857 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4858 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4859 }
4860
4861 /* First ensure that we fit completely in registers. */
4862 if (needed_intregs)
4863 {
4864 t = build_int_cst (TREE_TYPE (gpr),
4865 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4866 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4867 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4868 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4869 gimplify_and_add (t, pre_p);
4870 }
4871 if (needed_sseregs)
4872 {
4873 t = build_int_cst (TREE_TYPE (fpr),
4874 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4875 + X86_64_REGPARM_MAX * 8);
4876 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4877 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4878 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4879 gimplify_and_add (t, pre_p);
4880 }
4881
4882 /* Compute index to start of area used for integer regs. */
4883 if (needed_intregs)
4884 {
4885 /* int_addr = gpr + sav; */
4886 t = fold_build_pointer_plus (sav, gpr);
4887 gimplify_assign (int_addr, t, pre_p);
4888 }
4889 if (needed_sseregs)
4890 {
4891 /* sse_addr = fpr + sav; */
4892 t = fold_build_pointer_plus (sav, fpr);
4893 gimplify_assign (sse_addr, t, pre_p);
4894 }
4895 if (need_temp)
4896 {
4897 int i, prev_size = 0;
4898 tree temp = create_tmp_var (type, "va_arg_tmp");
4899 TREE_ADDRESSABLE (temp) = 1;
4900
4901 /* addr = &temp; */
4902 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4903 gimplify_assign (addr, t, pre_p);
4904
4905 for (i = 0; i < XVECLEN (container, 0); i++)
4906 {
4907 rtx slot = XVECEXP (container, 0, i);
4908 rtx reg = XEXP (slot, 0);
4909 machine_mode mode = GET_MODE (reg);
4910 tree piece_type;
4911 tree addr_type;
4912 tree daddr_type;
4913 tree src_addr, src;
4914 int src_offset;
4915 tree dest_addr, dest;
4916 int cur_size = GET_MODE_SIZE (mode);
4917
4918 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4919 prev_size = INTVAL (XEXP (slot, 1));
4920 if (prev_size + cur_size > size)
4921 {
4922 cur_size = size - prev_size;
4923 unsigned int nbits = cur_size * BITS_PER_UNIT;
4924 if (!int_mode_for_size (nbits, 1).exists (&mode))
4925 mode = QImode;
4926 }
4927 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4928 if (mode == GET_MODE (reg))
4929 addr_type = build_pointer_type (piece_type);
4930 else
4931 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4932 true);
4933 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4934 true);
4935
4936 if (SSE_REGNO_P (REGNO (reg)))
4937 {
4938 src_addr = sse_addr;
4939 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4940 }
4941 else
4942 {
4943 src_addr = int_addr;
4944 src_offset = REGNO (reg) * 8;
4945 }
4946 src_addr = fold_convert (addr_type, src_addr);
4947 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4948
4949 dest_addr = fold_convert (daddr_type, addr);
4950 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4951 if (cur_size == GET_MODE_SIZE (mode))
4952 {
4953 src = build_va_arg_indirect_ref (src_addr);
4954 dest = build_va_arg_indirect_ref (dest_addr);
4955
4956 gimplify_assign (dest, src, pre_p);
4957 }
4958 else
4959 {
4960 tree copy
4961 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4962 3, dest_addr, src_addr,
4963 size_int (cur_size));
4964 gimplify_and_add (copy, pre_p);
4965 }
4966 prev_size += cur_size;
4967 }
4968 }
4969
4970 if (needed_intregs)
4971 {
4972 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4973 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4974 gimplify_assign (gpr, t, pre_p);
4975 /* The GPR save area guarantees only 8-byte alignment. */
4976 if (!need_temp)
4977 type_align = MIN (type_align, 64);
4978 }
4979
4980 if (needed_sseregs)
4981 {
4982 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4983 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4984 gimplify_assign (unshare_expr (fpr), t, pre_p);
4985 }
4986
4987 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4988
4989 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4990 }
4991
4992 /* ... otherwise out of the overflow area. */
4993
4994 /* When the caller aligns a parameter on the stack, a parameter whose
4995 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT will only be
4996 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
4997 with the caller. */
4998 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4999 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
5000 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
5001
5002 /* Care for on-stack alignment if needed. */
5003 if (arg_boundary <= 64 || size == 0)
5004 t = ovf;
5005 else
5006 {
5007 HOST_WIDE_INT align = arg_boundary / 8;
5008 t = fold_build_pointer_plus_hwi (ovf, align - 1);
5009 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5010 build_int_cst (TREE_TYPE (t), -align));
5011 }
5012
5013 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5014 gimplify_assign (addr, t, pre_p);
5015
5016 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
5017 gimplify_assign (unshare_expr (ovf), t, pre_p);
5018
5019 if (container)
5020 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
5021
5022 type = build_aligned_type (type, type_align);
5023 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
5024 addr = fold_convert (ptrtype, addr);
5025
5026 if (indirect_p)
5027 addr = build_va_arg_indirect_ref (addr);
5028 return build_va_arg_indirect_ref (addr);
5029 }
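/* Expansion sketch (illustrative pseudo-code, not compiler source): for
   va_arg (ap, int) the sequence built above behaves roughly like

       if (ap->gp_offset >= 48) goto overflow;   // six GPR slots * 8 bytes
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;              // possibly realigned first
       ap->overflow_arg_area += 8;
     done:
       result = *(int *) addr;                                              */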
5030 \f
5031 /* Return true if OPNUM's MEM should be matched
5032 in movabs* patterns. */
5033
5034 bool
5035 ix86_check_movabs (rtx insn, int opnum)
5036 {
5037 rtx set, mem;
5038
5039 set = PATTERN (insn);
5040 if (GET_CODE (set) == PARALLEL)
5041 set = XVECEXP (set, 0, 0);
5042 gcc_assert (GET_CODE (set) == SET);
5043 mem = XEXP (set, opnum);
5044 while (SUBREG_P (mem))
5045 mem = SUBREG_REG (mem);
5046 gcc_assert (MEM_P (mem));
5047 return volatile_ok || !MEM_VOLATILE_P (mem);
5048 }
5049
5050 /* Return false if INSN contains a MEM with a non-default address space. */
5051 bool
5052 ix86_check_no_addr_space (rtx insn)
5053 {
5054 subrtx_var_iterator::array_type array;
5055 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
5056 {
5057 rtx x = *iter;
5058 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
5059 return false;
5060 }
5061 return true;
5062 }
5063 \f
5064 /* Initialize the table of extra 80387 mathematical constants. */
5065
5066 static void
5067 init_ext_80387_constants (void)
5068 {
5069 static const char * cst[5] =
5070 {
5071 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5072 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5073 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5074 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5075 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5076 };
5077 int i;
5078
5079 for (i = 0; i < 5; i++)
5080 {
5081 real_from_string (&ext_80387_constants_table[i], cst[i]);
5082 /* Ensure each constant is rounded to XFmode precision. */
5083 real_convert (&ext_80387_constants_table[i],
5084 XFmode, &ext_80387_constants_table[i]);
5085 }
5086
5087 ext_80387_constants_init = 1;
5088 }
5089
5090 /* Return non-zero if the constant is something that
5091 can be loaded with a special instruction. */
5092
5093 int
5094 standard_80387_constant_p (rtx x)
5095 {
5096 machine_mode mode = GET_MODE (x);
5097
5098 const REAL_VALUE_TYPE *r;
5099
5100 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
5101 return -1;
5102
5103 if (x == CONST0_RTX (mode))
5104 return 1;
5105 if (x == CONST1_RTX (mode))
5106 return 2;
5107
5108 r = CONST_DOUBLE_REAL_VALUE (x);
5109
5110 /* For XFmode constants, try to find a special 80387 instruction when
5111 optimizing for size or on those CPUs that benefit from them. */
5112 if (mode == XFmode
5113 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
5114 && !flag_rounding_math)
5115 {
5116 int i;
5117
5118 if (! ext_80387_constants_init)
5119 init_ext_80387_constants ();
5120
5121 for (i = 0; i < 5; i++)
5122 if (real_identical (r, &ext_80387_constants_table[i]))
5123 return i + 3;
5124 }
5125
5126 /* A load of the constant -0.0 or -1.0 will be split into an
5127 fldz;fchs or fld1;fchs sequence. */
5128 if (real_isnegzero (r))
5129 return 8;
5130 if (real_identical (r, &dconstm1))
5131 return 9;
5132
5133 return 0;
5134 }
5135
5136 /* Return the opcode of the special instruction to be used to load
5137 the constant X. */
5138
5139 const char *
5140 standard_80387_constant_opcode (rtx x)
5141 {
5142 switch (standard_80387_constant_p (x))
5143 {
5144 case 1:
5145 return "fldz";
5146 case 2:
5147 return "fld1";
5148 case 3:
5149 return "fldlg2";
5150 case 4:
5151 return "fldln2";
5152 case 5:
5153 return "fldl2e";
5154 case 6:
5155 return "fldl2t";
5156 case 7:
5157 return "fldpi";
5158 case 8:
5159 case 9:
5160 return "#";
5161 default:
5162 gcc_unreachable ();
5163 }
5164 }
5165
5166 /* Return the CONST_DOUBLE representing the 80387 constant that is
5167 loaded by the specified special instruction. The argument IDX
5168 matches the return value from standard_80387_constant_p. */
5169
5170 rtx
5171 standard_80387_constant_rtx (int idx)
5172 {
5173 int i;
5174
5175 if (! ext_80387_constants_init)
5176 init_ext_80387_constants ();
5177
5178 switch (idx)
5179 {
5180 case 3:
5181 case 4:
5182 case 5:
5183 case 6:
5184 case 7:
5185 i = idx - 3;
5186 break;
5187
5188 default:
5189 gcc_unreachable ();
5190 }
5191
5192 return const_double_from_real_value (ext_80387_constants_table[i],
5193 XFmode);
5194 }
5195
5196 /* Return 1 if X is all zero bits, 2 if X is all one bits,
5197 and 3 if X is all one bits with zero extension,
5198 in a supported SSE/AVX vector mode. */
5199
5200 int
5201 standard_sse_constant_p (rtx x, machine_mode pred_mode)
5202 {
5203 machine_mode mode;
5204
5205 if (!TARGET_SSE)
5206 return 0;
5207
5208 mode = GET_MODE (x);
5209
5210 if (x == const0_rtx || const0_operand (x, mode))
5211 return 1;
5212
5213 if (x == constm1_rtx
5214 || vector_all_ones_operand (x, mode)
5215 || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5216 || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
5217 && float_vector_all_ones_operand (x, mode)))
5218 {
5219 /* VOIDmode integer constant, get mode from the predicate. */
5220 if (mode == VOIDmode)
5221 mode = pred_mode;
5222
5223 switch (GET_MODE_SIZE (mode))
5224 {
5225 case 64:
5226 if (TARGET_AVX512F)
5227 return 2;
5228 break;
5229 case 32:
5230 if (TARGET_AVX2)
5231 return 2;
5232 break;
5233 case 16:
5234 if (TARGET_SSE2)
5235 return 2;
5236 break;
5237 case 0:
5238 /* VOIDmode */
5239 gcc_unreachable ();
5240 default:
5241 break;
5242 }
5243 }
5244
5245 if (vector_all_ones_zero_extend_half_operand (x, mode)
5246 || vector_all_ones_zero_extend_quarter_operand (x, mode))
5247 return 3;
5248
5249 return 0;
5250 }
5251
5252 /* Return the opcode of the special instruction to be used to load
5253 the constant operands[1] into operands[0]. */
5254
5255 const char *
5256 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5257 {
5258 machine_mode mode;
5259 rtx x = operands[1];
5260
5261 gcc_assert (TARGET_SSE);
5262
5263 mode = GET_MODE (x);
5264
5265 if (x == const0_rtx || const0_operand (x, mode))
5266 {
5267 switch (get_attr_mode (insn))
5268 {
5269 case MODE_TI:
5270 if (!EXT_REX_SSE_REG_P (operands[0]))
5271 return "%vpxor\t%0, %d0";
5272 /* FALLTHRU */
5273 case MODE_XI:
5274 case MODE_OI:
5275 if (EXT_REX_SSE_REG_P (operands[0]))
5276 return (TARGET_AVX512VL
5277 ? "vpxord\t%x0, %x0, %x0"
5278 : "vpxord\t%g0, %g0, %g0");
5279 return "vpxor\t%x0, %x0, %x0";
5280
5281 case MODE_V2DF:
5282 if (!EXT_REX_SSE_REG_P (operands[0]))
5283 return "%vxorpd\t%0, %d0";
5284 /* FALLTHRU */
5285 case MODE_V8DF:
5286 case MODE_V4DF:
5287 if (!EXT_REX_SSE_REG_P (operands[0]))
5288 return "vxorpd\t%x0, %x0, %x0";
5289 else if (TARGET_AVX512DQ)
5290 return (TARGET_AVX512VL
5291 ? "vxorpd\t%x0, %x0, %x0"
5292 : "vxorpd\t%g0, %g0, %g0");
5293 else
5294 return (TARGET_AVX512VL
5295 ? "vpxorq\t%x0, %x0, %x0"
5296 : "vpxorq\t%g0, %g0, %g0");
5297
5298 case MODE_V4SF:
5299 if (!EXT_REX_SSE_REG_P (operands[0]))
5300 return "%vxorps\t%0, %d0";
5301 /* FALLTHRU */
5302 case MODE_V16SF:
5303 case MODE_V8SF:
5304 if (!EXT_REX_SSE_REG_P (operands[0]))
5305 return "vxorps\t%x0, %x0, %x0";
5306 else if (TARGET_AVX512DQ)
5307 return (TARGET_AVX512VL
5308 ? "vxorps\t%x0, %x0, %x0"
5309 : "vxorps\t%g0, %g0, %g0");
5310 else
5311 return (TARGET_AVX512VL
5312 ? "vpxord\t%x0, %x0, %x0"
5313 : "vpxord\t%g0, %g0, %g0");
5314
5315 default:
5316 gcc_unreachable ();
5317 }
5318 }
5319 else if (x == constm1_rtx
5320 || vector_all_ones_operand (x, mode)
5321 || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
5322 && float_vector_all_ones_operand (x, mode)))
5323 {
5324 enum attr_mode insn_mode = get_attr_mode (insn);
5325
5326 switch (insn_mode)
5327 {
5328 case MODE_XI:
5329 case MODE_V8DF:
5330 case MODE_V16SF:
5331 gcc_assert (TARGET_AVX512F);
5332 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5333
5334 case MODE_OI:
5335 case MODE_V4DF:
5336 case MODE_V8SF:
5337 gcc_assert (TARGET_AVX2);
5338 /* FALLTHRU */
5339 case MODE_TI:
5340 case MODE_V2DF:
5341 case MODE_V4SF:
5342 gcc_assert (TARGET_SSE2);
5343 if (!EXT_REX_SSE_REG_P (operands[0]))
5344 return (TARGET_AVX
5345 ? "vpcmpeqd\t%0, %0, %0"
5346 : "pcmpeqd\t%0, %0");
5347 else if (TARGET_AVX512VL)
5348 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5349 else
5350 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5351
5352 default:
5353 gcc_unreachable ();
5354 }
5355 }
5356 else if (vector_all_ones_zero_extend_half_operand (x, mode))
5357 {
5358 if (GET_MODE_SIZE (mode) == 64)
5359 {
5360 gcc_assert (TARGET_AVX512F);
5361 return "vpcmpeqd \t %t0, %t0, %t0";
5362 }
5363 else if (GET_MODE_SIZE (mode) == 32)
5364 {
5365 gcc_assert (TARGET_AVX);
5366 return "vpcmpeqd \t %x0, %x0, %x0";
5367 }
5368 gcc_unreachable ();
5369 }
5370 else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
5371 {
5372 gcc_assert (TARGET_AVX512F);
5373 return "vpcmpeqd \t %x0, %x0, %x0";
5374 }
5375
5376 gcc_unreachable ();
5377 }
5378
5379 /* Returns true if INSN can be transformed from a memory load
5380 to a supported FP constant load. */
5381
5382 bool
5383 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5384 {
5385 rtx src = find_constant_src (insn);
5386
5387 gcc_assert (REG_P (dst));
5388
5389 if (src == NULL
5390 || (SSE_REGNO_P (REGNO (dst))
5391 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5392 || (STACK_REGNO_P (REGNO (dst))
5393 && standard_80387_constant_p (src) < 1))
5394 return false;
5395
5396 return true;
5397 }
5398
5399 /* Predicate for pre-reload splitters with associated instructions,
5400 which can match any time before the split1 pass (usually combine),
5401 then are unconditionally split in that pass and should not be
5402 matched again afterwards. */
5403
5404 bool
5405 ix86_pre_reload_split (void)
5406 {
5407 return (can_create_pseudo_p ()
5408 && !(cfun->curr_properties & PROP_rtl_split_insns));
5409 }
5410
5411 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5412 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5413 TARGET_AVX512VL, or it must be a register-to-register move that
5414 can be done with a zmm register move. */
5415
5416 static const char *
5417 ix86_get_ssemov (rtx *operands, unsigned size,
5418 enum attr_mode insn_mode, machine_mode mode)
5419 {
5420 char buf[128];
5421 bool misaligned_p = (misaligned_operand (operands[0], mode)
5422 || misaligned_operand (operands[1], mode));
5423 bool evex_reg_p = (size == 64
5424 || EXT_REX_SSE_REG_P (operands[0])
5425 || EXT_REX_SSE_REG_P (operands[1]));
5426 machine_mode scalar_mode;
5427
5428 const char *opcode = NULL;
5429 enum
5430 {
5431 opcode_int,
5432 opcode_float,
5433 opcode_double
5434 } type = opcode_int;
5435
5436 switch (insn_mode)
5437 {
5438 case MODE_V16SF:
5439 case MODE_V8SF:
5440 case MODE_V4SF:
5441 scalar_mode = E_SFmode;
5442 type = opcode_float;
5443 break;
5444 case MODE_V8DF:
5445 case MODE_V4DF:
5446 case MODE_V2DF:
5447 scalar_mode = E_DFmode;
5448 type = opcode_double;
5449 break;
5450 case MODE_XI:
5451 case MODE_OI:
5452 case MODE_TI:
5453 scalar_mode = GET_MODE_INNER (mode);
5454 break;
5455 default:
5456 gcc_unreachable ();
5457 }
5458
5459 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5460 we can only use a zmm register move without a memory operand. */
5461 if (evex_reg_p
5462 && !TARGET_AVX512VL
5463 && GET_MODE_SIZE (mode) < 64)
5464 {
5465 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5466 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5467 AVX512VL is disabled, LRA can still generate reg to
5468 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5469 modes. */
5470 if (memory_operand (operands[0], mode)
5471 || memory_operand (operands[1], mode))
5472 gcc_unreachable ();
5473 size = 64;
5474 switch (type)
5475 {
5476 case opcode_int:
5477 if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
5478 opcode = (misaligned_p
5479 ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
5480 : "vmovdqa64");
5481 else
5482 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5483 break;
5484 case opcode_float:
5485 opcode = misaligned_p ? "vmovups" : "vmovaps";
5486 break;
5487 case opcode_double:
5488 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5489 break;
5490 }
5491 }
5492 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5493 {
5494 switch (scalar_mode)
5495 {
5496 case E_HFmode:
5497 case E_BFmode:
5498 if (evex_reg_p)
5499 opcode = (misaligned_p
5500 ? (TARGET_AVX512BW
5501 ? "vmovdqu16"
5502 : "vmovdqu64")
5503 : "vmovdqa64");
5504 else
5505 opcode = (misaligned_p
5506 ? (TARGET_AVX512BW
5507 ? "vmovdqu16"
5508 : "%vmovdqu")
5509 : "%vmovdqa");
5510 break;
5511 case E_SFmode:
5512 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5513 break;
5514 case E_DFmode:
5515 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5516 break;
5517 case E_TFmode:
5518 if (evex_reg_p)
5519 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5520 else
5521 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5522 break;
5523 default:
5524 gcc_unreachable ();
5525 }
5526 }
5527 else if (SCALAR_INT_MODE_P (scalar_mode))
5528 {
5529 switch (scalar_mode)
5530 {
5531 case E_QImode:
5532 if (evex_reg_p)
5533 opcode = (misaligned_p
5534 ? (TARGET_AVX512BW
5535 ? "vmovdqu8"
5536 : "vmovdqu64")
5537 : "vmovdqa64");
5538 else
5539 opcode = (misaligned_p
5540 ? (TARGET_AVX512BW
5541 ? "vmovdqu8"
5542 : "%vmovdqu")
5543 : "%vmovdqa");
5544 break;
5545 case E_HImode:
5546 if (evex_reg_p)
5547 opcode = (misaligned_p
5548 ? (TARGET_AVX512BW
5549 ? "vmovdqu16"
5550 : "vmovdqu64")
5551 : "vmovdqa64");
5552 else
5553 opcode = (misaligned_p
5554 ? (TARGET_AVX512BW
5555 ? "vmovdqu16"
5556 : "%vmovdqu")
5557 : "%vmovdqa");
5558 break;
5559 case E_SImode:
5560 if (evex_reg_p)
5561 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5562 else
5563 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5564 break;
5565 case E_DImode:
5566 case E_TImode:
5567 case E_OImode:
5568 if (evex_reg_p)
5569 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5570 else
5571 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5572 break;
5573 case E_XImode:
5574 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5575 break;
5576 default:
5577 gcc_unreachable ();
5578 }
5579 }
5580 else
5581 gcc_unreachable ();
5582
5583 switch (size)
5584 {
5585 case 64:
5586 snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5587 opcode);
5588 break;
5589 case 32:
5590 snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5591 opcode);
5592 break;
5593 case 16:
5594 snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5595 opcode);
5596 break;
5597 default:
5598 gcc_unreachable ();
5599 }
5600 output_asm_insn (buf, operands);
5601 return "";
5602 }
5603
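/* Output sketch (illustrative, not compiler source): a few of the templates
   produced above, assuming an AVX-512 target,

       V16SF, aligned regs          ->  %vmovaps  {%g1, %g0|%g0, %g1}
       V4SF,  misaligned operand    ->  %vmovups  {%x1, %x0|%x0, %x1}
       V8DI,  zmm16-zmm31 operands  ->  vmovdqa64 {%g1, %g0|%g0, %g1}

   where the %x/%t/%g operand modifiers select the 128/256/512-bit names
   of the registers. */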
5604 /* Return the template of the TYPE_SSEMOV instruction to move
5605 operands[1] into operands[0]. */
5606
5607 const char *
5608 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5609 {
5610 machine_mode mode = GET_MODE (operands[0]);
5611 if (get_attr_type (insn) != TYPE_SSEMOV
5612 || mode != GET_MODE (operands[1]))
5613 gcc_unreachable ();
5614
5615 enum attr_mode insn_mode = get_attr_mode (insn);
5616
5617 switch (insn_mode)
5618 {
5619 case MODE_XI:
5620 case MODE_V8DF:
5621 case MODE_V16SF:
5622 return ix86_get_ssemov (operands, 64, insn_mode, mode);
5623
5624 case MODE_OI:
5625 case MODE_V4DF:
5626 case MODE_V8SF:
5627 return ix86_get_ssemov (operands, 32, insn_mode, mode);
5628
5629 case MODE_TI:
5630 case MODE_V2DF:
5631 case MODE_V4SF:
5632 return ix86_get_ssemov (operands, 16, insn_mode, mode);
5633
5634 case MODE_DI:
5635 /* Handle broken assemblers that require movd instead of movq. */
5636 if (GENERAL_REG_P (operands[0]))
5637 {
5638 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5639 return "%vmovq\t{%1, %q0|%q0, %1}";
5640 else
5641 return "%vmovd\t{%1, %q0|%q0, %1}";
5642 }
5643 else if (GENERAL_REG_P (operands[1]))
5644 {
5645 if (HAVE_AS_IX86_INTERUNIT_MOVQ)
5646 return "%vmovq\t{%q1, %0|%0, %q1}";
5647 else
5648 return "%vmovd\t{%q1, %0|%0, %q1}";
5649 }
5650 else
5651 return "%vmovq\t{%1, %0|%0, %1}";
5652
5653 case MODE_SI:
5654 if (GENERAL_REG_P (operands[0]))
5655 return "%vmovd\t{%1, %k0|%k0, %1}";
5656 else if (GENERAL_REG_P (operands[1]))
5657 return "%vmovd\t{%k1, %0|%0, %k1}";
5658 else
5659 return "%vmovd\t{%1, %0|%0, %1}";
5660
5661 case MODE_HI:
5662 if (GENERAL_REG_P (operands[0]))
5663 return "vmovw\t{%1, %k0|%k0, %1}";
5664 else if (GENERAL_REG_P (operands[1]))
5665 return "vmovw\t{%k1, %0|%0, %k1}";
5666 else
5667 return "vmovw\t{%1, %0|%0, %1}";
5668
5669 case MODE_DF:
5670 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5671 return "vmovsd\t{%d1, %0|%0, %d1}";
5672 else
5673 return "%vmovsd\t{%1, %0|%0, %1}";
5674
5675 case MODE_SF:
5676 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5677 return "vmovss\t{%d1, %0|%0, %d1}";
5678 else
5679 return "%vmovss\t{%1, %0|%0, %1}";
5680
5681 case MODE_HF:
5682 case MODE_BF:
5683 if (REG_P (operands[0]) && REG_P (operands[1]))
5684 return "vmovsh\t{%d1, %0|%0, %d1}";
5685 else
5686 return "vmovsh\t{%1, %0|%0, %1}";
5687
5688 case MODE_V1DF:
5689 gcc_assert (!TARGET_AVX);
5690 return "movlpd\t{%1, %0|%0, %1}";
5691
5692 case MODE_V2SF:
5693 if (TARGET_AVX && REG_P (operands[0]))
5694 return "vmovlps\t{%1, %d0|%d0, %1}";
5695 else
5696 return "%vmovlps\t{%1, %0|%0, %1}";
5697
5698 default:
5699 gcc_unreachable ();
5700 }
5701 }
5702
5703 /* Return true if OP contains a symbol reference. */
5704
5705 bool
5706 symbolic_reference_mentioned_p (rtx op)
5707 {
5708 const char *fmt;
5709 int i;
5710
5711 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5712 return true;
5713
5714 fmt = GET_RTX_FORMAT (GET_CODE (op));
5715 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5716 {
5717 if (fmt[i] == 'E')
5718 {
5719 int j;
5720
5721 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5722 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5723 return true;
5724 }
5725
5726 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5727 return true;
5728 }
5729
5730 return false;
5731 }
5732
5733 /* Return true if it is appropriate to emit `ret' instructions in the
5734 body of a function. Do this only if the epilogue is simple, needing a
5735 couple of insns. Prior to reloading, we can't tell how many registers
5736 must be saved, so return false then. Return false if there is no frame
5737 marker to de-allocate. */
5738
5739 bool
5740 ix86_can_use_return_insn_p (void)
5741 {
5742 if (ix86_function_ms_hook_prologue (current_function_decl))
5743 return false;
5744
5745 if (ix86_function_naked (current_function_decl))
5746 return false;
5747
5748 /* Don't use `ret' instruction in interrupt handler. */
5749 if (! reload_completed
5750 || frame_pointer_needed
5751 || cfun->machine->func_type != TYPE_NORMAL)
5752 return false;
5753
5754 /* Don't allow more than 32k pop, since that's all we can do
5755 with one instruction. */
5756 if (crtl->args.pops_args && crtl->args.size >= 32768)
5757 return false;
5758
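/* A bare `ret' is only possible when nothing but the return address
   remains on the stack and no registers need to be restored.  */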
5759 struct ix86_frame &frame = cfun->machine->frame;
5760 return (frame.stack_pointer_offset == UNITS_PER_WORD
5761 && (frame.nregs + frame.nsseregs) == 0);
5762 }
5763 \f
5764 /* Return stack frame size. get_frame_size () returns used stack slots
5765 during compilation, which may be optimized out later. If stack frame
5766 is needed, stack_frame_required should be true. */
5767
5768 static HOST_WIDE_INT
5769 ix86_get_frame_size (void)
5770 {
5771 if (cfun->machine->stack_frame_required)
5772 return get_frame_size ();
5773 else
5774 return 0;
5775 }
5776
5777 /* Value should be nonzero if functions must have frame pointers.
5778 Zero means the frame pointer need not be set up (and parms may
5779 be accessed via the stack pointer) in functions that seem suitable. */
5780
5781 static bool
5782 ix86_frame_pointer_required (void)
5783 {
5784 /* If we accessed previous frames, then the generated code expects
5785 to be able to access the saved ebp value in our frame. */
5786 if (cfun->machine->accesses_prev_frame)
5787 return true;
5788
5789 /* Several x86 OSes need a frame pointer for other reasons,
5790 usually pertaining to setjmp. */
5791 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5792 return true;
5793
5794 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5795 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5796 return true;
5797
5798 /* With Win64 SEH, very large frames need a frame pointer since the maximum
5799 stack allocation is 4GB. */
5800 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5801 return true;
5802
5803 /* SSE saves require frame-pointer when stack is misaligned. */
5804 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5805 return true;
5806
5807 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5808 turns off the frame pointer by default. Turn it back on now if
5809 we've not got a leaf function. */
5810 if (TARGET_OMIT_LEAF_FRAME_POINTER
5811 && (!crtl->is_leaf
5812 || ix86_current_function_calls_tls_descriptor))
5813 return true;
5814
5815 /* Several versions of mcount for the x86 assume that there is a
5816 frame, so we cannot allow profiling without a frame pointer. */
5817 if (crtl->profile && !flag_fentry)
5818 return true;
5819
5820 return false;
5821 }
5822
5823 /* Record that the current function accesses previous call frames. */
5824
5825 void
5826 ix86_setup_frame_addresses (void)
5827 {
5828 cfun->machine->accesses_prev_frame = 1;
5829 }
5830 \f
5831 #ifndef USE_HIDDEN_LINKONCE
5832 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5833 # define USE_HIDDEN_LINKONCE 1
5834 # else
5835 # define USE_HIDDEN_LINKONCE 0
5836 # endif
5837 #endif
5838
5839 /* Label count for call and return thunks. It is used to make unique
5840 labels in call and return thunks. */
5841 static int indirectlabelno;
5842
5843 /* True if call thunk function is needed. */
5844 static bool indirect_thunk_needed = false;
5845
5846 /* Bit masks of integer registers, which contain branch target, used
5847 by call thunk functions. */
5848 static HARD_REG_SET indirect_thunks_used;
5849
5850 /* True if return thunk function is needed. */
5851 static bool indirect_return_needed = false;
5852
5853 /* True if return thunk function via CX is needed. */
5854 static bool indirect_return_via_cx;
5855
5856 #ifndef INDIRECT_LABEL
5857 # define INDIRECT_LABEL "LIND"
5858 #endif
5859
5860 /* Indicate what prefix is needed for an indirect branch. */
5861 enum indirect_thunk_prefix
5862 {
5863 indirect_thunk_prefix_none,
5864 indirect_thunk_prefix_nt
5865 };
5866
5867 /* Return the prefix needed for an indirect branch INSN. */
5868
5869 enum indirect_thunk_prefix
5870 indirect_thunk_need_prefix (rtx_insn *insn)
5871 {
5872 enum indirect_thunk_prefix need_prefix;
5873 if ((cfun->machine->indirect_branch_type
5874 == indirect_branch_thunk_extern)
5875 && ix86_notrack_prefixed_insn_p (insn))
5876 {
5877 /* NOTRACK prefix is only used with external thunk so that it
5878 can be properly updated to support CET at run-time. */
5879 need_prefix = indirect_thunk_prefix_nt;
5880 }
5881 else
5882 need_prefix = indirect_thunk_prefix_none;
5883 return need_prefix;
5884 }
5885
5886 /* Fills in the label name that should be used for the indirect thunk. */
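/* With USE_HIDDEN_LINKONCE this yields names such as
   "__x86_indirect_thunk_rax" for a thunk through %rax, or
   "__x86_return_thunk" for the plain return thunk.  */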
5887
5888 static void
5889 indirect_thunk_name (char name[32], unsigned int regno,
5890 enum indirect_thunk_prefix need_prefix,
5891 bool ret_p)
5892 {
5893 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5894 gcc_unreachable ();
5895
5896 if (USE_HIDDEN_LINKONCE)
5897 {
5898 const char *prefix;
5899
5900 if (need_prefix == indirect_thunk_prefix_nt
5901 && regno != INVALID_REGNUM)
5902 {
5903 /* NOTRACK prefix is only used with external thunk via
5904 register so that NOTRACK prefix can be added to indirect
5905 branch via register to support CET at run-time. */
5906 prefix = "_nt";
5907 }
5908 else
5909 prefix = "";
5910
5911 const char *ret = ret_p ? "return" : "indirect";
5912
5913 if (regno != INVALID_REGNUM)
5914 {
5915 const char *reg_prefix;
5916 if (LEGACY_INT_REGNO_P (regno))
5917 reg_prefix = TARGET_64BIT ? "r" : "e";
5918 else
5919 reg_prefix = "";
5920 sprintf (name, "__x86_%s_thunk%s_%s%s",
5921 ret, prefix, reg_prefix, reg_names[regno]);
5922 }
5923 else
5924 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5925 }
5926 else
5927 {
5928 if (regno != INVALID_REGNUM)
5929 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5930 else
5931 {
5932 if (ret_p)
5933 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5934 else
5935 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
5936 }
5937 }
5938 }
5939
5940 /* Output a call and return thunk for indirect branch. If REGNO is not
5941 INVALID_REGNUM, the function address is in REGNO and the thunk looks like:
5942
5943 call L2
5944 L1:
5945 pause
5946 lfence
5947 jmp L1
5948 L2:
5949 mov %REG, (%sp)
5950 ret
5951
5952 Otherwise, the function address is on the top of stack and the
5953 call and return thunk looks like:
5954
5955 call L2
5956 L1:
5957 pause
5958 lfence
5959 jmp L1
5960 L2:
5961 lea WORD_SIZE(%sp), %sp
5962 ret
5963 */
5964
5965 static void
5966 output_indirect_thunk (unsigned int regno)
5967 {
5968 char indirectlabel1[32];
5969 char indirectlabel2[32];
5970
5971 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5972 indirectlabelno++);
5973 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5974 indirectlabelno++);
5975
5976 /* Call */
5977 fputs ("\tcall\t", asm_out_file);
5978 assemble_name_raw (asm_out_file, indirectlabel2);
5979 fputc ('\n', asm_out_file);
5980
5981 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5982
5983 /* AMD and Intel CPUs each prefer a different instruction as the loop filler.
5984 Using both pause + lfence is a compromise solution. */
5985 fprintf (asm_out_file, "\tpause\n\tlfence\n");
5986
5987 /* Jump. */
5988 fputs ("\tjmp\t", asm_out_file);
5989 assemble_name_raw (asm_out_file, indirectlabel1);
5990 fputc ('\n', asm_out_file);
5991
5992 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5993
5994 /* The above call insn pushed a word to stack. Adjust CFI info. */
5995 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5996 {
5997 if (! dwarf2out_do_cfi_asm ())
5998 {
5999 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6000 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
6001 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
6002 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6003 }
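/* After the call above the CFA is two words above the stack pointer:
   the caller's return address plus the word just pushed.  */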
6004 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
6005 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
6006 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
6007 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
6008 dwarf2out_emit_cfi (xcfi);
6009 }
6010
6011 if (regno != INVALID_REGNUM)
6012 {
6013 /* MOV. */
6014 rtx xops[2];
6015 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
6016 xops[1] = gen_rtx_REG (word_mode, regno);
6017 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
6018 }
6019 else
6020 {
6021 /* LEA. */
6022 rtx xops[2];
6023 xops[0] = stack_pointer_rtx;
6024 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
6025 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
6026 }
6027
6028 fputs ("\tret\n", asm_out_file);
6029 if ((ix86_harden_sls & harden_sls_return))
6030 fputs ("\tint3\n", asm_out_file);
6031 }
6032
6033 /* Output a function with a call and return thunk for indirect branch.
6034 If REGNO != INVALID_REGNUM, the function address is in REGNO.
6035 Otherwise, the function address is on the top of stack. Thunk is
6036 used for function return if RET_P is true. */
6037
6038 static void
6039 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
6040 unsigned int regno, bool ret_p)
6041 {
6042 char name[32];
6043 tree decl;
6044
6045 /* Create __x86_indirect_thunk. */
6046 indirect_thunk_name (name, regno, need_prefix, ret_p);
6047 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6048 get_identifier (name),
6049 build_function_type_list (void_type_node, NULL_TREE));
6050 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6051 NULL_TREE, void_type_node);
6052 TREE_PUBLIC (decl) = 1;
6053 TREE_STATIC (decl) = 1;
6054 DECL_IGNORED_P (decl) = 1;
6055
6056 #if TARGET_MACHO
6057 if (TARGET_MACHO)
6058 {
6059 switch_to_section (darwin_sections[picbase_thunk_section]);
6060 fputs ("\t.weak_definition\t", asm_out_file);
6061 assemble_name (asm_out_file, name);
6062 fputs ("\n\t.private_extern\t", asm_out_file);
6063 assemble_name (asm_out_file, name);
6064 putc ('\n', asm_out_file);
6065 ASM_OUTPUT_LABEL (asm_out_file, name);
6066 DECL_WEAK (decl) = 1;
6067 }
6068 else
6069 #endif
6070 if (USE_HIDDEN_LINKONCE)
6071 {
6072 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6073
6074 targetm.asm_out.unique_section (decl, 0);
6075 switch_to_section (get_named_section (decl, NULL, 0));
6076
6077 targetm.asm_out.globalize_label (asm_out_file, name);
6078 fputs ("\t.hidden\t", asm_out_file);
6079 assemble_name (asm_out_file, name);
6080 putc ('\n', asm_out_file);
6081 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6082 }
6083 else
6084 {
6085 switch_to_section (text_section);
6086 ASM_OUTPUT_LABEL (asm_out_file, name);
6087 }
6088
6089 DECL_INITIAL (decl) = make_node (BLOCK);
6090 current_function_decl = decl;
6091 allocate_struct_function (decl, false);
6092 init_function_start (decl);
6093 /* We're about to hide the function body from callees of final_* by
6094 emitting it directly; tell them we're a thunk, if they care. */
6095 cfun->is_thunk = true;
6096 first_function_block_is_cold = false;
6097 /* Make sure unwind info is emitted for the thunk if needed. */
6098 final_start_function (emit_barrier (), asm_out_file, 1);
6099
6100 output_indirect_thunk (regno);
6101
6102 final_end_function ();
6103 init_insn_lengths ();
6104 free_after_compilation (cfun);
6105 set_cfun (NULL);
6106 current_function_decl = NULL;
6107 }
6108
6109 static int pic_labels_used;
6110
6111 /* Fills in the label name that should be used for a pc thunk for
6112 the given register. */
6113
6114 static void
6115 get_pc_thunk_name (char name[32], unsigned int regno)
6116 {
6117 gcc_assert (!TARGET_64BIT);
6118
6119 if (USE_HIDDEN_LINKONCE)
6120 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
6121 else
6122 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6123 }
6124
6125
6126 /* Output pending indirect-branch thunks and the -fpic pc thunks, each of
6127 which loads its register with the caller's return address and returns. */
6128
6129 static void
6130 ix86_code_end (void)
6131 {
6132 rtx xops[2];
6133 unsigned int regno;
6134
6135 if (indirect_return_needed)
6136 output_indirect_thunk_function (indirect_thunk_prefix_none,
6137 INVALID_REGNUM, true);
6138 if (indirect_return_via_cx)
6139 output_indirect_thunk_function (indirect_thunk_prefix_none,
6140 CX_REG, true);
6141 if (indirect_thunk_needed)
6142 output_indirect_thunk_function (indirect_thunk_prefix_none,
6143 INVALID_REGNUM, false);
6144
6145 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
6146 {
6147 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6148 output_indirect_thunk_function (indirect_thunk_prefix_none,
6149 regno, false);
6150 }
6151
6152 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
6153 {
6154 char name[32];
6155 tree decl;
6156
6157 if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
6158 output_indirect_thunk_function (indirect_thunk_prefix_none,
6159 regno, false);
6160
6161 if (!(pic_labels_used & (1 << regno)))
6162 continue;
6163
6164 get_pc_thunk_name (name, regno);
6165
6166 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
6167 get_identifier (name),
6168 build_function_type_list (void_type_node, NULL_TREE));
6169 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
6170 NULL_TREE, void_type_node);
6171 TREE_PUBLIC (decl) = 1;
6172 TREE_STATIC (decl) = 1;
6173 DECL_IGNORED_P (decl) = 1;
6174
6175 #if TARGET_MACHO
6176 if (TARGET_MACHO)
6177 {
6178 switch_to_section (darwin_sections[picbase_thunk_section]);
6179 fputs ("\t.weak_definition\t", asm_out_file);
6180 assemble_name (asm_out_file, name);
6181 fputs ("\n\t.private_extern\t", asm_out_file);
6182 assemble_name (asm_out_file, name);
6183 putc ('\n', asm_out_file);
6184 ASM_OUTPUT_LABEL (asm_out_file, name);
6185 DECL_WEAK (decl) = 1;
6186 }
6187 else
6188 #endif
6189 if (USE_HIDDEN_LINKONCE)
6190 {
6191 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
6192
6193 targetm.asm_out.unique_section (decl, 0);
6194 switch_to_section (get_named_section (decl, NULL, 0));
6195
6196 targetm.asm_out.globalize_label (asm_out_file, name);
6197 fputs ("\t.hidden\t", asm_out_file);
6198 assemble_name (asm_out_file, name);
6199 putc ('\n', asm_out_file);
6200 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6201 }
6202 else
6203 {
6204 switch_to_section (text_section);
6205 ASM_OUTPUT_LABEL (asm_out_file, name);
6206 }
6207
6208 DECL_INITIAL (decl) = make_node (BLOCK);
6209 current_function_decl = decl;
6210 allocate_struct_function (decl, false);
6211 init_function_start (decl);
6212 /* We're about to hide the function body from callees of final_* by
6213 emitting it directly; tell them we're a thunk, if they care. */
6214 cfun->is_thunk = true;
6215 first_function_block_is_cold = false;
6216 /* Make sure unwind info is emitted for the thunk if needed. */
6217 final_start_function (emit_barrier (), asm_out_file, 1);
6218
6219 /* Pad stack IP move with 4 instructions (two NOPs count
6220 as one instruction). */
6221 if (TARGET_PAD_SHORT_FUNCTION)
6222 {
6223 int i = 8;
6224
6225 while (i--)
6226 fputs ("\tnop\n", asm_out_file);
6227 }
6228
6229 xops[0] = gen_rtx_REG (Pmode, regno);
6230 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6231 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6232 fputs ("\tret\n", asm_out_file);
6233 final_end_function ();
6234 init_insn_lengths ();
6235 free_after_compilation (cfun);
6236 set_cfun (NULL);
6237 current_function_decl = NULL;
6238 }
6239
6240 if (flag_split_stack)
6241 file_end_indicate_split_stack ();
6242 }
6243
6244 /* Emit code for the SET_GOT patterns. */
6245
6246 const char *
6247 output_set_got (rtx dest, rtx label)
6248 {
6249 rtx xops[3];
6250
6251 xops[0] = dest;
6252
6253 if (TARGET_VXWORKS_RTP && flag_pic)
6254 {
6255 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6256 xops[2] = gen_rtx_MEM (Pmode,
6257 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6258 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6259
6260 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6261 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6262 an unadorned address. */
6263 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6264 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6265 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6266 return "";
6267 }
6268
6269 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6270
6271 if (flag_pic)
6272 {
6273 char name[32];
6274 get_pc_thunk_name (name, REGNO (dest));
6275 pic_labels_used |= 1 << REGNO (dest);
6276
6277 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6278 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6279 output_asm_insn ("%!call\t%X2", xops);
6280
6281 #if TARGET_MACHO
6282 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6283 This is what will be referenced by the Mach-O PIC subsystem. */
6284 if (machopic_should_output_picbase_label () || !label)
6285 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6286
6287 /* When we are restoring the pic base at the site of a nonlocal label,
6288 and we decided to emit the pic base above, we will still output a
6289 local label used for calculating the correction offset (even though
6290 the offset will be 0 in that case). */
6291 if (label)
6292 targetm.asm_out.internal_label (asm_out_file, "L",
6293 CODE_LABEL_NUMBER (label));
6294 #endif
6295 }
6296 else
6297 {
6298 if (TARGET_MACHO)
6299 /* We don't need a pic base, we're not producing pic. */
6300 gcc_unreachable ();
6301
6302 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6303 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6304 targetm.asm_out.internal_label (asm_out_file, "L",
6305 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6306 }
6307
6308 if (!TARGET_MACHO)
6309 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6310
6311 return "";
6312 }
6313
6314 /* Generate a "push" pattern for input ARG. */
6315
6316 rtx
6317 gen_push (rtx arg)
6318 {
6319 struct machine_function *m = cfun->machine;
6320
6321 if (m->fs.cfa_reg == stack_pointer_rtx)
6322 m->fs.cfa_offset += UNITS_PER_WORD;
6323 m->fs.sp_offset += UNITS_PER_WORD;
6324
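/* Pushes always use word_mode; use the word_mode form of a narrower
   hard register.  */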
6325 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6326 arg = gen_rtx_REG (word_mode, REGNO (arg));
6327
6328 return gen_rtx_SET (gen_rtx_MEM (word_mode,
6329 gen_rtx_PRE_DEC (Pmode,
6330 stack_pointer_rtx)),
6331 arg);
6332 }
6333
6334 /* Generate a "pop" pattern for input ARG. */
6335
6336 rtx
6337 gen_pop (rtx arg)
6338 {
6339 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6340 arg = gen_rtx_REG (word_mode, REGNO (arg));
6341
6342 return gen_rtx_SET (arg,
6343 gen_rtx_MEM (word_mode,
6344 gen_rtx_POST_INC (Pmode,
6345 stack_pointer_rtx)));
6346 }
6347
6348 /* Return the number of an unused call-clobbered register if one is
6349 available for the entire function, or INVALID_REGNUM otherwise. */
6350
6351 static unsigned int
6352 ix86_select_alt_pic_regnum (void)
6353 {
6354 if (ix86_use_pseudo_pic_reg ())
6355 return INVALID_REGNUM;
6356
6357 if (crtl->is_leaf
6358 && !crtl->profile
6359 && !ix86_current_function_calls_tls_descriptor)
6360 {
6361 int i, drap;
6362 /* Can't use the same register for both PIC and DRAP. */
6363 if (crtl->drap_reg)
6364 drap = REGNO (crtl->drap_reg);
6365 else
6366 drap = -1;
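/* Hard registers 2, 1 and 0 are %ecx, %edx and %eax, the call-clobbered
   integer registers.  */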
6367 for (i = 2; i >= 0; --i)
6368 if (i != drap && !df_regs_ever_live_p (i))
6369 return i;
6370 }
6371
6372 return INVALID_REGNUM;
6373 }
6374
6375 /* Return true if REGNO is used by the epilogue. */
6376
6377 bool
6378 ix86_epilogue_uses (int regno)
6379 {
6380 /* If there are no caller-saved registers, we preserve all registers,
6381 except for MMX and x87 registers which aren't supported when saving
6382 and restoring registers. Don't explicitly save SP register since
6383 it is always preserved. */
6384 return (epilogue_completed
6385 && cfun->machine->no_caller_saved_registers
6386 && !fixed_regs[regno]
6387 && !STACK_REGNO_P (regno)
6388 && !MMX_REGNO_P (regno));
6389 }
6390
6391 /* Return nonzero if register REGNO can be used as a scratch register
6392 in peephole2. */
6393
6394 static bool
6395 ix86_hard_regno_scratch_ok (unsigned int regno)
6396 {
6397 /* If there are no caller-saved registers, we can't use any register
6398 as a scratch register after epilogue and use REGNO as scratch
6399 register only if it has been used before to avoid saving and
6400 restoring it. */
6401 return (!cfun->machine->no_caller_saved_registers
6402 || (!epilogue_completed
6403 && df_regs_ever_live_p (regno)));
6404 }
6405
6406 /* Return TRUE if we need to save REGNO. */
6407
6408 bool
6409 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6410 {
6411 /* If there are no caller-saved registers, we preserve all registers,
6412 except for MMX and x87 registers which aren't supported when saving
6413 and restoring registers. Don't explicitly save SP register since
6414 it is always preserved. */
6415 if (cfun->machine->no_caller_saved_registers)
6416 {
6417 /* Don't preserve registers used for function return value. */
6418 rtx reg = crtl->return_rtx;
6419 if (reg)
6420 {
6421 unsigned int i = REGNO (reg);
6422 unsigned int nregs = REG_NREGS (reg);
6423 while (nregs-- > 0)
6424 if ((i + nregs) == regno)
6425 return false;
6426 }
6427
6428 return (df_regs_ever_live_p (regno)
6429 && !fixed_regs[regno]
6430 && !STACK_REGNO_P (regno)
6431 && !MMX_REGNO_P (regno)
6432 && (regno != HARD_FRAME_POINTER_REGNUM
6433 || !frame_pointer_needed));
6434 }
6435
6436 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6437 && pic_offset_table_rtx)
6438 {
6439 if (ix86_use_pseudo_pic_reg ())
6440 {
6441 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6442 _mcount in prologue. */
6443 if (!TARGET_64BIT && flag_pic && crtl->profile)
6444 return true;
6445 }
6446 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6447 || crtl->profile
6448 || crtl->calls_eh_return
6449 || crtl->uses_const_pool
6450 || cfun->has_nonlocal_label)
6451 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6452 }
6453
6454 if (crtl->calls_eh_return && maybe_eh_return)
6455 {
6456 unsigned i;
6457 for (i = 0; ; i++)
6458 {
6459 unsigned test = EH_RETURN_DATA_REGNO (i);
6460 if (test == INVALID_REGNUM)
6461 break;
6462 if (test == regno)
6463 return true;
6464 }
6465 }
6466
6467 if (ignore_outlined && cfun->machine->call_ms2sysv)
6468 {
6469 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6470 + xlogue_layout::MIN_REGS;
6471 if (xlogue_layout::is_stub_managed_reg (regno, count))
6472 return false;
6473 }
6474
6475 if (crtl->drap_reg
6476 && regno == REGNO (crtl->drap_reg)
6477 && !cfun->machine->no_drap_save_restore)
6478 return true;
6479
6480 return (df_regs_ever_live_p (regno)
6481 && !call_used_or_fixed_reg_p (regno)
6482 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6483 }
6484
6485 /* Return the number of saved general purpose registers. */
6486
6487 static int
6488 ix86_nsaved_regs (void)
6489 {
6490 int nregs = 0;
6491 int regno;
6492
6493 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6494 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6495 nregs++;
6496 return nregs;
6497 }
6498
6499 /* Return the number of saved SSE registers. */
6500
6501 static int
6502 ix86_nsaved_sseregs (void)
6503 {
6504 int nregs = 0;
6505 int regno;
6506
6507 if (!TARGET_64BIT_MS_ABI)
6508 return 0;
6509 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6510 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6511 nregs++;
6512 return nregs;
6513 }
6514
6515 /* Given FROM and TO register numbers, say whether this elimination is
6516 allowed. If stack alignment is needed, we can only replace argument
6517 pointer with hard frame pointer, or replace frame pointer with stack
6518 pointer. Otherwise, frame pointer elimination is automatically
6519 handled and all other eliminations are valid. */
6520
6521 static bool
6522 ix86_can_eliminate (const int from, const int to)
6523 {
6524 if (stack_realign_fp)
6525 return ((from == ARG_POINTER_REGNUM
6526 && to == HARD_FRAME_POINTER_REGNUM)
6527 || (from == FRAME_POINTER_REGNUM
6528 && to == STACK_POINTER_REGNUM));
6529 else
6530 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6531 }
6532
6533 /* Return the offset between two registers, one to be eliminated, and the other
6534 its replacement, at the start of a routine. */
6535
6536 HOST_WIDE_INT
6537 ix86_initial_elimination_offset (int from, int to)
6538 {
6539 struct ix86_frame &frame = cfun->machine->frame;
6540
6541 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6542 return frame.hard_frame_pointer_offset;
6543 else if (from == FRAME_POINTER_REGNUM
6544 && to == HARD_FRAME_POINTER_REGNUM)
6545 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6546 else
6547 {
6548 gcc_assert (to == STACK_POINTER_REGNUM);
6549
6550 if (from == ARG_POINTER_REGNUM)
6551 return frame.stack_pointer_offset;
6552
6553 gcc_assert (from == FRAME_POINTER_REGNUM);
6554 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6555 }
6556 }
6557
6558 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6559 void
6560 warn_once_call_ms2sysv_xlogues (const char *feature)
6561 {
6562 static bool warned_once = false;
6563 if (!warned_once)
6564 {
6565 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6566 feature);
6567 warned_once = true;
6568 }
6569 }
6570
6571 /* Return the probing interval for -fstack-clash-protection. */
6572
6573 static HOST_WIDE_INT
6574 get_probe_interval (void)
6575 {
6576 if (flag_stack_clash_protection)
6577 return (HOST_WIDE_INT_1U
6578 << param_stack_clash_protection_probe_interval);
6579 else
6580 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6581 }
6582
6583 /* When using -fsplit-stack, the allocation routines set a field in
6584 the TCB to the bottom of the stack plus this much space, measured
6585 in bytes. */
6586
6587 #define SPLIT_STACK_AVAILABLE 256
6588
6589 /* Fill the ix86_frame structure for the currently compiled function. */
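/* Roughly, in the order in which OFFSET accumulates below: return address
   (plus error code for exception handlers), pushed static chain, saved
   frame pointer, general register save area, SSE register save area,
   va_arg save area, realignment padding, the local frame itself, and the
   outgoing argument area.  */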
6590
6591 static void
6592 ix86_compute_frame_layout (void)
6593 {
6594 struct ix86_frame *frame = &cfun->machine->frame;
6595 struct machine_function *m = cfun->machine;
6596 unsigned HOST_WIDE_INT stack_alignment_needed;
6597 HOST_WIDE_INT offset;
6598 unsigned HOST_WIDE_INT preferred_alignment;
6599 HOST_WIDE_INT size = ix86_get_frame_size ();
6600 HOST_WIDE_INT to_allocate;
6601
6602 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6603 * ms_abi functions that call a sysv function. We now need to prune away
6604 * cases where it should be disabled. */
6605 if (TARGET_64BIT && m->call_ms2sysv)
6606 {
6607 gcc_assert (TARGET_64BIT_MS_ABI);
6608 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6609 gcc_assert (!TARGET_SEH);
6610 gcc_assert (TARGET_SSE);
6611 gcc_assert (!ix86_using_red_zone ());
6612
6613 if (crtl->calls_eh_return)
6614 {
6615 gcc_assert (!reload_completed);
6616 m->call_ms2sysv = false;
6617 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6618 }
6619
6620 else if (ix86_static_chain_on_stack)
6621 {
6622 gcc_assert (!reload_completed);
6623 m->call_ms2sysv = false;
6624 warn_once_call_ms2sysv_xlogues ("static call chains");
6625 }
6626
6627 /* Finally, compute which registers the stub will manage. */
6628 else
6629 {
6630 unsigned count = xlogue_layout::count_stub_managed_regs ();
6631 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6632 m->call_ms2sysv_pad_in = 0;
6633 }
6634 }
6635
6636 frame->nregs = ix86_nsaved_regs ();
6637 frame->nsseregs = ix86_nsaved_sseregs ();
6638
6639 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
6640 except for function prologues, leaf functions and when the default
6641 incoming stack boundary is overridden at the command line or via the
6642 force_align_arg_pointer attribute.
6643
6644 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
6645 at call sites, including profile function calls.
6646 */
6647 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6648 && crtl->preferred_stack_boundary < 128)
6649 && (!crtl->is_leaf || cfun->calls_alloca != 0
6650 || ix86_current_function_calls_tls_descriptor
6651 || (TARGET_MACHO && crtl->profile)
6652 || ix86_incoming_stack_boundary < 128))
6653 {
6654 crtl->preferred_stack_boundary = 128;
6655 crtl->stack_alignment_needed = 128;
6656 }
6657
6658 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6659 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6660
6661 gcc_assert (!size || stack_alignment_needed);
6662 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6663 gcc_assert (preferred_alignment <= stack_alignment_needed);
6664
6665 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6666 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6667 if (TARGET_64BIT && m->call_ms2sysv)
6668 {
6669 gcc_assert (stack_alignment_needed >= 16);
6670 gcc_assert (!frame->nsseregs);
6671 }
6672
6673 /* For SEH we have to limit the amount of code movement into the prologue.
6674 At present we do this via a BLOCKAGE, at which point there's very little
6675 scheduling that can be done, which means that there's very little point
6676 in doing anything except PUSHs. */
6677 if (TARGET_SEH)
6678 m->use_fast_prologue_epilogue = false;
6679 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6680 {
6681 int count = frame->nregs;
6682 struct cgraph_node *node = cgraph_node::get (current_function_decl);
6683
6684 /* The fast prologue uses move instead of push to save registers. This
6685 is significantly longer, but also executes faster as modern hardware
6686 can execute the moves in parallel, but can't do that for push/pop.
6687
6688 Be careful about choosing which prologue to emit: when the function takes
6689 many instructions to execute, we may use the slow version, as well as when
6690 the function is known to be outside a hot spot (this is known only with
6691 feedback). Weight the size of the function by the number of registers
6692 to save as it is cheap to use one or two push instructions but very
6693 slow to use many of them.
6694
6695 Calling this hook multiple times with the same frame requirements
6696 must produce the same layout, since the RA might otherwise be
6697 unable to reach a fixed point or might fail its final sanity checks.
6698 This means that once we've assumed that a function does or doesn't
6699 have a particular size, we have to stick to that assumption
6700 regardless of how the function has changed since. */
6701 if (count)
6702 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6703 if (node->frequency < NODE_FREQUENCY_NORMAL
6704 || (flag_branch_probabilities
6705 && node->frequency < NODE_FREQUENCY_HOT))
6706 m->use_fast_prologue_epilogue = false;
6707 else
6708 {
6709 if (count != frame->expensive_count)
6710 {
6711 frame->expensive_count = count;
6712 frame->expensive_p = expensive_function_p (count);
6713 }
6714 m->use_fast_prologue_epilogue = !frame->expensive_p;
6715 }
6716 }
6717
6718 frame->save_regs_using_mov
6719 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
6720
6721 /* Skip return address and error code in exception handler. */
6722 offset = INCOMING_FRAME_SP_OFFSET;
6723
6724 /* Skip pushed static chain. */
6725 if (ix86_static_chain_on_stack)
6726 offset += UNITS_PER_WORD;
6727
6728 /* Skip saved base pointer. */
6729 if (frame_pointer_needed)
6730 offset += UNITS_PER_WORD;
6731 frame->hfp_save_offset = offset;
6732
6733 /* The traditional frame pointer location is at the top of the frame. */
6734 frame->hard_frame_pointer_offset = offset;
6735
6736 /* Register save area */
6737 offset += frame->nregs * UNITS_PER_WORD;
6738 frame->reg_save_offset = offset;
6739
6740 /* Calculate the size of the va-arg area (not including padding, if any). */
6741 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6742
6743 /* Also adjust stack_realign_offset for the largest alignment of
6744 stack slot actually used. */
6745 if (stack_realign_fp
6746 || (cfun->machine->max_used_stack_alignment != 0
6747 && (offset % cfun->machine->max_used_stack_alignment) != 0))
6748 {
6749 /* We may need a 16-byte aligned stack for the remainder of the
6750 register save area, but the stack frame for the local function
6751 may require a greater alignment if using AVX, AVX2 or AVX-512. In order
6752 to avoid wasting space, we first calculate the space needed for
6753 the rest of the register saves, add that to the stack pointer,
6754 and then realign the stack to the boundary of the start of the
6755 frame for the local function. */
6756 HOST_WIDE_INT space_needed = 0;
6757 HOST_WIDE_INT sse_reg_space_needed = 0;
6758
6759 if (TARGET_64BIT)
6760 {
6761 if (m->call_ms2sysv)
6762 {
6763 m->call_ms2sysv_pad_in = 0;
6764 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6765 }
6766
6767 else if (frame->nsseregs)
6768 /* The only ABI that has saved SSE registers (Win64) also has a
6769 16-byte aligned default stack. However, many programs violate
6770 the ABI, and Wine64 forces stack realignment to compensate. */
6771 space_needed = frame->nsseregs * 16;
6772
6773 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6774
6775 /* In 64-bit mode frame->va_arg_size should always be a multiple of 16,
6776 but round anyway, to be pedantic. */
6777 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6778 }
6779 else
6780 space_needed = frame->va_arg_size;
6781
6782 /* Record the allocation size required prior to the realignment AND. */
6783 frame->stack_realign_allocate = space_needed;
6784
6785 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6786 before this point are not directly comparable with values below
6787 this point. Use sp_valid_at to determine if the stack pointer is
6788 valid for a given offset, fp_valid_at for the frame pointer, or
6789 choose_baseaddr to have a base register chosen for you.
6790
6791 Note that the result of (frame->stack_realign_offset
6792 & (stack_alignment_needed - 1)) may not equal zero. */
6793 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6794 frame->stack_realign_offset = offset - space_needed;
6795 frame->sse_reg_save_offset = frame->stack_realign_offset
6796 + sse_reg_space_needed;
6797 }
6798 else
6799 {
6800 frame->stack_realign_offset = offset;
6801
6802 if (TARGET_64BIT && m->call_ms2sysv)
6803 {
6804 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6805 offset += xlogue_layout::get_instance ().get_stack_space_used ();
6806 }
6807
6808 /* Align and set SSE register save area. */
6809 else if (frame->nsseregs)
6810 {
6811 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6812 required and the DRAP re-alignment boundary is at least 16 bytes,
6813 then we want the SSE register save area properly aligned. */
6814 if (ix86_incoming_stack_boundary >= 128
6815 || (stack_realign_drap && stack_alignment_needed >= 16))
6816 offset = ROUND_UP (offset, 16);
6817 offset += frame->nsseregs * 16;
6818 }
6819 frame->sse_reg_save_offset = offset;
6820 offset += frame->va_arg_size;
6821 }
6822
6823 /* Align start of frame for local function. When a function call
6824 is removed, it may become a leaf function. But if arguments may
6825 be passed on the stack, we need to align the stack when there is no
6826 tail call. */
6827 if (m->call_ms2sysv
6828 || frame->va_arg_size != 0
6829 || size != 0
6830 || !crtl->is_leaf
6831 || (!crtl->tail_call_emit
6832 && cfun->machine->outgoing_args_on_stack)
6833 || cfun->calls_alloca
6834 || ix86_current_function_calls_tls_descriptor)
6835 offset = ROUND_UP (offset, stack_alignment_needed);
6836
6837 /* Frame pointer points here. */
6838 frame->frame_pointer_offset = offset;
6839
6840 offset += size;
6841
6842 /* Add outgoing arguments area. Can be skipped if we eliminated
6843 all the function calls as dead code.
6844 Skipping is however impossible when the function calls alloca. The alloca
6845 expander assumes that the last crtl->outgoing_args_size bytes
6846 of the stack frame are unused. */
6847 if (ACCUMULATE_OUTGOING_ARGS
6848 && (!crtl->is_leaf || cfun->calls_alloca
6849 || ix86_current_function_calls_tls_descriptor))
6850 {
6851 offset += crtl->outgoing_args_size;
6852 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6853 }
6854 else
6855 frame->outgoing_arguments_size = 0;
6856
6857 /* Align stack boundary. Only needed if we're calling another function
6858 or using alloca. */
6859 if (!crtl->is_leaf || cfun->calls_alloca
6860 || ix86_current_function_calls_tls_descriptor)
6861 offset = ROUND_UP (offset, preferred_alignment);
6862
6863 /* We've reached end of stack frame. */
6864 frame->stack_pointer_offset = offset;
6865
6866 /* Size prologue needs to allocate. */
6867 to_allocate = offset - frame->sse_reg_save_offset;
6868
6869 if ((!to_allocate && frame->nregs <= 1)
6870 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6871 /* If static stack checking is enabled and done with probes,
6872 the registers need to be saved before allocating the frame. */
6873 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6874 /* If stack clash probing needs a loop, then it needs a
6875 scratch register. But the returned register is only guaranteed
6876 to be safe to use after register saves are complete. So if
6877 stack clash protections are enabled and the allocated frame is
6878 larger than the probe interval, then use pushes to save
6879 callee saved registers. */
6880 || (flag_stack_clash_protection
6881 && !ix86_target_stack_probe ()
6882 && to_allocate > get_probe_interval ()))
6883 frame->save_regs_using_mov = false;
6884
6885 if (ix86_using_red_zone ()
6886 && crtl->sp_is_unchanging
6887 && crtl->is_leaf
6888 && !ix86_pc_thunk_call_expanded
6889 && !ix86_current_function_calls_tls_descriptor)
6890 {
6891 frame->red_zone_size = to_allocate;
6892 if (frame->save_regs_using_mov)
6893 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6894 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6895 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6896 }
6897 else
6898 frame->red_zone_size = 0;
6899 frame->stack_pointer_offset -= frame->red_zone_size;
6900
6901 /* The SEH frame pointer location is near the bottom of the frame.
6902 This is enforced by the fact that the difference between the
6903 stack pointer and the frame pointer is limited to 240 bytes in
6904 the unwind data structure. */
6905 if (TARGET_SEH)
6906 {
6907 /* Force the frame pointer to point at or below the lowest register save
6908 area, see the SEH code in config/i386/winnt.cc for the rationale. */
6909 frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
6910
6911 /* If we can leave the frame pointer where it is, do so; however return
6912 the establisher frame for __builtin_frame_address (0) or else if the
6913 frame overflows the SEH maximum frame size.
6914
6915 Note that the value returned by __builtin_frame_address (0) is quite
6916 constrained, because setjmp is piggybacked on the SEH machinery with
6917 recent versions of MinGW:
6918
6919 # elif defined(__SEH__)
6920 # if defined(__aarch64__) || defined(_ARM64_)
6921 # define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
6922 # elif (__MINGW_GCC_VERSION < 40702)
6923 # define setjmp(BUF) _setjmp((BUF), mingw_getsp())
6924 # else
6925 # define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
6926 # endif
6927
6928 and the second argument passed to _setjmp, if not null, is forwarded
6929 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
6930 built an ExceptionRecord on the fly describing the setjmp buffer). */
6931 const HOST_WIDE_INT diff
6932 = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6933 if (diff <= 255 && !crtl->accesses_prior_frames)
6934 {
6935 /* The resulting diff will be a multiple of 16 lower than 255,
6936 i.e. at most 240 as required by the unwind data structure. */
6937 frame->hard_frame_pointer_offset += (diff & 15);
6938 }
6939 else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
6940 {
6941 /* Ideally we'd determine what portion of the local stack frame
6942 (within the constraint of the lowest 240) is most heavily used.
6943 But without that complication, simply bias the frame pointer
6944 by 128 bytes so as to maximize the amount of the local stack
6945 frame that is addressable with 8-bit offsets. */
6946 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
6947 }
6948 else
6949 frame->hard_frame_pointer_offset = frame->hfp_save_offset;
6950 }
6951 }
6952
6953 /* This is semi-inlined memory_address_length, but simplified
6954 since we know that we're always dealing with reg+offset, and
6955 to avoid having to create and discard all that rtl. */
6956
6957 static inline int
6958 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6959 {
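/* Default to a 4-byte (32-bit) displacement.  */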
6960 int len = 4;
6961
6962 if (offset == 0)
6963 {
6964 /* EBP and R13 cannot be encoded without an offset. */
6965 len = (regno == BP_REG || regno == R13_REG);
6966 }
6967 else if (IN_RANGE (offset, -128, 127))
6968 len = 1;
6969
6970 /* ESP and R12 must be encoded with a SIB byte. */
6971 if (regno == SP_REG || regno == R12_REG)
6972 len++;
6973
6974 return len;
6975 }
6976
6977 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6978 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6979
6980 static bool
6981 sp_valid_at (HOST_WIDE_INT cfa_offset)
6982 {
6983 const struct machine_frame_state &fs = cfun->machine->fs;
6984 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6985 {
6986 /* Validate that the cfa_offset isn't in a "no-man's land". */
6987 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6988 return false;
6989 }
6990 return fs.sp_valid;
6991 }
6992
6993 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6994 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6995
6996 static inline bool
6997 fp_valid_at (HOST_WIDE_INT cfa_offset)
6998 {
6999 const struct machine_frame_state &fs = cfun->machine->fs;
7000 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
7001 {
7002 /* Validate that the cfa_offset isn't in a "no-man's land". */
7003 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
7004 return false;
7005 }
7006 return fs.fp_valid;
7007 }
7008
7009 /* Choose a base register based upon alignment requested, speed and/or
7010 size. */
7011
7012 static void
7013 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
7014 HOST_WIDE_INT &base_offset,
7015 unsigned int align_reqested, unsigned int *align)
7016 {
7017 const struct machine_function *m = cfun->machine;
7018 unsigned int hfp_align;
7019 unsigned int drap_align;
7020 unsigned int sp_align;
7021 bool hfp_ok = fp_valid_at (cfa_offset);
7022 bool drap_ok = m->fs.drap_valid;
7023 bool sp_ok = sp_valid_at (cfa_offset);
7024
7025 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
7026
7027 /* Filter out any registers that don't meet the requested alignment
7028 criteria. */
7029 if (align_reqested)
7030 {
7031 if (m->fs.realigned)
7032 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
7033 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
7034 notes (which we would need to use a realigned stack pointer),
7035 so disable on SEH targets. */
7036 else if (m->fs.sp_realigned)
7037 sp_align = crtl->stack_alignment_needed;
7038
7039 hfp_ok = hfp_ok && hfp_align >= align_reqested;
7040 drap_ok = drap_ok && drap_align >= align_reqested;
7041 sp_ok = sp_ok && sp_align >= align_reqested;
7042 }
7043
7044 if (m->use_fast_prologue_epilogue)
7045 {
7046 /* Choose the base register most likely to allow the most scheduling
7047 opportunities. Generally FP is valid throughout the function,
7048 while DRAP must be reloaded within the epilogue. But choose either
7049 over the SP due to increased encoding size. */
7050
7051 if (hfp_ok)
7052 {
7053 base_reg = hard_frame_pointer_rtx;
7054 base_offset = m->fs.fp_offset - cfa_offset;
7055 }
7056 else if (drap_ok)
7057 {
7058 base_reg = crtl->drap_reg;
7059 base_offset = 0 - cfa_offset;
7060 }
7061 else if (sp_ok)
7062 {
7063 base_reg = stack_pointer_rtx;
7064 base_offset = m->fs.sp_offset - cfa_offset;
7065 }
7066 }
7067 else
7068 {
7069 HOST_WIDE_INT toffset;
7070 int len = 16, tlen;
7071
7072 /* Choose the base register with the smallest address encoding.
7073 With a tie, choose FP > DRAP > SP. */
7074 if (sp_ok)
7075 {
7076 base_reg = stack_pointer_rtx;
7077 base_offset = m->fs.sp_offset - cfa_offset;
7078 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
7079 }
7080 if (drap_ok)
7081 {
7082 toffset = 0 - cfa_offset;
7083 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
7084 if (tlen <= len)
7085 {
7086 base_reg = crtl->drap_reg;
7087 base_offset = toffset;
7088 len = tlen;
7089 }
7090 }
7091 if (hfp_ok)
7092 {
7093 toffset = m->fs.fp_offset - cfa_offset;
7094 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
7095 if (tlen <= len)
7096 {
7097 base_reg = hard_frame_pointer_rtx;
7098 base_offset = toffset;
7099 }
7100 }
7101 }
7102
7103 /* Set the align return value. */
7104 if (align)
7105 {
7106 if (base_reg == stack_pointer_rtx)
7107 *align = sp_align;
7108 else if (base_reg == crtl->drap_reg)
7109 *align = drap_align;
7110 else if (base_reg == hard_frame_pointer_rtx)
7111 *align = hfp_align;
7112 }
7113 }
7114
7115 /* Return an RTX that points to CFA_OFFSET within the stack frame and
7116 the alignment of address. If ALIGN is non-null, it should point to
7117 an alignment value (in bits) that is preferred or zero and will
7118 receive the alignment of the base register that was selected,
7119 irrespective of whether or not CFA_OFFSET is a multiple of that
7120 alignment value. If it is possible for the base register offset to be
7121 non-immediate then SCRATCH_REGNO should specify a scratch register to
7122 use.
7123
7124 The valid base registers are taken from CFUN->MACHINE->FS. */
7125
7126 static rtx
7127 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
7128 unsigned int scratch_regno = INVALID_REGNUM)
7129 {
7130 rtx base_reg = NULL;
7131 HOST_WIDE_INT base_offset = 0;
7132
7133 /* If a specific alignment is requested, try to get a base register
7134 with that alignment first. */
7135 if (align && *align)
7136 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
7137
7138 if (!base_reg)
7139 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
7140
7141 gcc_assert (base_reg != NULL);
7142
7143 rtx base_offset_rtx = GEN_INT (base_offset);
7144
7145 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
7146 {
7147 gcc_assert (scratch_regno != INVALID_REGNUM);
7148
7149 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
7150 emit_move_insn (scratch_reg, base_offset_rtx);
7151
7152 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
7153 }
7154
7155 return plus_constant (Pmode, base_reg, base_offset);
7156 }
7157
7158 /* Emit code to save registers in the prologue. */
7159
7160 static void
7161 ix86_emit_save_regs (void)
7162 {
7163 unsigned int regno;
7164 rtx_insn *insn;
7165
7166 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7167 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7168 {
7169 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
7170 RTX_FRAME_RELATED_P (insn) = 1;
7171 }
7172 }
7173
7174 /* Emit a single register save at CFA - CFA_OFFSET. */
7175
7176 static void
7177 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
7178 HOST_WIDE_INT cfa_offset)
7179 {
7180 struct machine_function *m = cfun->machine;
7181 rtx reg = gen_rtx_REG (mode, regno);
7182 rtx mem, addr, base, insn;
7183 unsigned int align = GET_MODE_ALIGNMENT (mode);
7184
7185 addr = choose_baseaddr (cfa_offset, &align);
7186 mem = gen_frame_mem (mode, addr);
7187
7188 /* The location alignment depends upon the base register. */
7189 align = MIN (GET_MODE_ALIGNMENT (mode), align);
7190 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
7191 set_mem_align (mem, align);
7192
7193 insn = emit_insn (gen_rtx_SET (mem, reg));
7194 RTX_FRAME_RELATED_P (insn) = 1;
7195
7196 base = addr;
7197 if (GET_CODE (base) == PLUS)
7198 base = XEXP (base, 0);
7199 gcc_checking_assert (REG_P (base));
7200
7201 /* When saving registers into a re-aligned local stack frame, avoid
7202 any tricky guessing by dwarf2out. */
7203 if (m->fs.realigned)
7204 {
7205 gcc_checking_assert (stack_realign_drap);
7206
7207 if (regno == REGNO (crtl->drap_reg))
7208 {
7209 /* A bit of a hack. We force the DRAP register to be saved in
7210 the re-aligned stack frame, which provides us with a copy
7211 of the CFA that will last past the prologue. Install it. */
7212 gcc_checking_assert (cfun->machine->fs.fp_valid);
7213 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7214 cfun->machine->fs.fp_offset - cfa_offset);
7215 mem = gen_rtx_MEM (mode, addr);
7216 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
7217 }
7218 else
7219 {
7220 /* The frame pointer is a stable reference within the
7221 aligned frame. Use it. */
7222 gcc_checking_assert (cfun->machine->fs.fp_valid);
7223 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
7224 cfun->machine->fs.fp_offset - cfa_offset);
7225 mem = gen_rtx_MEM (mode, addr);
7226 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7227 }
7228 }
7229
7230 else if (base == stack_pointer_rtx && m->fs.sp_realigned
7231 && cfa_offset >= m->fs.sp_realigned_offset)
7232 {
7233 gcc_checking_assert (stack_realign_fp);
7234 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
7235 }
7236
7237 /* The memory may not be relative to the current CFA register,
7238 which means that we may need to generate a new pattern for
7239 use by the unwind info. */
7240 else if (base != m->fs.cfa_reg)
7241 {
7242 addr = plus_constant (Pmode, m->fs.cfa_reg,
7243 m->fs.cfa_offset - cfa_offset);
7244 mem = gen_rtx_MEM (mode, addr);
7245 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
7246 }
7247 }
7248
7249 /* Emit code to save registers using MOV insns.
7250 First register is stored at CFA - CFA_OFFSET. */
7251 static void
7252 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
7253 {
7254 unsigned int regno;
7255
7256 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7257 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7258 {
7259 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
7260 cfa_offset -= UNITS_PER_WORD;
7261 }
7262 }
7263
7264 /* Emit code to save SSE registers using MOV insns.
7265 First register is stored at CFA - CFA_OFFSET. */
7266 static void
7267 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
7268 {
7269 unsigned int regno;
7270
7271 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7272 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7273 {
7274 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7275 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7276 }
7277 }
7278
7279 static GTY(()) rtx queued_cfa_restores;
7280
7281 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
7282 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7283 Don't add the note if the previously saved value will be left untouched
7284 within stack red-zone till return, as unwinders can find the same value
7285 in the register and on the stack. */
7286
7287 static void
7288 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7289 {
7290 if (!crtl->shrink_wrapped
7291 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7292 return;
7293
7294 if (insn)
7295 {
7296 add_reg_note (insn, REG_CFA_RESTORE, reg);
7297 RTX_FRAME_RELATED_P (insn) = 1;
7298 }
7299 else
7300 queued_cfa_restores
7301 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7302 }
7303
7304 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7305
7306 static void
7307 ix86_add_queued_cfa_restore_notes (rtx insn)
7308 {
7309 rtx last;
7310 if (!queued_cfa_restores)
7311 return;
7312 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7313 ;
7314 XEXP (last, 1) = REG_NOTES (insn);
7315 REG_NOTES (insn) = queued_cfa_restores;
7316 queued_cfa_restores = NULL_RTX;
7317 RTX_FRAME_RELATED_P (insn) = 1;
7318 }
7319
7320 /* Expand prologue or epilogue stack adjustment.
7321 The pattern exists to put a dependency on all ebp-based memory accesses.
7322 STYLE should be negative if instructions should be marked as frame related,
7323 zero if %r11 register is live and cannot be freely used and positive
7324 otherwise. */
7325
7326 static rtx
7327 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7328 int style, bool set_cfa)
7329 {
7330 struct machine_function *m = cfun->machine;
7331 rtx addend = offset;
7332 rtx insn;
7333 bool add_frame_related_expr = false;
7334
7335 if (!x86_64_immediate_operand (offset, Pmode))
7336 {
7337 /* r11 is used by indirect sibcall return as well, set before the
7338 epilogue and used after the epilogue. */
7339 if (style)
7340 addend = gen_rtx_REG (Pmode, R11_REG);
7341 else
7342 {
7343 gcc_assert (src != hard_frame_pointer_rtx
7344 && dest != hard_frame_pointer_rtx);
7345 addend = hard_frame_pointer_rtx;
7346 }
7347 emit_insn (gen_rtx_SET (addend, offset));
7348 if (style < 0)
7349 add_frame_related_expr = true;
7350 }
7351
7352 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7353 (Pmode, dest, src, addend));
7354 if (style >= 0)
7355 ix86_add_queued_cfa_restore_notes (insn);
7356
7357 if (set_cfa)
7358 {
7359 rtx r;
7360
7361 gcc_assert (m->fs.cfa_reg == src);
7362 m->fs.cfa_offset += INTVAL (offset);
7363 m->fs.cfa_reg = dest;
7364
7365 r = gen_rtx_PLUS (Pmode, src, offset);
7366 r = gen_rtx_SET (dest, r);
7367 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7368 RTX_FRAME_RELATED_P (insn) = 1;
7369 }
7370 else if (style < 0)
7371 {
7372 RTX_FRAME_RELATED_P (insn) = 1;
7373 if (add_frame_related_expr)
7374 {
7375 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7376 r = gen_rtx_SET (dest, r);
7377 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7378 }
7379 }
7380
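/* When the stack pointer is the destination, update the tracked frame
   state based on which register the new value was derived from.  */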
7381 if (dest == stack_pointer_rtx)
7382 {
7383 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7384 bool valid = m->fs.sp_valid;
7385 bool realigned = m->fs.sp_realigned;
7386
7387 if (src == hard_frame_pointer_rtx)
7388 {
7389 valid = m->fs.fp_valid;
7390 realigned = false;
7391 ooffset = m->fs.fp_offset;
7392 }
7393 else if (src == crtl->drap_reg)
7394 {
7395 valid = m->fs.drap_valid;
7396 realigned = false;
7397 ooffset = 0;
7398 }
7399 else
7400 {
7401 /* Else there are two possibilities: SP itself, which we set
7402 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
7403 taken care of by hand along the eh_return path. */
7404 gcc_checking_assert (src == stack_pointer_rtx
7405 || offset == const0_rtx);
7406 }
7407
7408 m->fs.sp_offset = ooffset - INTVAL (offset);
7409 m->fs.sp_valid = valid;
7410 m->fs.sp_realigned = realigned;
7411 }
7412 return insn;
7413 }
7414
7415 /* Find an available register to be used as dynamic realign argument
7416 pointer register. Such a register will be written in the prologue and
7417 used at the beginning of the body, so it must not be
7418 1. parameter passing register.
7419 2. GOT pointer.
7420 We reuse the static-chain register if it is available. Otherwise, we
7421 use DI for i386 and R13 for x86-64. We chose R13 since it has a
7422 shorter encoding.
7423
7424 Return: the regno of chosen register. */
7425
7426 static unsigned int
7427 find_drap_reg (void)
7428 {
7429 tree decl = cfun->decl;
7430
7431 /* Always use callee-saved register if there are no caller-saved
7432 registers. */
7433 if (TARGET_64BIT)
7434 {
7435 /* Use R13 for a nested function or a function that needs a static chain.
7436 Since a function with a tail call may use any caller-saved
7437 register in the epilogue, the DRAP must not use a caller-saved
7438 register in such a case. */
7439 if (DECL_STATIC_CHAIN (decl)
7440 || cfun->machine->no_caller_saved_registers
7441 || crtl->tail_call_emit)
7442 return R13_REG;
7443
7444 return R10_REG;
7445 }
7446 else
7447 {
7448 /* Use DI for a nested function or a function that needs a static chain.
7449 Since a function with a tail call may use any caller-saved
7450 register in the epilogue, DRAP must not use a caller-saved
7451 register in such a case. */
7452 if (DECL_STATIC_CHAIN (decl)
7453 || cfun->machine->no_caller_saved_registers
7454 || crtl->tail_call_emit
7455 || crtl->calls_eh_return)
7456 return DI_REG;
7457
7458 /* Reuse the static chain register if it isn't used for parameter
7459 passing. */
7460 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7461 {
7462 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7463 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7464 return CX_REG;
7465 }
7466 return DI_REG;
7467 }
7468 }
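/* Illustrative summary (not part of the original source) of the
   selection made by find_drap_reg above:

     64-bit: R13 when a static chain, no_caller_saved_registers or a
             tail call is involved; otherwise R10.
     32-bit: DI in the same cases (plus eh_return); otherwise CX when
             the static chain register is free (regparm <= 2 and not
             fastcall/thiscall), else DI.  */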
7469
7470 /* Return minimum incoming stack alignment. */
7471
7472 static unsigned int
7473 ix86_minimum_incoming_stack_boundary (bool sibcall)
7474 {
7475 unsigned int incoming_stack_boundary;
7476
7477 /* The stack of an interrupt handler is aligned to 128 bits in 64-bit mode. */
7478 if (cfun->machine->func_type != TYPE_NORMAL)
7479 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7480 /* Prefer the one specified on the command line. */
7481 else if (ix86_user_incoming_stack_boundary)
7482 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7483 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
7484 if -mstackrealign is used, this isn't a sibcall check, and the
7485 estimated stack alignment is 128 bits. */
7486 else if (!sibcall
7487 && ix86_force_align_arg_pointer
7488 && crtl->stack_alignment_estimated == 128)
7489 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7490 else
7491 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7492
7493 /* Incoming stack alignment can be changed on individual functions
7494 via force_align_arg_pointer attribute. We use the smallest
7495 incoming stack boundary. */
7496 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7497 && lookup_attribute ("force_align_arg_pointer",
7498 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7499 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7500
7501 /* The incoming stack frame has to be aligned at least at
7502 parm_stack_boundary. */
7503 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7504 incoming_stack_boundary = crtl->parm_stack_boundary;
7505
7506 /* The stack at the entry to main is aligned by the runtime. We use the
7507 smallest incoming stack boundary. */
7508 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7509 && DECL_NAME (current_function_decl)
7510 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7511 && DECL_FILE_SCOPE_P (current_function_decl))
7512 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7513
7514 return incoming_stack_boundary;
7515 }
7516
7517 /* Update incoming stack boundary and estimated stack alignment. */
7518
7519 static void
7520 ix86_update_stack_boundary (void)
7521 {
7522 ix86_incoming_stack_boundary
7523 = ix86_minimum_incoming_stack_boundary (false);
7524
7525 /* x86-64 varargs needs 16-byte stack alignment for the register save area. */
7526 if (TARGET_64BIT
7527 && cfun->stdarg
7528 && crtl->stack_alignment_estimated < 128)
7529 crtl->stack_alignment_estimated = 128;
7530
7531 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7532 if (ix86_tls_descriptor_calls_expanded_in_cfun
7533 && crtl->preferred_stack_boundary < 128)
7534 crtl->preferred_stack_boundary = 128;
7535 }
7536
7537 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7538 needed or an rtx for DRAP otherwise. */
7539
7540 static rtx
7541 ix86_get_drap_rtx (void)
7542 {
7543 /* We must use DRAP if there are outgoing arguments on the stack or
7544 the stack pointer register is clobbered by an asm statement and
7545 ACCUMULATE_OUTGOING_ARGS is false. */
7546 if (ix86_force_drap
7547 || ((cfun->machine->outgoing_args_on_stack
7548 || crtl->sp_is_clobbered_by_asm)
7549 && !ACCUMULATE_OUTGOING_ARGS))
7550 crtl->need_drap = true;
7551
7552 if (stack_realign_drap)
7553 {
7554 /* Assign DRAP to vDRAP and return vDRAP. */
7555 unsigned int regno = find_drap_reg ();
7556 rtx drap_vreg;
7557 rtx arg_ptr;
7558 rtx_insn *seq, *insn;
7559
7560 arg_ptr = gen_rtx_REG (Pmode, regno);
7561 crtl->drap_reg = arg_ptr;
7562
7563 start_sequence ();
7564 drap_vreg = copy_to_reg (arg_ptr);
7565 seq = get_insns ();
7566 end_sequence ();
7567
7568 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7569 if (!optimize)
7570 {
7571 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7572 RTX_FRAME_RELATED_P (insn) = 1;
7573 }
7574 return drap_vreg;
7575 }
7576 else
7577 return NULL;
7578 }
7579
7580 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7581
7582 static rtx
7583 ix86_internal_arg_pointer (void)
7584 {
7585 return virtual_incoming_args_rtx;
7586 }
7587
7588 struct scratch_reg {
7589 rtx reg;
7590 bool saved;
7591 };
7592
7593 /* Return a short-lived scratch register for use on function entry.
7594 In 32-bit mode, it is valid only after the registers are saved
7595 in the prologue. This register must be released by means of
7596 release_scratch_register_on_entry once it is dead. */
7597
7598 static void
7599 get_scratch_register_on_entry (struct scratch_reg *sr)
7600 {
7601 int regno;
7602
7603 sr->saved = false;
7604
7605 if (TARGET_64BIT)
7606 {
7607 /* We always use R11 in 64-bit mode. */
7608 regno = R11_REG;
7609 }
7610 else
7611 {
7612 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7613 bool fastcall_p
7614 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7615 bool thiscall_p
7616 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7617 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7618 int regparm = ix86_function_regparm (fntype, decl);
7619 int drap_regno
7620 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7621
7622 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7623 for the static chain register. */
7624 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7625 && drap_regno != AX_REG)
7626 regno = AX_REG;
7627 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7628 for the static chain register. */
7629 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7630 regno = AX_REG;
7631 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7632 regno = DX_REG;
7633 /* ecx is the static chain register. */
7634 else if (regparm < 3 && !fastcall_p && !thiscall_p
7635 && !static_chain_p
7636 && drap_regno != CX_REG)
7637 regno = CX_REG;
7638 else if (ix86_save_reg (BX_REG, true, false))
7639 regno = BX_REG;
7640 /* esi is the static chain register. */
7641 else if (!(regparm == 3 && static_chain_p)
7642 && ix86_save_reg (SI_REG, true, false))
7643 regno = SI_REG;
7644 else if (ix86_save_reg (DI_REG, true, false))
7645 regno = DI_REG;
7646 else
7647 {
7648 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7649 sr->saved = true;
7650 }
7651 }
7652
7653 sr->reg = gen_rtx_REG (Pmode, regno);
7654 if (sr->saved)
7655 {
7656 rtx_insn *insn = emit_insn (gen_push (sr->reg));
7657 RTX_FRAME_RELATED_P (insn) = 1;
7658 }
7659 }
7660
7661 /* Release a scratch register obtained from the preceding function.
7662
7663 If RELEASE_VIA_POP is true, we just pop the register off the stack
7664 to release it. This is what non-Linux systems use with -fstack-check.
7665
7666 Otherwise we use OFFSET to locate the saved register and the
7667 allocated stack space becomes part of the local frame and is
7668 deallocated by the epilogue. */
7669
7670 static void
7671 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7672 bool release_via_pop)
7673 {
7674 if (sr->saved)
7675 {
7676 if (release_via_pop)
7677 {
7678 struct machine_function *m = cfun->machine;
7679 rtx x, insn = emit_insn (gen_pop (sr->reg));
7680
7681 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
7682 RTX_FRAME_RELATED_P (insn) = 1;
7683 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7684 x = gen_rtx_SET (stack_pointer_rtx, x);
7685 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7686 m->fs.sp_offset -= UNITS_PER_WORD;
7687 }
7688 else
7689 {
7690 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
7691 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7692 emit_insn (x);
7693 }
7694 }
7695 }
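/* Illustrative pairing (not part of the original source): callers such
   as ix86_adjust_stack_and_probe below bracket their probing sequence
   with

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg as a loop counter ...
     release_scratch_register_on_entry (&sr, size, false);

   so that any push emitted to free the scratch register is undone
   either by a pop or by the epilogue's frame deallocation.  */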
7696
7697 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7698
7699 If INT_REGISTERS_SAVED is true, then integer registers have already been
7700 pushed on the stack.
7701
7702 If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
7703 beyond SIZE bytes.
7704
7705 This assumes no knowledge of the current probing state, i.e. it is never
7706 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
7707 a suitable probe. */
7708
7709 static void
7710 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7711 const bool int_registers_saved,
7712 const bool protection_area)
7713 {
7714 struct machine_function *m = cfun->machine;
7715
7716 /* If this function does not statically allocate stack space, then
7717 no probes are needed. */
7718 if (!size)
7719 {
7720 /* However, the allocation of space via pushes for register
7721 saves could be viewed as allocating space, but without the
7722 need to probe. */
7723 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7724 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7725 else
7726 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7727 return;
7728 }
7729
7730 /* If we are a noreturn function, then we have to consider the
7731 possibility that we're called via a jump rather than a call.
7732
7733 Thus we don't have the implicit probe generated by saving the
7734 return address into the stack at the call. Thus, the stack
7735 pointer could be anywhere in the guard page. The safe thing
7736 to do is emit a probe now.
7737
7738 The probe can be avoided if we have already emitted any callee
7739 register saves into the stack or have a frame pointer (which will
7740 have been saved as well). Those saves will function as implicit
7741 probes.
7742
7743 ?!? This should be revamped to work like aarch64 and s390 where
7744 we track the offset from the most recent probe. Normally that
7745 offset would be zero. For a noreturn function we would reset
7746 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7747 we just probe when we cross PROBE_INTERVAL. */
7748 if (TREE_THIS_VOLATILE (cfun->decl)
7749 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7750 {
7751 /* We can safely use any register here since we're just going to push
7752 its value and immediately pop it back. But we do try and avoid
7753 argument passing registers so as not to introduce dependencies in
7754 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7755 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7756 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
7757 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
7758 m->fs.sp_offset -= UNITS_PER_WORD;
7759 if (m->fs.cfa_reg == stack_pointer_rtx)
7760 {
7761 m->fs.cfa_offset -= UNITS_PER_WORD;
7762 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7763 x = gen_rtx_SET (stack_pointer_rtx, x);
7764 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
7765 RTX_FRAME_RELATED_P (insn_push) = 1;
7766 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7767 x = gen_rtx_SET (stack_pointer_rtx, x);
7768 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
7769 RTX_FRAME_RELATED_P (insn_pop) = 1;
7770 }
7771 emit_insn (gen_blockage ());
7772 }
7773
7774 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7775 const int dope = 4 * UNITS_PER_WORD;
7776
7777 /* If there is a protection area, take it into account in the size. */
7778 if (protection_area)
7779 size += probe_interval + dope;
7780
7781 /* If we allocate less than the size of the guard statically,
7782 then no probing is necessary, but we do need to allocate
7783 the stack. */
7784 else if (size < (1 << param_stack_clash_protection_guard_size))
7785 {
7786 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7787 GEN_INT (-size), -1,
7788 m->fs.cfa_reg == stack_pointer_rtx);
7789 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7790 return;
7791 }
7792
7793 /* We're allocating a large enough stack frame that we need to
7794 emit probes. Either emit them inline or in a loop depending
7795 on the size. */
7796 if (size <= 4 * probe_interval)
7797 {
7798 HOST_WIDE_INT i;
7799 for (i = probe_interval; i <= size; i += probe_interval)
7800 {
7801 /* Allocate PROBE_INTERVAL bytes. */
7802 rtx insn
7803 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7804 GEN_INT (-probe_interval), -1,
7805 m->fs.cfa_reg == stack_pointer_rtx);
7806 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7807
7808 /* And probe at *sp. */
7809 emit_stack_probe (stack_pointer_rtx);
7810 emit_insn (gen_blockage ());
7811 }
7812
7813 /* We need to allocate space for the residual, but we do not need
7814 to probe the residual... */
7815 HOST_WIDE_INT residual = (i - probe_interval - size);
7816 if (residual)
7817 {
7818 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7819 GEN_INT (residual), -1,
7820 m->fs.cfa_reg == stack_pointer_rtx);
7821
7822 /* ...except if there is a protection area to maintain. */
7823 if (protection_area)
7824 emit_stack_probe (stack_pointer_rtx);
7825 }
7826
7827 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7828 }
7829 else
7830 {
7831 /* We expect the GP registers to be saved when probes are used
7832 as the probing sequences might need a scratch register and
7833 the routine to allocate one assumes the integer registers
7834 have already been saved. */
7835 gcc_assert (int_registers_saved);
7836
7837 struct scratch_reg sr;
7838 get_scratch_register_on_entry (&sr);
7839
7840 /* If we needed to save a register, then account for any space
7841 that was pushed (we are not going to pop the register when
7842 we do the restore). */
7843 if (sr.saved)
7844 size -= UNITS_PER_WORD;
7845
7846 /* Step 1: round SIZE down to a multiple of the interval. */
7847 HOST_WIDE_INT rounded_size = size & -probe_interval;
7848
7849 /* Step 2: compute final value of the loop counter. Use lea if
7850 possible. */
7851 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7852 rtx insn;
7853 if (address_no_seg_operand (addr, Pmode))
7854 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7855 else
7856 {
7857 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7858 insn = emit_insn (gen_rtx_SET (sr.reg,
7859 gen_rtx_PLUS (Pmode, sr.reg,
7860 stack_pointer_rtx)));
7861 }
7862 if (m->fs.cfa_reg == stack_pointer_rtx)
7863 {
7864 add_reg_note (insn, REG_CFA_DEF_CFA,
7865 plus_constant (Pmode, sr.reg,
7866 m->fs.cfa_offset + rounded_size));
7867 RTX_FRAME_RELATED_P (insn) = 1;
7868 }
7869
7870 /* Step 3: the loop. */
7871 rtx size_rtx = GEN_INT (rounded_size);
7872 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7873 size_rtx));
7874 if (m->fs.cfa_reg == stack_pointer_rtx)
7875 {
7876 m->fs.cfa_offset += rounded_size;
7877 add_reg_note (insn, REG_CFA_DEF_CFA,
7878 plus_constant (Pmode, stack_pointer_rtx,
7879 m->fs.cfa_offset));
7880 RTX_FRAME_RELATED_P (insn) = 1;
7881 }
7882 m->fs.sp_offset += rounded_size;
7883 emit_insn (gen_blockage ());
7884
7885 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7886 is equal to ROUNDED_SIZE. */
7887
7888 if (size != rounded_size)
7889 {
7890 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7891 GEN_INT (rounded_size - size), -1,
7892 m->fs.cfa_reg == stack_pointer_rtx);
7893
7894 if (protection_area)
7895 emit_stack_probe (stack_pointer_rtx);
7896 }
7897
7898 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7899
7900 /* This does not deallocate the space reserved for the scratch
7901 register. That will be deallocated in the epilogue. */
7902 release_scratch_register_on_entry (&sr, size, false);
7903 }
7904
7905 /* Adjust back to account for the protection area. */
7906 if (protection_area)
7907 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7908 GEN_INT (probe_interval + dope), -1,
7909 m->fs.cfa_reg == stack_pointer_rtx);
7910
7911 /* Make sure nothing is scheduled before we are done. */
7912 emit_insn (gen_blockage ());
7913 }
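/* Worked example (illustrative, not part of the original source):
   with the default 4096-byte probe interval and a hypothetical SIZE
   of 10000 bytes, the inline path above emits two 4096-byte
   allocations, each followed by a probe at the new stack pointer,
   exits the loop with i == 12288, and then adjusts the stack by the
   residual of 12288 - 4096 - 10000 = -1808, i.e. allocates the
   remaining 1808 bytes without a trailing probe unless a protection
   area is requested.  */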
7914
7915 /* Adjust the stack pointer up to REG while probing it. */
7916
7917 const char *
7918 output_adjust_stack_and_probe (rtx reg)
7919 {
7920 static int labelno = 0;
7921 char loop_lab[32];
7922 rtx xops[2];
7923
7924 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7925
7926 /* Loop. */
7927 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7928
7929 /* SP = SP - PROBE_INTERVAL. */
7930 xops[0] = stack_pointer_rtx;
7931 xops[1] = GEN_INT (get_probe_interval ());
7932 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7933
7934 /* Probe at SP. */
7935 xops[1] = const0_rtx;
7936 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7937
7938 /* Test if SP == LAST_ADDR. */
7939 xops[0] = stack_pointer_rtx;
7940 xops[1] = reg;
7941 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7942
7943 /* Branch. */
7944 fputs ("\tjne\t", asm_out_file);
7945 assemble_name_raw (asm_out_file, loop_lab);
7946 fputc ('\n', asm_out_file);
7947
7948 return "";
7949 }
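/* Illustrative AT&T output of the loop above (not part of the original
   source), assuming 64-bit code, the default 4096-byte probe interval
   and REG being %rax:

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%rax, %rsp
		jne	.LPSRL0
*/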
7950
7951 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7952 inclusive. These are offsets from the current stack pointer.
7953
7954 INT_REGISTERS_SAVED is true if integer registers have already been
7955 pushed on the stack. */
7956
7957 static void
7958 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7959 const bool int_registers_saved)
7960 {
7961 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7962
7963 /* See if we have a constant small number of probes to generate. If so,
7964 that's the easy case. The run-time loop is made up of 6 insns in the
7965 generic case while the compile-time loop is made up of n insns for n #
7966 of intervals. */
7967 if (size <= 6 * probe_interval)
7968 {
7969 HOST_WIDE_INT i;
7970
7971 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7972 it exceeds SIZE. If only one probe is needed, this will not
7973 generate any code. Then probe at FIRST + SIZE. */
7974 for (i = probe_interval; i < size; i += probe_interval)
7975 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7976 -(first + i)));
7977
7978 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7979 -(first + size)));
7980 }
7981
7982 /* Otherwise, do the same as above, but in a loop. Note that we must be
7983 extra careful with variables wrapping around because we might be at
7984 the very top (or the very bottom) of the address space and we have
7985 to be able to handle this case properly; in particular, we use an
7986 equality test for the loop condition. */
7987 else
7988 {
7989 /* We expect the GP registers to be saved when probes are used
7990 as the probing sequences might need a scratch register and
7991 the routine to allocate one assumes the integer registers
7992 have already been saved. */
7993 gcc_assert (int_registers_saved);
7994
7995 HOST_WIDE_INT rounded_size, last;
7996 struct scratch_reg sr;
7997
7998 get_scratch_register_on_entry (&sr);
7999
8000
8001 /* Step 1: round SIZE to the previous multiple of the interval. */
8002
8003 rounded_size = ROUND_DOWN (size, probe_interval);
8004
8005
8006 /* Step 2: compute initial and final value of the loop counter. */
8007
8008 /* TEST_OFFSET = FIRST. */
8009 emit_move_insn (sr.reg, GEN_INT (-first));
8010
8011 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
8012 last = first + rounded_size;
8013
8014
8015 /* Step 3: the loop
8016
8017 do
8018 {
8019 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
8020 probe at TEST_ADDR
8021 }
8022 while (TEST_ADDR != LAST_ADDR)
8023
8024 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
8025 until it is equal to ROUNDED_SIZE. */
8026
8027 emit_insn
8028 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
8029
8030
8031 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
8032 that SIZE is equal to ROUNDED_SIZE. */
8033
8034 if (size != rounded_size)
8035 emit_stack_probe (plus_constant (Pmode,
8036 gen_rtx_PLUS (Pmode,
8037 stack_pointer_rtx,
8038 sr.reg),
8039 rounded_size - size));
8040
8041 release_scratch_register_on_entry (&sr, size, true);
8042 }
8043
8044 /* Make sure nothing is scheduled before we are done. */
8045 emit_insn (gen_blockage ());
8046 }
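/* Worked example (illustrative, not part of the original source):
   with a hypothetical FIRST of 16384, a SIZE of 12288 and the default
   4096-byte probe interval, the inline path above emits probes at
   sp - 20480, sp - 24576 and finally sp - (16384 + 12288) = sp - 28672,
   without ever moving the stack pointer itself.  */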
8047
8048 /* Probe a range of stack addresses from REG to END, inclusive. These are
8049 offsets from the current stack pointer. */
8050
8051 const char *
8052 output_probe_stack_range (rtx reg, rtx end)
8053 {
8054 static int labelno = 0;
8055 char loop_lab[32];
8056 rtx xops[3];
8057
8058 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
8059
8060 /* Loop. */
8061 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8062
8063 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
8064 xops[0] = reg;
8065 xops[1] = GEN_INT (get_probe_interval ());
8066 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
8067
8068 /* Probe at TEST_ADDR. */
8069 xops[0] = stack_pointer_rtx;
8070 xops[1] = reg;
8071 xops[2] = const0_rtx;
8072 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
8073
8074 /* Test if TEST_ADDR == LAST_ADDR. */
8075 xops[0] = reg;
8076 xops[1] = end;
8077 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8078
8079 /* Branch. */
8080 fputs ("\tjne\t", asm_out_file);
8081 assemble_name_raw (asm_out_file, loop_lab);
8082 fputc ('\n', asm_out_file);
8083
8084 return "";
8085 }
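/* Illustrative AT&T output of the loop above (not part of the original
   source), assuming 64-bit code, the default 4096-byte probe interval
   and REG/END being %r11 and $-28672:

	.LPSRL1:
		subq	$4096, %r11
		orq	$0, (%rsp,%r11)
		cmpq	$-28672, %r11
		jne	.LPSRL1
*/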
8086
8087 /* Set stack_frame_required to false if stack frame isn't required.
8088 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
8089 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
8090
8091 static void
8092 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
8093 bool check_stack_slot)
8094 {
8095 HARD_REG_SET set_up_by_prologue, prologue_used;
8096 basic_block bb;
8097
8098 CLEAR_HARD_REG_SET (prologue_used);
8099 CLEAR_HARD_REG_SET (set_up_by_prologue);
8100 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
8101 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
8102 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
8103 HARD_FRAME_POINTER_REGNUM);
8104
8105 /* The preferred stack alignment is the minimum stack alignment. */
8106 if (stack_alignment > crtl->preferred_stack_boundary)
8107 stack_alignment = crtl->preferred_stack_boundary;
8108
8109 bool require_stack_frame = false;
8110
8111 FOR_EACH_BB_FN (bb, cfun)
8112 {
8113 rtx_insn *insn;
8114 FOR_BB_INSNS (bb, insn)
8115 if (NONDEBUG_INSN_P (insn)
8116 && requires_stack_frame_p (insn, prologue_used,
8117 set_up_by_prologue))
8118 {
8119 require_stack_frame = true;
8120
8121 if (check_stack_slot)
8122 {
8123 /* Find the maximum stack alignment. */
8124 subrtx_iterator::array_type array;
8125 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
8126 if (MEM_P (*iter)
8127 && (reg_mentioned_p (stack_pointer_rtx,
8128 *iter)
8129 || reg_mentioned_p (frame_pointer_rtx,
8130 *iter)))
8131 {
8132 unsigned int alignment = MEM_ALIGN (*iter);
8133 if (alignment > stack_alignment)
8134 stack_alignment = alignment;
8135 }
8136 }
8137 }
8138 }
8139
8140 cfun->machine->stack_frame_required = require_stack_frame;
8141 }
8142
8143 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
8144 will guide prologue/epilogue to be generated in correct form. */
8145
8146 static void
8147 ix86_finalize_stack_frame_flags (void)
8148 {
8149 /* Check if stack realignment is really needed after reload, and
8150 store the result in cfun. */
8151 unsigned int incoming_stack_boundary
8152 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8153 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8154 unsigned int stack_alignment
8155 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
8156 ? crtl->max_used_stack_slot_alignment
8157 : crtl->stack_alignment_needed);
8158 unsigned int stack_realign
8159 = (incoming_stack_boundary < stack_alignment);
8160 bool recompute_frame_layout_p = false;
8161
8162 if (crtl->stack_realign_finalized)
8163 {
8164 /* After stack_realign_needed is finalized, we can no longer
8165 change it. */
8166 gcc_assert (crtl->stack_realign_needed == stack_realign);
8167 return;
8168 }
8169
8170 /* It is always safe to compute max_used_stack_alignment. We
8171 compute it only if a 128-bit aligned load/store may be generated
8172 on a misaligned stack slot, which would lead to a segfault. */
8173 bool check_stack_slot
8174 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
8175 ix86_find_max_used_stack_alignment (stack_alignment,
8176 check_stack_slot);
8177
8178 /* If the only reason for frame_pointer_needed is that we conservatively
8179 assumed stack realignment might be needed or -fno-omit-frame-pointer
8180 is used, but in the end nothing that needed the stack alignment had
8181 been spilled and there is no stack access, clear frame_pointer_needed
8182 and say we don't need stack realignment.
8183
8184 When a vector register is used for piecewise move and store, we don't
8185 increase stack_alignment_needed as there is no register spill for
8186 piecewise move and store. Since stack_realign_needed is set to true
8187 by checking stack_alignment_estimated, which is updated by pseudo
8188 vector register usage, we also need to check stack_realign_needed to
8189 eliminate the frame pointer. */
8190 if ((stack_realign
8191 || (!flag_omit_frame_pointer && optimize)
8192 || crtl->stack_realign_needed)
8193 && frame_pointer_needed
8194 && crtl->is_leaf
8195 && crtl->sp_is_unchanging
8196 && !ix86_current_function_calls_tls_descriptor
8197 && !crtl->accesses_prior_frames
8198 && !cfun->calls_alloca
8199 && !crtl->calls_eh_return
8200 /* See ira_setup_eliminable_regset for the rationale. */
8201 && !(STACK_CHECK_MOVING_SP
8202 && flag_stack_check
8203 && flag_exceptions
8204 && cfun->can_throw_non_call_exceptions)
8205 && !ix86_frame_pointer_required ()
8206 && ix86_get_frame_size () == 0
8207 && ix86_nsaved_sseregs () == 0
8208 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
8209 {
8210 if (cfun->machine->stack_frame_required)
8211 {
8212 /* A stack frame is required. If the stack alignment needed is less
8213 than the incoming stack boundary, don't realign the stack. */
8214 stack_realign = incoming_stack_boundary < stack_alignment;
8215 if (!stack_realign)
8216 {
8217 crtl->max_used_stack_slot_alignment
8218 = incoming_stack_boundary;
8219 crtl->stack_alignment_needed
8220 = incoming_stack_boundary;
8221 /* Also update preferred_stack_boundary for leaf
8222 functions. */
8223 crtl->preferred_stack_boundary
8224 = incoming_stack_boundary;
8225 }
8226 }
8227 else
8228 {
8229 /* If drap has been set, but it actually isn't live at the
8230 start of the function, there is no reason to set it up. */
8231 if (crtl->drap_reg)
8232 {
8233 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8234 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
8235 REGNO (crtl->drap_reg)))
8236 {
8237 crtl->drap_reg = NULL_RTX;
8238 crtl->need_drap = false;
8239 }
8240 }
8241 else
8242 cfun->machine->no_drap_save_restore = true;
8243
8244 frame_pointer_needed = false;
8245 stack_realign = false;
8246 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
8247 crtl->stack_alignment_needed = incoming_stack_boundary;
8248 crtl->stack_alignment_estimated = incoming_stack_boundary;
8249 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
8250 crtl->preferred_stack_boundary = incoming_stack_boundary;
8251 df_finish_pass (true);
8252 df_scan_alloc (NULL);
8253 df_scan_blocks ();
8254 df_compute_regs_ever_live (true);
8255 df_analyze ();
8256
8257 if (flag_var_tracking)
8258 {
8259 /* Since frame pointer is no longer available, replace it with
8260 stack pointer - UNITS_PER_WORD in debug insns. */
8261 df_ref ref, next;
8262 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
8263 ref; ref = next)
8264 {
8265 next = DF_REF_NEXT_REG (ref);
8266 if (!DF_REF_INSN_INFO (ref))
8267 continue;
8268
8269 /* Make sure the next ref is for a different instruction,
8270 so that we're not affected by the rescan. */
8271 rtx_insn *insn = DF_REF_INSN (ref);
8272 while (next && DF_REF_INSN (next) == insn)
8273 next = DF_REF_NEXT_REG (next);
8274
8275 if (DEBUG_INSN_P (insn))
8276 {
8277 bool changed = false;
8278 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
8279 {
8280 rtx *loc = DF_REF_LOC (ref);
8281 if (*loc == hard_frame_pointer_rtx)
8282 {
8283 *loc = plus_constant (Pmode,
8284 stack_pointer_rtx,
8285 -UNITS_PER_WORD);
8286 changed = true;
8287 }
8288 }
8289 if (changed)
8290 df_insn_rescan (insn);
8291 }
8292 }
8293 }
8294
8295 recompute_frame_layout_p = true;
8296 }
8297 }
8298 else if (crtl->max_used_stack_slot_alignment >= 128
8299 && cfun->machine->stack_frame_required)
8300 {
8301 /* We don't need to realign the stack. max_used_stack_alignment is
8302 used to decide how the stack frame should be aligned. This is
8303 independent of any psABI and of 32-bit vs. 64-bit. */
8304 cfun->machine->max_used_stack_alignment
8305 = stack_alignment / BITS_PER_UNIT;
8306 }
8307
8308 if (crtl->stack_realign_needed != stack_realign)
8309 recompute_frame_layout_p = true;
8310 crtl->stack_realign_needed = stack_realign;
8311 crtl->stack_realign_finalized = true;
8312 if (recompute_frame_layout_p)
8313 ix86_compute_frame_layout ();
8314 }
8315
8316 /* Delete SET_GOT right after entry block if it is allocated to reg. */
8317
8318 static void
8319 ix86_elim_entry_set_got (rtx reg)
8320 {
8321 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8322 rtx_insn *c_insn = BB_HEAD (bb);
8323 if (!NONDEBUG_INSN_P (c_insn))
8324 c_insn = next_nonnote_nondebug_insn (c_insn);
8325 if (c_insn && NONJUMP_INSN_P (c_insn))
8326 {
8327 rtx pat = PATTERN (c_insn);
8328 if (GET_CODE (pat) == PARALLEL)
8329 {
8330 rtx vec = XVECEXP (pat, 0, 0);
8331 if (GET_CODE (vec) == SET
8332 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
8333 && REGNO (XEXP (vec, 0)) == REGNO (reg))
8334 delete_insn (c_insn);
8335 }
8336 }
8337 }
8338
8339 static rtx
8340 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8341 {
8342 rtx addr, mem;
8343
8344 if (offset)
8345 addr = plus_constant (Pmode, frame_reg, offset);
8346 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8347 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8348 }
8349
8350 static inline rtx
8351 gen_frame_load (rtx reg, rtx frame_reg, int offset)
8352 {
8353 return gen_frame_set (reg, frame_reg, offset, false);
8354 }
8355
8356 static inline rtx
8357 gen_frame_store (rtx reg, rtx frame_reg, int offset)
8358 {
8359 return gen_frame_set (reg, frame_reg, offset, true);
8360 }
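/* Illustrative example (not part of the original source): with a
   hypothetical OFFSET of -16, gen_frame_store (reg, rax, -16) builds
   (set (mem (plus rax (const_int -16))) reg), the pattern emitted per
   register by the ms2sysv save stub below, while gen_frame_load swaps
   the two operands.  */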
8361
8362 static void
8363 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8364 {
8365 struct machine_function *m = cfun->machine;
8366 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8367 + m->call_ms2sysv_extra_regs;
8368 rtvec v = rtvec_alloc (ncregs + 1);
8369 unsigned int align, i, vi = 0;
8370 rtx_insn *insn;
8371 rtx sym, addr;
8372 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8373 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8374
8375 /* AL should only be live with sysv_abi. */
8376 gcc_assert (!ix86_eax_live_at_start_p ());
8377 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8378
8379 /* Set up RAX as the stub's base pointer. We use stack_realign_offset
8380 regardless of whether we've actually realigned the stack or not. */
8381 align = GET_MODE_ALIGNMENT (V4SFmode);
8382 addr = choose_baseaddr (frame.stack_realign_offset
8383 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
8384 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8385
8386 emit_insn (gen_rtx_SET (rax, addr));
8387
8388 /* Get the stub symbol. */
8389 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8390 : XLOGUE_STUB_SAVE);
8391 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8392
8393 for (i = 0; i < ncregs; ++i)
8394 {
8395 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8396 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8397 r.regno);
8398 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
8399 }
8400
8401 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8402
8403 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8404 RTX_FRAME_RELATED_P (insn) = true;
8405 }
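/* Illustrative shape of the PARALLEL emitted above (not part of the
   original source); the register choice and offsets are made up:

     (parallel [(use (symbol_ref <save stub>))
                (set (mem:DI (plus (reg rax) (const_int -48))) (reg:DI rsi))
                (set (mem:V4SF (plus (reg rax) (const_int -64))) (reg:V4SF xmm6))
                ...])
*/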
8406
8407 /* Generate and return an insn body to AND X with Y. */
8408
8409 static rtx_insn *
8410 gen_and2_insn (rtx x, rtx y)
8411 {
8412 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
8413
8414 gcc_assert (insn_operand_matches (icode, 0, x));
8415 gcc_assert (insn_operand_matches (icode, 1, x));
8416 gcc_assert (insn_operand_matches (icode, 2, y));
8417
8418 return GEN_FCN (icode) (x, x, y);
8419 }
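/* Illustrative usage (not part of the original source): the prologue
   below aligns the stack with

     emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));

   which for a hypothetical 64-byte alignment comes out as
   "andq $-64, %rsp" in 64-bit code.  */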
8420
8421 /* Expand the prologue into a bunch of separate insns. */
8422
8423 void
8424 ix86_expand_prologue (void)
8425 {
8426 struct machine_function *m = cfun->machine;
8427 rtx insn, t;
8428 HOST_WIDE_INT allocate;
8429 bool int_registers_saved;
8430 bool sse_registers_saved;
8431 bool save_stub_call_needed;
8432 rtx static_chain = NULL_RTX;
8433
8434 ix86_last_zero_store_uid = 0;
8435 if (ix86_function_naked (current_function_decl))
8436 {
8437 if (flag_stack_usage_info)
8438 current_function_static_stack_size = 0;
8439 return;
8440 }
8441
8442 ix86_finalize_stack_frame_flags ();
8443
8444 /* DRAP should not coexist with stack_realign_fp */
8445 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8446
8447 memset (&m->fs, 0, sizeof (m->fs));
8448
8449 /* Initialize CFA state for before the prologue. */
8450 m->fs.cfa_reg = stack_pointer_rtx;
8451 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8452
8453 /* Track the SP offset to the CFA. We continue tracking this after we've
8454 swapped the CFA register away from SP. In the case of re-alignment
8455 this is fudged; we're interested in offsets within the local frame. */
8456 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8457 m->fs.sp_valid = true;
8458 m->fs.sp_realigned = false;
8459
8460 const struct ix86_frame &frame = cfun->machine->frame;
8461
8462 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8463 {
8464 /* We should have already generated an error for any use of
8465 ms_hook on a nested function. */
8466 gcc_checking_assert (!ix86_static_chain_on_stack);
8467
8468 /* Check if profiling is active and we shall use the profiling-before-prologue
8469 variant. If so, sorry. */
8470 if (crtl->profile && flag_fentry != 0)
8471 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8472 "with %<-mfentry%> for 32-bit");
8473
8474 /* In ix86_asm_output_function_label we emitted:
8475 8b ff movl.s %edi,%edi
8476 55 push %ebp
8477 8b ec movl.s %esp,%ebp
8478
8479 This matches the hookable function prologue in Win32 API
8480 functions in Microsoft Windows XP Service Pack 2 and newer.
8481 Wine uses this to enable Windows apps to hook the Win32 API
8482 functions provided by Wine.
8483
8484 What that means is that we've already set up the frame pointer. */
8485
8486 if (frame_pointer_needed
8487 && !(crtl->drap_reg && crtl->stack_realign_needed))
8488 {
8489 rtx push, mov;
8490
8491 /* We've decided to use the frame pointer already set up.
8492 Describe this to the unwinder by pretending that both
8493 push and mov insns happen right here.
8494
8495 Putting the unwind info here at the end of the ms_hook
8496 is done so that we can make absolutely certain we get
8497 the required byte sequence at the start of the function,
8498 rather than relying on an assembler that can produce
8499 the exact encoding required.
8500
8501 However it does mean (in the unpatched case) that we have
8502 a 1 insn window where the asynchronous unwind info is
8503 incorrect. However, if we placed the unwind info at
8504 its correct location we would have incorrect unwind info
8505 in the patched case. Which is probably all moot since
8506 I don't expect Wine generates dwarf2 unwind info for the
8507 system libraries that use this feature. */
8508
8509 insn = emit_insn (gen_blockage ());
8510
8511 push = gen_push (hard_frame_pointer_rtx);
8512 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8513 stack_pointer_rtx);
8514 RTX_FRAME_RELATED_P (push) = 1;
8515 RTX_FRAME_RELATED_P (mov) = 1;
8516
8517 RTX_FRAME_RELATED_P (insn) = 1;
8518 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8519 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8520
8521 /* Note that gen_push incremented m->fs.cfa_offset, even
8522 though we didn't emit the push insn here. */
8523 m->fs.cfa_reg = hard_frame_pointer_rtx;
8524 m->fs.fp_offset = m->fs.cfa_offset;
8525 m->fs.fp_valid = true;
8526 }
8527 else
8528 {
8529 /* The frame pointer is not needed so pop %ebp again.
8530 This leaves us with a pristine state. */
8531 emit_insn (gen_pop (hard_frame_pointer_rtx));
8532 }
8533 }
8534
8535 /* The first insn of a function that accepts its static chain on the
8536 stack is to push the register that would be filled in by a direct
8537 call. This insn will be skipped by the trampoline. */
8538 else if (ix86_static_chain_on_stack)
8539 {
8540 static_chain = ix86_static_chain (cfun->decl, false);
8541 insn = emit_insn (gen_push (static_chain));
8542 emit_insn (gen_blockage ());
8543
8544 /* We don't want to interpret this push insn as a register save,
8545 only as a stack adjustment. The real copy of the register as
8546 a save will be done later, if needed. */
8547 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8548 t = gen_rtx_SET (stack_pointer_rtx, t);
8549 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8550 RTX_FRAME_RELATED_P (insn) = 1;
8551 }
8552
8553 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8554 DRAP is needed and stack realignment is really needed after reload. */
8555 if (stack_realign_drap)
8556 {
8557 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8558
8559 /* Can't use DRAP in interrupt function. */
8560 if (cfun->machine->func_type != TYPE_NORMAL)
8561 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8562 "in interrupt service routine. This may be worked "
8563 "around by avoiding functions with aggregate return.");
8564
8565 /* Only need to push parameter pointer reg if it is caller saved. */
8566 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8567 {
8568 /* Push arg pointer reg */
8569 insn = emit_insn (gen_push (crtl->drap_reg));
8570 RTX_FRAME_RELATED_P (insn) = 1;
8571 }
8572
8573 /* Grab the argument pointer. */
8574 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8575 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8576 RTX_FRAME_RELATED_P (insn) = 1;
8577 m->fs.cfa_reg = crtl->drap_reg;
8578 m->fs.cfa_offset = 0;
8579
8580 /* Align the stack. */
8581 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8582 GEN_INT (-align_bytes)));
8583 RTX_FRAME_RELATED_P (insn) = 1;
8584
8585 /* Replicate the return address on the stack so that return
8586 address can be reached via (argp - 1) slot. This is needed
8587 to implement macro RETURN_ADDR_RTX and intrinsic function
8588 expand_builtin_return_addr etc. */
8589 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8590 t = gen_frame_mem (word_mode, t);
8591 insn = emit_insn (gen_push (t));
8592 RTX_FRAME_RELATED_P (insn) = 1;
8593
8594 /* For the purposes of frame and register save area addressing,
8595 we've started over with a new frame. */
8596 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8597 m->fs.realigned = true;
8598
8599 if (static_chain)
8600 {
8601 /* Replicate static chain on the stack so that static chain
8602 can be reached via (argp - 2) slot. This is needed for
8603 nested function with stack realignment. */
8604 insn = emit_insn (gen_push (static_chain));
8605 RTX_FRAME_RELATED_P (insn) = 1;
8606 }
8607 }
8608
8609 int_registers_saved = (frame.nregs == 0);
8610 sse_registers_saved = (frame.nsseregs == 0);
8611 save_stub_call_needed = (m->call_ms2sysv);
8612 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8613
8614 if (frame_pointer_needed && !m->fs.fp_valid)
8615 {
8616 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8617 slower on all targets. Also sdb didn't like it. */
8618 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8619 RTX_FRAME_RELATED_P (insn) = 1;
8620
8621 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8622 {
8623 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8624 RTX_FRAME_RELATED_P (insn) = 1;
8625
8626 if (m->fs.cfa_reg == stack_pointer_rtx)
8627 m->fs.cfa_reg = hard_frame_pointer_rtx;
8628 m->fs.fp_offset = m->fs.sp_offset;
8629 m->fs.fp_valid = true;
8630 }
8631 }
8632
8633 if (!int_registers_saved)
8634 {
8635 /* If saving registers via PUSH, do so now. */
8636 if (!frame.save_regs_using_mov)
8637 {
8638 ix86_emit_save_regs ();
8639 int_registers_saved = true;
8640 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8641 }
8642
8643 /* When using the red zone we may start register saving before allocating
8644 the stack frame, saving one cycle of the prologue. However, avoid
8645 doing this if we have to probe the stack; at least on x86_64 the
8646 stack probe can turn into a call that clobbers a red zone location. */
8647 else if (ix86_using_red_zone ()
8648 && (! TARGET_STACK_PROBE
8649 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8650 {
8651 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8652 cfun->machine->red_zone_used = true;
8653 int_registers_saved = true;
8654 }
8655 }
8656
8657 if (frame.red_zone_size != 0)
8658 cfun->machine->red_zone_used = true;
8659
8660 if (stack_realign_fp)
8661 {
8662 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8663 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8664
8665 /* Record last valid frame pointer offset. */
8666 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8667
8668 /* The computation of the size of the re-aligned stack frame means
8669 that we must allocate the size of the register save area before
8670 performing the actual alignment. Otherwise we cannot guarantee
8671 that there's enough storage above the realignment point. */
8672 allocate = frame.reg_save_offset - m->fs.sp_offset
8673 + frame.stack_realign_allocate;
8674 if (allocate)
8675 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8676 GEN_INT (-allocate), -1, false);
8677
8678 /* Align the stack. */
8679 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8680 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8681 m->fs.sp_realigned_offset = m->fs.sp_offset
8682 - frame.stack_realign_allocate;
8683 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8684 Beyond this point, stack access should be done via choose_baseaddr or
8685 by using sp_valid_at and fp_valid_at to determine the correct base
8686 register. Henceforth, any CFA offset should be thought of as logical
8687 and not physical. */
8688 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8689 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8690 m->fs.sp_realigned = true;
8691
8692 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8693 is needed to describe where a register is saved using a realigned
8694 stack pointer, so we need to invalidate the stack pointer for that
8695 target. */
8696 if (TARGET_SEH)
8697 m->fs.sp_valid = false;
8698
8699 /* If SP offset is non-immediate after allocation of the stack frame,
8700 then emit SSE saves or stub call prior to allocating the rest of the
8701 stack frame. This is less efficient for the out-of-line stub because
8702 we can't combine allocations across the call barrier, but it's better
8703 than using a scratch register. */
8704 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8705 - m->fs.sp_realigned_offset),
8706 Pmode))
8707 {
8708 if (!sse_registers_saved)
8709 {
8710 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8711 sse_registers_saved = true;
8712 }
8713 else if (save_stub_call_needed)
8714 {
8715 ix86_emit_outlined_ms2sysv_save (frame);
8716 save_stub_call_needed = false;
8717 }
8718 }
8719 }
8720
8721 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8722
8723 if (flag_stack_usage_info)
8724 {
8725 /* We start to count from ARG_POINTER. */
8726 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8727
8728 /* If it was realigned, take into account the fake frame. */
8729 if (stack_realign_drap)
8730 {
8731 if (ix86_static_chain_on_stack)
8732 stack_size += UNITS_PER_WORD;
8733
8734 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8735 stack_size += UNITS_PER_WORD;
8736
8737 /* This over-estimates by 1 minimal-stack-alignment-unit but
8738 mitigates that by counting in the new return address slot. */
8739 current_function_dynamic_stack_size
8740 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8741 }
8742
8743 current_function_static_stack_size = stack_size;
8744 }
8745
8746 /* On SEH target with very large frame size, allocate an area to save
8747 SSE registers (as the very large allocation won't be described). */
8748 if (TARGET_SEH
8749 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8750 && !sse_registers_saved)
8751 {
8752 HOST_WIDE_INT sse_size
8753 = frame.sse_reg_save_offset - frame.reg_save_offset;
8754
8755 gcc_assert (int_registers_saved);
8756
8757 /* No need to do stack checking as the area will be immediately
8758 written. */
8759 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8760 GEN_INT (-sse_size), -1,
8761 m->fs.cfa_reg == stack_pointer_rtx);
8762 allocate -= sse_size;
8763 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8764 sse_registers_saved = true;
8765 }
8766
8767 /* If stack clash protection is requested, then probe the stack, unless it
8768 is already probed on the target. */
8769 if (allocate >= 0
8770 && flag_stack_clash_protection
8771 && !ix86_target_stack_probe ())
8772 {
8773 ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
8774 allocate = 0;
8775 }
8776
8777 /* The stack has already been decremented by the instruction calling us
8778 so probe if the size is non-negative to preserve the protection area. */
8779 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8780 {
8781 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8782
8783 if (STACK_CHECK_MOVING_SP)
8784 {
8785 if (crtl->is_leaf
8786 && !cfun->calls_alloca
8787 && allocate <= probe_interval)
8788 ;
8789
8790 else
8791 {
8792 ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
8793 allocate = 0;
8794 }
8795 }
8796
8797 else
8798 {
8799 HOST_WIDE_INT size = allocate;
8800
8801 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8802 size = 0x80000000 - get_stack_check_protect () - 1;
8803
8804 if (TARGET_STACK_PROBE)
8805 {
8806 if (crtl->is_leaf && !cfun->calls_alloca)
8807 {
8808 if (size > probe_interval)
8809 ix86_emit_probe_stack_range (0, size, int_registers_saved);
8810 }
8811 else
8812 ix86_emit_probe_stack_range (0,
8813 size + get_stack_check_protect (),
8814 int_registers_saved);
8815 }
8816 else
8817 {
8818 if (crtl->is_leaf && !cfun->calls_alloca)
8819 {
8820 if (size > probe_interval
8821 && size > get_stack_check_protect ())
8822 ix86_emit_probe_stack_range (get_stack_check_protect (),
8823 (size
8824 - get_stack_check_protect ()),
8825 int_registers_saved);
8826 }
8827 else
8828 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8829 int_registers_saved);
8830 }
8831 }
8832 }
8833
8834 if (allocate == 0)
8835 ;
8836 else if (!ix86_target_stack_probe ()
8837 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8838 {
8839 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8840 GEN_INT (-allocate), -1,
8841 m->fs.cfa_reg == stack_pointer_rtx);
8842 }
8843 else
8844 {
8845 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8846 rtx r10 = NULL;
8847 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8848 bool eax_live = ix86_eax_live_at_start_p ();
8849 bool r10_live = false;
8850
8851 if (TARGET_64BIT)
8852 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8853
8854 if (eax_live)
8855 {
8856 insn = emit_insn (gen_push (eax));
8857 allocate -= UNITS_PER_WORD;
8858 /* Note that SEH directives need to continue tracking the stack
8859 pointer even after the frame pointer has been set up. */
8860 if (sp_is_cfa_reg || TARGET_SEH)
8861 {
8862 if (sp_is_cfa_reg)
8863 m->fs.cfa_offset += UNITS_PER_WORD;
8864 RTX_FRAME_RELATED_P (insn) = 1;
8865 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8866 gen_rtx_SET (stack_pointer_rtx,
8867 plus_constant (Pmode,
8868 stack_pointer_rtx,
8869 -UNITS_PER_WORD)));
8870 }
8871 }
8872
8873 if (r10_live)
8874 {
8875 r10 = gen_rtx_REG (Pmode, R10_REG);
8876 insn = emit_insn (gen_push (r10));
8877 allocate -= UNITS_PER_WORD;
8878 if (sp_is_cfa_reg || TARGET_SEH)
8879 {
8880 if (sp_is_cfa_reg)
8881 m->fs.cfa_offset += UNITS_PER_WORD;
8882 RTX_FRAME_RELATED_P (insn) = 1;
8883 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8884 gen_rtx_SET (stack_pointer_rtx,
8885 plus_constant (Pmode,
8886 stack_pointer_rtx,
8887 -UNITS_PER_WORD)));
8888 }
8889 }
8890
8891 emit_move_insn (eax, GEN_INT (allocate));
8892 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
8893
8894 /* Use the fact that AX still contains ALLOCATE. */
8895 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8896 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8897
8898 if (sp_is_cfa_reg || TARGET_SEH)
8899 {
8900 if (sp_is_cfa_reg)
8901 m->fs.cfa_offset += allocate;
8902 RTX_FRAME_RELATED_P (insn) = 1;
8903 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8904 gen_rtx_SET (stack_pointer_rtx,
8905 plus_constant (Pmode, stack_pointer_rtx,
8906 -allocate)));
8907 }
8908 m->fs.sp_offset += allocate;
8909
8910 /* Use stack_pointer_rtx for relative addressing so that the code works for
8911 a realigned stack. But this means that we need a blockage to prevent
8912 stores based on the frame pointer from being scheduled before it. */
8913 if (r10_live && eax_live)
8914 {
8915 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8916 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8917 gen_frame_mem (word_mode, t));
8918 t = plus_constant (Pmode, t, UNITS_PER_WORD);
8919 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8920 gen_frame_mem (word_mode, t));
8921 emit_insn (gen_memory_blockage ());
8922 }
8923 else if (eax_live || r10_live)
8924 {
8925 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8926 emit_move_insn (gen_rtx_REG (word_mode,
8927 (eax_live ? AX_REG : R10_REG)),
8928 gen_frame_mem (word_mode, t));
8929 emit_insn (gen_memory_blockage ());
8930 }
8931 }
8932 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8933
8934 /* If we haven't already set up the frame pointer, do so now. */
8935 if (frame_pointer_needed && !m->fs.fp_valid)
8936 {
8937 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8938 GEN_INT (frame.stack_pointer_offset
8939 - frame.hard_frame_pointer_offset));
8940 insn = emit_insn (insn);
8941 RTX_FRAME_RELATED_P (insn) = 1;
8942 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8943
8944 if (m->fs.cfa_reg == stack_pointer_rtx)
8945 m->fs.cfa_reg = hard_frame_pointer_rtx;
8946 m->fs.fp_offset = frame.hard_frame_pointer_offset;
8947 m->fs.fp_valid = true;
8948 }
8949
8950 if (!int_registers_saved)
8951 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8952 if (!sse_registers_saved)
8953 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8954 else if (save_stub_call_needed)
8955 ix86_emit_outlined_ms2sysv_save (frame);
8956
8957 /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
8958 in the prologue. */
8959 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8960 {
8961 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8962 insn = emit_insn (gen_set_got (pic));
8963 RTX_FRAME_RELATED_P (insn) = 1;
8964 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8965 emit_insn (gen_prologue_use (pic));
8966 /* Delete an already emitted SET_GOT if it exists and is allocated to
8967 REAL_PIC_OFFSET_TABLE_REGNUM. */
8968 ix86_elim_entry_set_got (pic);
8969 }
8970
8971 if (crtl->drap_reg && !crtl->stack_realign_needed)
8972 {
8973 /* vDRAP is set up, but after reload it turns out stack realignment
8974 isn't necessary; here we emit prologue code to set up DRAP
8975 without the stack realignment adjustment. */
8976 t = choose_baseaddr (0, NULL);
8977 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8978 }
8979
8980 /* Prevent instructions from being scheduled into register save push
8981 sequence when access to the redzone area is done through frame pointer.
8982 The offset between the frame pointer and the stack pointer is calculated
8983 relative to the value of the stack pointer at the end of the function
8984 prologue, and moving instructions that access redzone area via frame
8985 pointer inside push sequence violates this assumption. */
8986 if (frame_pointer_needed && frame.red_zone_size)
8987 emit_insn (gen_memory_blockage ());
8988
8989 /* SEH requires that the prologue end within 256 bytes of the start of
8990 the function. Prevent instruction schedules that would extend that.
8991 Further, prevent alloca modifications to the stack pointer from being
8992 combined with prologue modifications. */
8993 if (TARGET_SEH)
8994 emit_insn (gen_prologue_use (stack_pointer_rtx));
8995 }
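/* Illustrative result (not part of the original source): for a simple
   64-bit function that needs a frame pointer and a hypothetical
   64-byte local frame, the code emitted above amounts to roughly

	pushq	%rbp
	movq	%rsp, %rbp
	subq	$64, %rsp

   with callee-saved register pushes, stack probing and SSE saves
   interleaved as dictated by the frame layout computed earlier.  */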
8996
8997 /* Emit code to restore REG using a POP insn. */
8998
8999 static void
9000 ix86_emit_restore_reg_using_pop (rtx reg)
9001 {
9002 struct machine_function *m = cfun->machine;
9003 rtx_insn *insn = emit_insn (gen_pop (reg));
9004
9005 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9006 m->fs.sp_offset -= UNITS_PER_WORD;
9007
9008 if (m->fs.cfa_reg == crtl->drap_reg
9009 && REGNO (reg) == REGNO (crtl->drap_reg))
9010 {
9011 /* Previously we'd represented the CFA as an expression
9012 like *(%ebp - 8). We've just popped that value from
9013 the stack, which means we need to reset the CFA to
9014 the drap register. This will remain until we restore
9015 the stack pointer. */
9016 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9017 RTX_FRAME_RELATED_P (insn) = 1;
9018
9019 /* This means that the DRAP register is valid for addressing too. */
9020 m->fs.drap_valid = true;
9021 return;
9022 }
9023
9024 if (m->fs.cfa_reg == stack_pointer_rtx)
9025 {
9026 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9027 x = gen_rtx_SET (stack_pointer_rtx, x);
9028 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9029 RTX_FRAME_RELATED_P (insn) = 1;
9030
9031 m->fs.cfa_offset -= UNITS_PER_WORD;
9032 }
9033
9034 /* When the frame pointer is the CFA, and we pop it, we are
9035 swapping back to the stack pointer as the CFA. This happens
9036 for stack frames that don't allocate other data, so we assume
9037 the stack pointer is now pointing at the return address, i.e.
9038 the function entry state, which makes the offset one word. */
9039 if (reg == hard_frame_pointer_rtx)
9040 {
9041 m->fs.fp_valid = false;
9042 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9043 {
9044 m->fs.cfa_reg = stack_pointer_rtx;
9045 m->fs.cfa_offset -= UNITS_PER_WORD;
9046
9047 add_reg_note (insn, REG_CFA_DEF_CFA,
9048 plus_constant (Pmode, stack_pointer_rtx,
9049 m->fs.cfa_offset));
9050 RTX_FRAME_RELATED_P (insn) = 1;
9051 }
9052 }
9053 }
9054
9055 /* Emit code to restore saved registers using POP insns. */
9056
9057 static void
9058 ix86_emit_restore_regs_using_pop (void)
9059 {
9060 unsigned int regno;
9061
9062 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9063 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
9064 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
9065 }
9066
9067 /* Emit code and notes for the LEAVE instruction. If INSN is non-null,
9068 omit the emit and only attach the notes. */
9069
9070 static void
9071 ix86_emit_leave (rtx_insn *insn)
9072 {
9073 struct machine_function *m = cfun->machine;
9074
9075 if (!insn)
9076 insn = emit_insn (gen_leave (word_mode));
9077
9078 ix86_add_queued_cfa_restore_notes (insn);
9079
9080 gcc_assert (m->fs.fp_valid);
9081 m->fs.sp_valid = true;
9082 m->fs.sp_realigned = false;
9083 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9084 m->fs.fp_valid = false;
9085
9086 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9087 {
9088 m->fs.cfa_reg = stack_pointer_rtx;
9089 m->fs.cfa_offset = m->fs.sp_offset;
9090
9091 add_reg_note (insn, REG_CFA_DEF_CFA,
9092 plus_constant (Pmode, stack_pointer_rtx,
9093 m->fs.sp_offset));
9094 RTX_FRAME_RELATED_P (insn) = 1;
9095 }
9096 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9097 m->fs.fp_offset);
9098 }
9099
9100 /* Emit code to restore saved registers using MOV insns.
9101 First register is restored from CFA - CFA_OFFSET. */
9102 static void
9103 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9104 bool maybe_eh_return)
9105 {
9106 struct machine_function *m = cfun->machine;
9107 unsigned int regno;
9108
9109 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9110 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9111 {
9112 rtx reg = gen_rtx_REG (word_mode, regno);
9113 rtx mem;
9114 rtx_insn *insn;
9115
9116 mem = choose_baseaddr (cfa_offset, NULL);
9117 mem = gen_frame_mem (word_mode, mem);
9118 insn = emit_move_insn (reg, mem);
9119
9120 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9121 {
9122 /* Previously we'd represented the CFA as an expression
9123 like *(%ebp - 8). We've just popped that value from
9124 the stack, which means we need to reset the CFA to
9125 the drap register. This will remain until we restore
9126 the stack pointer. */
9127 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9128 RTX_FRAME_RELATED_P (insn) = 1;
9129
9130 /* This means that the DRAP register is valid for addressing. */
9131 m->fs.drap_valid = true;
9132 }
9133 else
9134 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9135
9136 cfa_offset -= UNITS_PER_WORD;
9137 }
9138 }
9139
9140 /* Emit code to restore saved SSE registers using MOV insns.
9141 First register is restored from CFA - CFA_OFFSET. */
9142 static void
9143 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9144 bool maybe_eh_return)
9145 {
9146 unsigned int regno;
9147
9148 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9149 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
9150 {
9151 rtx reg = gen_rtx_REG (V4SFmode, regno);
9152 rtx mem;
9153 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
9154
9155 mem = choose_baseaddr (cfa_offset, &align);
9156 mem = gen_rtx_MEM (V4SFmode, mem);
9157
9158 /* The location alignment depends upon the base register. */
9159 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
9160 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
9161 set_mem_align (mem, align);
9162 emit_insn (gen_rtx_SET (reg, mem));
9163
9164 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
9165
9166 cfa_offset -= GET_MODE_SIZE (V4SFmode);
9167 }
9168 }
9169
9170 static void
9171 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
9172 bool use_call, int style)
9173 {
9174 struct machine_function *m = cfun->machine;
9175 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
9176 + m->call_ms2sysv_extra_regs;
9177 rtvec v;
9178 unsigned int elems_needed, align, i, vi = 0;
9179 rtx_insn *insn;
9180 rtx sym, tmp;
9181 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
9182 rtx r10 = NULL_RTX;
9183 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
9184 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
9185 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
9186 rtx rsi_frame_load = NULL_RTX;
9187 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
9188 enum xlogue_stub stub;
9189
9190 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
9191
9192 /* If using a realigned stack, we should never start with padding. */
9193 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
9194
9195 /* Set up RSI as the stub's base pointer. */
9196 align = GET_MODE_ALIGNMENT (V4SFmode);
9197 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
9198 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
9199
9200 emit_insn (gen_rtx_SET (rsi, tmp));
9201
9202 /* Get a symbol for the stub. */
9203 if (frame_pointer_needed)
9204 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
9205 : XLOGUE_STUB_RESTORE_HFP_TAIL;
9206 else
9207 stub = use_call ? XLOGUE_STUB_RESTORE
9208 : XLOGUE_STUB_RESTORE_TAIL;
9209 sym = xlogue.get_stub_rtx (stub);
9210
9211 elems_needed = ncregs;
9212 if (use_call)
9213 elems_needed += 1;
9214 else
9215 elems_needed += frame_pointer_needed ? 5 : 3;
9216 v = rtvec_alloc (elems_needed);
9217
9218 /* We call the epilogue stub when we need to pop incoming args or we are
9219 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
9220 epilogue stub and it is the tail-call. */
9221 if (use_call)
9222 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9223 else
9224 {
9225 RTVEC_ELT (v, vi++) = ret_rtx;
9226 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
9227 if (frame_pointer_needed)
9228 {
9229 rtx rbp = gen_rtx_REG (DImode, BP_REG);
9230 gcc_assert (m->fs.fp_valid);
9231 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
9232
9233 tmp = plus_constant (DImode, rbp, 8);
9234 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
9235 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
9236 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
9237 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
9238 }
9239 else
9240 {
9241 /* If no hard frame pointer, we set R10 to the SP restore value. */
9242 gcc_assert (!m->fs.fp_valid);
9243 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9244 gcc_assert (m->fs.sp_valid);
9245
9246 r10 = gen_rtx_REG (DImode, R10_REG);
9247 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
9248 emit_insn (gen_rtx_SET (r10, tmp));
9249
9250 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
9251 }
9252 }
9253
9254 /* Generate frame load insns and restore notes. */
9255 for (i = 0; i < ncregs; ++i)
9256 {
9257 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
9258 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
9259 rtx reg, frame_load;
9260
9261 reg = gen_rtx_REG (mode, r.regno);
9262 frame_load = gen_frame_load (reg, rsi, r.offset);
9263
9264 /* Save RSI frame load insn & note to add last. */
9265 if (r.regno == SI_REG)
9266 {
9267 gcc_assert (!rsi_frame_load);
9268 rsi_frame_load = frame_load;
9269 rsi_restore_offset = r.offset;
9270 }
9271 else
9272 {
9273 RTVEC_ELT (v, vi++) = frame_load;
9274 ix86_add_cfa_restore_note (NULL, reg, r.offset);
9275 }
9276 }
9277
9278 /* Add RSI frame load & restore note at the end. */
9279 gcc_assert (rsi_frame_load);
9280 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9281 RTVEC_ELT (v, vi++) = rsi_frame_load;
9282 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
9283 rsi_restore_offset);
9284
9285 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9286 if (!use_call && !frame_pointer_needed)
9287 {
9288 gcc_assert (m->fs.sp_valid);
9289 gcc_assert (!m->fs.sp_realigned);
9290
9291 /* At this point, R10 should point to frame.stack_realign_offset. */
9292 if (m->fs.cfa_reg == stack_pointer_rtx)
9293 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9294 m->fs.sp_offset = frame.stack_realign_offset;
9295 }
9296
9297 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9298 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9299 if (use_call)
9300 insn = emit_insn (tmp);
9301 else
9302 {
9303 insn = emit_jump_insn (tmp);
9304 JUMP_LABEL (insn) = ret_rtx;
9305
9306 if (frame_pointer_needed)
9307 ix86_emit_leave (insn);
9308 else
9309 {
9310 /* Need CFA adjust note. */
9311 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
9312 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
9313 }
9314 }
9315
9316 RTX_FRAME_RELATED_P (insn) = true;
9317 ix86_add_queued_cfa_restore_notes (insn);
9318
9319 /* If we're not doing a tail-call, we need to adjust the stack. */
9320 if (use_call && m->fs.sp_valid)
9321 {
9322 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
9323 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9324 GEN_INT (dealloc), style,
9325 m->fs.cfa_reg == stack_pointer_rtx);
9326 }
9327 }
9328
9329 /* Restore function stack, frame, and registers. */
9330
9331 void
9332 ix86_expand_epilogue (int style)
9333 {
9334 struct machine_function *m = cfun->machine;
9335 struct machine_frame_state frame_state_save = m->fs;
9336 bool restore_regs_via_mov;
9337 bool using_drap;
9338 bool restore_stub_is_tail = false;
9339
9340 if (ix86_function_naked (current_function_decl))
9341 {
9342 /* The program should not reach this point. */
9343 emit_insn (gen_ud2 ());
9344 return;
9345 }
9346
9347 ix86_finalize_stack_frame_flags ();
9348 const struct ix86_frame &frame = cfun->machine->frame;
9349
9350 m->fs.sp_realigned = stack_realign_fp;
9351 m->fs.sp_valid = stack_realign_fp
9352 || !frame_pointer_needed
9353 || crtl->sp_is_unchanging;
9354 gcc_assert (!m->fs.sp_valid
9355 || m->fs.sp_offset == frame.stack_pointer_offset);
9356
9357 /* The FP must be valid if the frame pointer is present. */
9358 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9359 gcc_assert (!m->fs.fp_valid
9360 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9361
9362 /* We must have *some* valid pointer to the stack frame. */
9363 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9364
9365 /* The DRAP is never valid at this point. */
9366 gcc_assert (!m->fs.drap_valid);
9367
9368 /* See the comment about red zone and frame
9369 pointer usage in ix86_expand_prologue. */
9370 if (frame_pointer_needed && frame.red_zone_size)
9371 emit_insn (gen_memory_blockage ());
9372
9373 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9374 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9375
9376 /* Determine the CFA offset of the end of the red-zone. */
9377 m->fs.red_zone_offset = 0;
9378 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9379 {
9380 /* The red-zone begins below the return address and the error code in
9381 an exception handler. */
9382 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
9383
9384 /* When the register save area is in the aligned portion of
9385 the stack, determine the maximum runtime displacement that
9386 matches up with the aligned frame. */
9387 if (stack_realign_drap)
9388 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9389 + UNITS_PER_WORD);
9390 }
9391
9392 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
9393
9394 /* Special care must be taken for the normal return case of a function
9395 using eh_return: the eax and edx registers are marked as saved, but
9396 not restored along this path. Adjust the save location to match. */
9397 if (crtl->calls_eh_return && style != 2)
9398 reg_save_offset -= 2 * UNITS_PER_WORD;
9399
9400 /* EH_RETURN requires the use of moves to function properly. */
9401 if (crtl->calls_eh_return)
9402 restore_regs_via_mov = true;
9403 /* SEH requires the use of pops to identify the epilogue. */
9404 else if (TARGET_SEH)
9405 restore_regs_via_mov = false;
9406 /* If we're only restoring one register and sp cannot be used, then
9407 use a move instruction to restore the register, since it's
9408 less work than reloading sp and popping the register. */
9409 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
9410 restore_regs_via_mov = true;
9411 else if (TARGET_EPILOGUE_USING_MOVE
9412 && cfun->machine->use_fast_prologue_epilogue
9413 && (frame.nregs > 1
9414 || m->fs.sp_offset != reg_save_offset))
9415 restore_regs_via_mov = true;
9416 else if (frame_pointer_needed
9417 && !frame.nregs
9418 && m->fs.sp_offset != reg_save_offset)
9419 restore_regs_via_mov = true;
9420 else if (frame_pointer_needed
9421 && TARGET_USE_LEAVE
9422 && cfun->machine->use_fast_prologue_epilogue
9423 && frame.nregs == 1)
9424 restore_regs_via_mov = true;
9425 else
9426 restore_regs_via_mov = false;
9427
9428 if (restore_regs_via_mov || frame.nsseregs)
9429 {
9430 /* Ensure that the entire register save area is addressable via
9431 the stack pointer, if we will restore SSE regs via sp. */
9432 if (TARGET_64BIT
9433 && m->fs.sp_offset > 0x7fffffff
9434 && sp_valid_at (frame.stack_realign_offset + 1)
9435 && (frame.nsseregs + frame.nregs) != 0)
9436 {
9437 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9438 GEN_INT (m->fs.sp_offset
9439 - frame.sse_reg_save_offset),
9440 style,
9441 m->fs.cfa_reg == stack_pointer_rtx);
9442 }
9443 }
9444
9445 /* If there are any SSE registers to restore, then we have to do it
9446 via moves, since there's obviously no pop for SSE regs. */
9447 if (frame.nsseregs)
9448 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9449 style == 2);
9450
9451 if (m->call_ms2sysv)
9452 {
9453 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9454
9455 /* We cannot use a tail-call for the stub if:
9456 1. We have to pop incoming args,
9457 2. We have additional int regs to restore, or
9458 3. A sibling call will be the tail-call, or
9459 4. We are emitting an eh_return_internal epilogue.
9460
9461 TODO: Item 4 has not yet been tested!
9462
9463 If any of the above are true, we will call the stub rather than
9464 jump to it. */
9465 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9466 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9467 }
9468
9469 /* If using an out-of-line stub that is a tail-call, then... */
9470 if (m->call_ms2sysv && restore_stub_is_tail)
9471 {
9472 /* TODO: paranoid tests. (remove eventually) */
9473 gcc_assert (m->fs.sp_valid);
9474 gcc_assert (!m->fs.sp_realigned);
9475 gcc_assert (!m->fs.fp_valid);
9476 gcc_assert (!m->fs.realigned);
9477 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9478 gcc_assert (!crtl->drap_reg);
9479 gcc_assert (!frame.nregs);
9480 }
9481 else if (restore_regs_via_mov)
9482 {
9483 rtx t;
9484
9485 if (frame.nregs)
9486 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9487
9488 /* eh_return epilogues need %ecx added to the stack pointer. */
9489 if (style == 2)
9490 {
9491 rtx sa = EH_RETURN_STACKADJ_RTX;
9492 rtx_insn *insn;
9493
9494 /* Stack realignment doesn't work with eh_return. */
9495 if (crtl->stack_realign_needed)
9496 sorry ("Stack realignment not supported with "
9497 "%<__builtin_eh_return%>");
9498
9499 /* regparm nested functions don't work with eh_return. */
9500 if (ix86_static_chain_on_stack)
9501 sorry ("regparm nested function not supported with "
9502 "%<__builtin_eh_return%>");
9503
9504 if (frame_pointer_needed)
9505 {
9506 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9507 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9508 emit_insn (gen_rtx_SET (sa, t));
9509
9510 /* NB: eh_return epilogues must restore the frame pointer
9511 in word_mode since the upper 32 bits of RBP register
9512 can have any values. */
9513 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9514 rtx frame_reg = gen_rtx_REG (word_mode,
9515 HARD_FRAME_POINTER_REGNUM);
9516 insn = emit_move_insn (frame_reg, t);
9517
9518 /* Note that we use SA as a temporary CFA, as the return
9519 address is at the proper place relative to it. We
9520 pretend this happens at the FP restore insn because
9521 prior to this insn the FP would be stored at the wrong
9522 offset relative to SA, and after this insn we have no
9523 other reasonable register to use for the CFA. We don't
9524 bother resetting the CFA to the SP for the duration of
9525 the return insn, unless the control flow instrumentation
9526 is done. In this case the SP is used later and we have
9527 to reset CFA to SP. */
9528 add_reg_note (insn, REG_CFA_DEF_CFA,
9529 plus_constant (Pmode, sa, UNITS_PER_WORD));
9530 ix86_add_queued_cfa_restore_notes (insn);
9531 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9532 RTX_FRAME_RELATED_P (insn) = 1;
9533
9534 m->fs.cfa_reg = sa;
9535 m->fs.cfa_offset = UNITS_PER_WORD;
9536 m->fs.fp_valid = false;
9537
9538 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9539 const0_rtx, style,
9540 flag_cf_protection);
9541 }
9542 else
9543 {
9544 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9545 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9546 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9547 ix86_add_queued_cfa_restore_notes (insn);
9548
9549 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9550 if (m->fs.cfa_offset != UNITS_PER_WORD)
9551 {
9552 m->fs.cfa_offset = UNITS_PER_WORD;
9553 add_reg_note (insn, REG_CFA_DEF_CFA,
9554 plus_constant (Pmode, stack_pointer_rtx,
9555 UNITS_PER_WORD));
9556 RTX_FRAME_RELATED_P (insn) = 1;
9557 }
9558 }
9559 m->fs.sp_offset = UNITS_PER_WORD;
9560 m->fs.sp_valid = true;
9561 m->fs.sp_realigned = false;
9562 }
9563 }
9564 else
9565 {
9566 /* SEH requires that the function end with (1) a stack adjustment
9567 if necessary, (2) a sequence of pops, and (3) a return or
9568 jump instruction. Prevent insns from the function body from
9569 being scheduled into this sequence. */
9570 if (TARGET_SEH)
9571 {
9572 /* Prevent a catch region from being adjacent to the standard
9573 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
9574 nor several other flags that would be interesting to test are
9575 set up yet. */
9576 if (flag_non_call_exceptions)
9577 emit_insn (gen_nops (const1_rtx));
9578 else
9579 emit_insn (gen_blockage ());
9580 }
9581
9582 /* First step is to deallocate the stack frame so that we can
9583 pop the registers. If the stack pointer was realigned, it needs
9584 to be restored now. Also do it on SEH targets for very large
9585 frames, as the emitted instructions aren't allowed by the ABI
9586 in epilogues. */
9587 if (!m->fs.sp_valid || m->fs.sp_realigned
9588 || (TARGET_SEH
9589 && (m->fs.sp_offset - reg_save_offset
9590 >= SEH_MAX_FRAME_SIZE)))
9591 {
9592 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9593 GEN_INT (m->fs.fp_offset
9594 - reg_save_offset),
9595 style, false);
9596 }
9597 else if (m->fs.sp_offset != reg_save_offset)
9598 {
9599 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9600 GEN_INT (m->fs.sp_offset
9601 - reg_save_offset),
9602 style,
9603 m->fs.cfa_reg == stack_pointer_rtx);
9604 }
9605
9606 ix86_emit_restore_regs_using_pop ();
9607 }
9608
9609 /* If we used a frame pointer and haven't already got rid of it,
9610 then do so now. */
9611 if (m->fs.fp_valid)
9612 {
9613 /* If the stack pointer is valid and pointing at the frame
9614 pointer store address, then we only need a pop. */
9615 if (sp_valid_at (frame.hfp_save_offset)
9616 && m->fs.sp_offset == frame.hfp_save_offset)
9617 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9618 /* Leave results in shorter dependency chains on CPUs that are
9619 able to grok it fast. */
9620 else if (TARGET_USE_LEAVE
9621 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9622 || !cfun->machine->use_fast_prologue_epilogue)
9623 ix86_emit_leave (NULL);
9624 else
9625 {
9626 pro_epilogue_adjust_stack (stack_pointer_rtx,
9627 hard_frame_pointer_rtx,
9628 const0_rtx, style, !using_drap);
9629 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9630 }
9631 }
9632
9633 if (using_drap)
9634 {
9635 int param_ptr_offset = UNITS_PER_WORD;
9636 rtx_insn *insn;
9637
9638 gcc_assert (stack_realign_drap);
9639
9640 if (ix86_static_chain_on_stack)
9641 param_ptr_offset += UNITS_PER_WORD;
9642 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9643 param_ptr_offset += UNITS_PER_WORD;
9644
9645 insn = emit_insn (gen_rtx_SET
9646 (stack_pointer_rtx,
9647 plus_constant (Pmode, crtl->drap_reg,
9648 -param_ptr_offset)));
9649 m->fs.cfa_reg = stack_pointer_rtx;
9650 m->fs.cfa_offset = param_ptr_offset;
9651 m->fs.sp_offset = param_ptr_offset;
9652 m->fs.realigned = false;
9653
9654 add_reg_note (insn, REG_CFA_DEF_CFA,
9655 plus_constant (Pmode, stack_pointer_rtx,
9656 param_ptr_offset));
9657 RTX_FRAME_RELATED_P (insn) = 1;
9658
9659 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9660 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
9661 }
9662
9663 /* At this point the stack pointer must be valid, and we must have
9664 restored all of the registers. We may not have deallocated the
9665 entire stack frame. We've delayed this until now because it may
9666 be possible to merge the local stack deallocation with the
9667 deallocation forced by ix86_static_chain_on_stack. */
9668 gcc_assert (m->fs.sp_valid);
9669 gcc_assert (!m->fs.sp_realigned);
9670 gcc_assert (!m->fs.fp_valid);
9671 gcc_assert (!m->fs.realigned);
9672 if (m->fs.sp_offset != UNITS_PER_WORD)
9673 {
9674 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9675 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
9676 style, true);
9677 }
9678 else
9679 ix86_add_queued_cfa_restore_notes (get_last_insn ());
9680
9681 /* Sibcall epilogues don't want a return instruction. */
9682 if (style == 0)
9683 {
9684 m->fs = frame_state_save;
9685 return;
9686 }
9687
9688 if (cfun->machine->func_type != TYPE_NORMAL)
9689 emit_jump_insn (gen_interrupt_return ());
9690 else if (crtl->args.pops_args && crtl->args.size)
9691 {
9692 rtx popc = GEN_INT (crtl->args.pops_args);
9693
9694 /* i386 can only pop 64K bytes. If asked to pop more, pop return
9695 address, do explicit add, and jump indirectly to the caller. */
9696
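      /* As a rough illustration only (not emitted literally), for a large
	 pops_args value N the code below ends the function as

	     popl  %ecx          # pop the return address
	     addl  $N, %esp      # drop the incoming arguments
	     jmp   *%ecx         # return to the caller

	 rather than the usual "ret $N", whose immediate operand is limited
	 to 16 bits.  */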
9697 if (crtl->args.pops_args >= 65536)
9698 {
9699 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9700 rtx_insn *insn;
9701
9702 /* There is no "pascal" calling convention in any 64bit ABI. */
9703 gcc_assert (!TARGET_64BIT);
9704
9705 insn = emit_insn (gen_pop (ecx));
9706 m->fs.cfa_offset -= UNITS_PER_WORD;
9707 m->fs.sp_offset -= UNITS_PER_WORD;
9708
9709 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9710 x = gen_rtx_SET (stack_pointer_rtx, x);
9711 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9712 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9713 RTX_FRAME_RELATED_P (insn) = 1;
9714
9715 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9716 popc, -1, true);
9717 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9718 }
9719 else
9720 emit_jump_insn (gen_simple_return_pop_internal (popc));
9721 }
9722 else if (!m->call_ms2sysv || !restore_stub_is_tail)
9723 {
9724 /* In case of return from EH a simple return cannot be used
9725 as a return address will be compared with a shadow stack
9726 return address. Use indirect jump instead. */
9727 if (style == 2 && flag_cf_protection)
9728 {
9729 /* Register used in indirect jump must be in word_mode. But
9730 Pmode may not be the same as word_mode for x32. */
9731 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9732 rtx_insn *insn;
9733
9734 insn = emit_insn (gen_pop (ecx));
9735 m->fs.cfa_offset -= UNITS_PER_WORD;
9736 m->fs.sp_offset -= UNITS_PER_WORD;
9737
9738 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9739 x = gen_rtx_SET (stack_pointer_rtx, x);
9740 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9741 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9742 RTX_FRAME_RELATED_P (insn) = 1;
9743
9744 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9745 }
9746 else
9747 emit_jump_insn (gen_simple_return_internal ());
9748 }
9749
9750 /* Restore the state back to the state from the prologue,
9751 so that it's correct for the next epilogue. */
9752 m->fs = frame_state_save;
9753 }
9754
9755 /* Reset from the function's potential modifications. */
9756
9757 static void
9758 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9759 {
9760 if (pic_offset_table_rtx
9761 && !ix86_use_pseudo_pic_reg ())
9762 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9763
9764 if (TARGET_MACHO)
9765 {
9766 rtx_insn *insn = get_last_insn ();
9767 rtx_insn *deleted_debug_label = NULL;
9768
9769 /* Mach-O doesn't support labels at the end of objects, so if
9770 it looks like we might want one, take special action.
9771 First, collect any sequence of deleted debug labels. */
9772 while (insn
9773 && NOTE_P (insn)
9774 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9775 {
9776 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9777 notes only, instead set their CODE_LABEL_NUMBER to -1,
9778 otherwise there would be code generation differences
9779 between -g and -g0. */
9780 if (NOTE_P (insn) && NOTE_KIND (insn)
9781 == NOTE_INSN_DELETED_DEBUG_LABEL)
9782 deleted_debug_label = insn;
9783 insn = PREV_INSN (insn);
9784 }
9785
9786 /* If we have:
9787 label:
9788 barrier
9789 then this needs to be detected, so skip past the barrier. */
9790
9791 if (insn && BARRIER_P (insn))
9792 insn = PREV_INSN (insn);
9793
9794 /* Up to now we've only seen notes or barriers. */
9795 if (insn)
9796 {
9797 if (LABEL_P (insn)
9798 || (NOTE_P (insn)
9799 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9800 /* Trailing label. */
9801 fputs ("\tnop\n", file);
9802 else if (cfun && ! cfun->is_thunk)
9803 {
9804 /* See if we have a completely empty function body, skipping
9805 the special case of the picbase thunk emitted as asm. */
9806 while (insn && ! INSN_P (insn))
9807 insn = PREV_INSN (insn);
9808 /* If we don't find any insns, we've got an empty function body;
9809 i.e. completely empty - without a return or branch. This is
9810 taken as the case where a function body has been removed
9811 because it contains an inline __builtin_unreachable(). GCC
9812 declares that reaching __builtin_unreachable() means UB so
9813 we're not obliged to do anything special; however, we want
9814 non-zero-sized function bodies. To meet this, and help the
9815 user out, let's trap the case. */
9816 if (insn == NULL)
9817 fputs ("\tud2\n", file);
9818 }
9819 }
9820 else if (deleted_debug_label)
9821 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9822 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9823 CODE_LABEL_NUMBER (insn) = -1;
9824 }
9825 }
9826
9827 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
9828
9829 void
9830 ix86_print_patchable_function_entry (FILE *file,
9831 unsigned HOST_WIDE_INT patch_area_size,
9832 bool record_p)
9833 {
9834 if (cfun->machine->function_label_emitted)
9835 {
9836 /* NB: When ix86_print_patchable_function_entry is called after
9837 the function label has been emitted, we have inserted or queued
9838 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
9839 place. There is nothing to do here. */
9840 return;
9841 }
9842
9843 default_print_patchable_function_entry (file, patch_area_size,
9844 record_p);
9845 }
9846
9847 /* Output patchable area. NB: default_print_patchable_function_entry
9848 isn't available in i386.md. */
9849
9850 void
9851 ix86_output_patchable_area (unsigned int patch_area_size,
9852 bool record_p)
9853 {
9854 default_print_patchable_function_entry (asm_out_file,
9855 patch_area_size,
9856 record_p);
9857 }
9858
9859 /* Return a scratch register to use in the split stack prologue. The
9860 split stack prologue is used for -fsplit-stack. It is the first
9861 instructions in the function, even before the regular prologue.
9862 The scratch register can be any caller-saved register which is not
9863 used for parameters or for the static chain. */
9864
9865 static unsigned int
9866 split_stack_prologue_scratch_regno (void)
9867 {
9868 if (TARGET_64BIT)
9869 return R11_REG;
9870 else
9871 {
9872 bool is_fastcall, is_thiscall;
9873 int regparm;
9874
9875 is_fastcall = (lookup_attribute ("fastcall",
9876 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9877 != NULL);
9878 is_thiscall = (lookup_attribute ("thiscall",
9879 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9880 != NULL);
9881 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9882
9883 if (is_fastcall)
9884 {
9885 if (DECL_STATIC_CHAIN (cfun->decl))
9886 {
9887 sorry ("%<-fsplit-stack%> does not support fastcall with "
9888 "nested function");
9889 return INVALID_REGNUM;
9890 }
9891 return AX_REG;
9892 }
9893 else if (is_thiscall)
9894 {
9895 if (!DECL_STATIC_CHAIN (cfun->decl))
9896 return DX_REG;
9897 return AX_REG;
9898 }
9899 else if (regparm < 3)
9900 {
9901 if (!DECL_STATIC_CHAIN (cfun->decl))
9902 return CX_REG;
9903 else
9904 {
9905 if (regparm >= 2)
9906 {
9907 sorry ("%<-fsplit-stack%> does not support 2 register "
9908 "parameters for a nested function");
9909 return INVALID_REGNUM;
9910 }
9911 return DX_REG;
9912 }
9913 }
9914 else
9915 {
9916 /* FIXME: We could make this work by pushing a register
9917 around the addition and comparison. */
9918 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9919 return INVALID_REGNUM;
9920 }
9921 }
9922 }
9923
9924 /* A SYMBOL_REF for the function which allocates new stack space for
9925 -fsplit-stack. */
9926
9927 static GTY(()) rtx split_stack_fn;
9928
9929 /* A SYMBOL_REF for the more stack function when using the large
9930 model. */
9931
9932 static GTY(()) rtx split_stack_fn_large;
9933
9934 /* Return location of the stack guard value in the TLS block. */
9935
9936 rtx
9937 ix86_split_stack_guard (void)
9938 {
9939 int offset;
9940 addr_space_t as = DEFAULT_TLS_SEG_REG;
9941 rtx r;
9942
9943 gcc_assert (flag_split_stack);
9944
9945 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9946 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9947 #else
9948 gcc_unreachable ();
9949 #endif
9950
9951 r = GEN_INT (offset);
9952 r = gen_const_mem (Pmode, r);
9953 set_mem_addr_space (r, as);
9954
9955 return r;
9956 }
9957
9958 /* Handle -fsplit-stack. These are the first instructions in the
9959 function, even before the regular prologue. */
9960
9961 void
9962 ix86_expand_split_stack_prologue (void)
9963 {
9964 HOST_WIDE_INT allocate;
9965 unsigned HOST_WIDE_INT args_size;
9966 rtx_code_label *label;
9967 rtx limit, current, allocate_rtx, call_fusage;
9968 rtx_insn *call_insn;
9969 rtx scratch_reg = NULL_RTX;
9970 rtx_code_label *varargs_label = NULL;
9971 rtx fn;
9972
9973 gcc_assert (flag_split_stack && reload_completed);
9974
9975 ix86_finalize_stack_frame_flags ();
9976 struct ix86_frame &frame = cfun->machine->frame;
9977 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9978
9979 /* This is the label we will branch to if we have enough stack
9980 space. We expect the basic block reordering pass to reverse this
9981 branch if optimizing, so that we branch in the unlikely case. */
9982 label = gen_label_rtx ();
9983
9984 /* We need to compare the stack pointer minus the frame size with
9985 the stack boundary in the TCB. The stack boundary always gives
9986 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9987 can compare directly. Otherwise we need to do an addition. */
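  /* As a hedged sketch only (the segment register and guard offset are
     target-dependent, see ix86_split_stack_guard), on 64-bit Linux the
     comparison below typically amounts to

	 cmp  %fs:<guard-offset>, %rsp
	 jae  .Lenough_stack

     with %rsp replaced by a scratch register holding sp - frame size
     when the frame needs more than SPLIT_STACK_AVAILABLE bytes.  */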
9988
9989 limit = ix86_split_stack_guard ();
9990
9991 if (allocate < SPLIT_STACK_AVAILABLE)
9992 current = stack_pointer_rtx;
9993 else
9994 {
9995 unsigned int scratch_regno;
9996 rtx offset;
9997
9998 /* We need a scratch register to hold the stack pointer minus
9999 the required frame size. Since this is the very start of the
10000 function, the scratch register can be any caller-saved
10001 register which is not used for parameters. */
10002 offset = GEN_INT (- allocate);
10003 scratch_regno = split_stack_prologue_scratch_regno ();
10004 if (scratch_regno == INVALID_REGNUM)
10005 return;
10006 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10007 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10008 {
10009 /* We don't use gen_add in this case because it will
10010 want to split to lea, but when not optimizing the insn
10011 will not be split after this point. */
10012 emit_insn (gen_rtx_SET (scratch_reg,
10013 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10014 offset)));
10015 }
10016 else
10017 {
10018 emit_move_insn (scratch_reg, offset);
10019 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
10020 }
10021 current = scratch_reg;
10022 }
10023
10024 ix86_expand_branch (GEU, current, limit, label);
10025 rtx_insn *jump_insn = get_last_insn ();
10026 JUMP_LABEL (jump_insn) = label;
10027
10028 /* Mark the jump as very likely to be taken. */
10029 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
10030
10031 if (split_stack_fn == NULL_RTX)
10032 {
10033 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10034 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
10035 }
10036 fn = split_stack_fn;
10037
10038 /* Get more stack space. We pass in the desired stack space and the
10039 size of the arguments to copy to the new stack. In 32-bit mode
10040 we push the parameters; __morestack will return on a new stack
10041 anyhow. In 64-bit mode we pass the parameters in r10 and
10042 r11. */
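  /* A hedged sketch of the 64-bit, non-large-model path below (the
     actual RTL is generated by the code that follows):

	 movq  $<allocate>, %r10     # requested frame size
	 movq  $<args_size>, %r11    # bytes of arguments to copy
	 call  __morestack

     In 32-bit mode the two values are pushed on the stack instead.  */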
10043 allocate_rtx = GEN_INT (allocate);
10044 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
10045 call_fusage = NULL_RTX;
10046 rtx pop = NULL_RTX;
10047 if (TARGET_64BIT)
10048 {
10049 rtx reg10, reg11;
10050
10051 reg10 = gen_rtx_REG (Pmode, R10_REG);
10052 reg11 = gen_rtx_REG (Pmode, R11_REG);
10053
10054 /* If this function uses a static chain, it will be in %r10.
10055 Preserve it across the call to __morestack. */
10056 if (DECL_STATIC_CHAIN (cfun->decl))
10057 {
10058 rtx rax;
10059
10060 rax = gen_rtx_REG (word_mode, AX_REG);
10061 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
10062 use_reg (&call_fusage, rax);
10063 }
10064
10065 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
10066 && !TARGET_PECOFF)
10067 {
10068 HOST_WIDE_INT argval;
10069
10070 gcc_assert (Pmode == DImode);
10071 /* When using the large model we need to load the address
10072 into a register, and we've run out of registers. So we
10073 switch to a different calling convention, and we call a
10074 different function: __morestack_large_model. We pass the
10075 argument size in the upper 32 bits of r10 and pass the
10076 frame size in the lower 32 bits. */
10077 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
10078 gcc_assert ((args_size & 0xffffffff) == args_size);
10079
10080 if (split_stack_fn_large == NULL_RTX)
10081 {
10082 split_stack_fn_large
10083 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
10084 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
10085 }
10086 if (ix86_cmodel == CM_LARGE_PIC)
10087 {
10088 rtx_code_label *label;
10089 rtx x;
10090
10091 label = gen_label_rtx ();
10092 emit_label (label);
10093 LABEL_PRESERVE_P (label) = 1;
10094 emit_insn (gen_set_rip_rex64 (reg10, label));
10095 emit_insn (gen_set_got_offset_rex64 (reg11, label));
10096 emit_insn (gen_add2_insn (reg10, reg11));
10097 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
10098 UNSPEC_GOT);
10099 x = gen_rtx_CONST (Pmode, x);
10100 emit_move_insn (reg11, x);
10101 x = gen_rtx_PLUS (Pmode, reg10, reg11);
10102 x = gen_const_mem (Pmode, x);
10103 emit_move_insn (reg11, x);
10104 }
10105 else
10106 emit_move_insn (reg11, split_stack_fn_large);
10107
10108 fn = reg11;
10109
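	  /* Purely illustrative numbers: with args_size == 0x20 and
	     allocate == 0x1000, argval below becomes 0x0000002000001000,
	     i.e. the argument size in the upper 32 bits and the frame
	     size in the lower 32 bits of %r10.  */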
10110 argval = ((args_size << 16) << 16) + allocate;
10111 emit_move_insn (reg10, GEN_INT (argval));
10112 }
10113 else
10114 {
10115 emit_move_insn (reg10, allocate_rtx);
10116 emit_move_insn (reg11, GEN_INT (args_size));
10117 use_reg (&call_fusage, reg11);
10118 }
10119
10120 use_reg (&call_fusage, reg10);
10121 }
10122 else
10123 {
10124 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
10125 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
10126 insn = emit_insn (gen_push (allocate_rtx));
10127 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
10128 pop = GEN_INT (2 * UNITS_PER_WORD);
10129 }
10130 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
10131 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10132 pop, false);
10133 add_function_usage_to (call_insn, call_fusage);
10134 if (!TARGET_64BIT)
10135 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
10136 /* Indicate that this function can't jump to non-local gotos. */
10137 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
10138
10139 /* In order to make call/return prediction work right, we now need
10140 to execute a return instruction. See
10141 libgcc/config/i386/morestack.S for the details on how this works.
10142
10143 For flow purposes gcc must not see this as a return
10144 instruction--we need control flow to continue at the subsequent
10145 label. Therefore, we use an unspec. */
10146 gcc_assert (crtl->args.pops_args < 65536);
10147 rtx_insn *ret_insn
10148 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10149
10150 if ((flag_cf_protection & CF_BRANCH))
10151 {
10152 /* Insert ENDBR since __morestack will jump back here via indirect
10153 call. */
10154 rtx cet_eb = gen_nop_endbr ();
10155 emit_insn_after (cet_eb, ret_insn);
10156 }
10157
10158 /* If we are in 64-bit mode and this function uses a static chain,
10159 we saved %r10 in %rax before calling __morestack. */
10160 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10161 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
10162 gen_rtx_REG (word_mode, AX_REG));
10163
10164 /* If this function calls va_start, we need to store a pointer to
10165 the arguments on the old stack, because they may not have been
10166 all copied to the new stack. At this point the old stack can be
10167 found at the frame pointer value used by __morestack, because
10168 __morestack has set that up before calling back to us. Here we
10169 store that pointer in a scratch register, and in
10170 ix86_expand_prologue we store the scratch register in a stack
10171 slot. */
10172 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10173 {
10174 unsigned int scratch_regno;
10175 rtx frame_reg;
10176 int words;
10177
10178 scratch_regno = split_stack_prologue_scratch_regno ();
10179 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10180 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10181
10182 /* 64-bit:
10183 fp -> old fp value
10184 return address within this function
10185 return address of caller of this function
10186 stack arguments
10187 So we add three words to get to the stack arguments.
10188
10189 32-bit:
10190 fp -> old fp value
10191 return address within this function
10192 first argument to __morestack
10193 second argument to __morestack
10194 return address of caller of this function
10195 stack arguments
10196 So we add five words to get to the stack arguments.
10197 */
10198 words = TARGET_64BIT ? 3 : 5;
10199 emit_insn (gen_rtx_SET (scratch_reg,
10200 plus_constant (Pmode, frame_reg,
10201 words * UNITS_PER_WORD)));
10202
10203 varargs_label = gen_label_rtx ();
10204 emit_jump_insn (gen_jump (varargs_label));
10205 JUMP_LABEL (get_last_insn ()) = varargs_label;
10206
10207 emit_barrier ();
10208 }
10209
10210 emit_label (label);
10211 LABEL_NUSES (label) = 1;
10212
10213 /* If this function calls va_start, we now have to set the scratch
10214 register for the case where we do not call __morestack. In this
10215 case we need to set it based on the stack pointer. */
10216 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10217 {
10218 emit_insn (gen_rtx_SET (scratch_reg,
10219 plus_constant (Pmode, stack_pointer_rtx,
10220 UNITS_PER_WORD)));
10221
10222 emit_label (varargs_label);
10223 LABEL_NUSES (varargs_label) = 1;
10224 }
10225 }
10226
10227 /* We may have to tell the dataflow pass that the split stack prologue
10228 is initializing a scratch register. */
10229
10230 static void
10231 ix86_live_on_entry (bitmap regs)
10232 {
10233 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10234 {
10235 gcc_assert (flag_split_stack);
10236 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10237 }
10238 }
10239 \f
10240 /* Extract the parts of an RTL expression that is a valid memory address
10241 for an instruction. Return false if the structure of the address is
10242 grossly off. */
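/* For instance (an illustrative example, not an exhaustive description),
   an address of the rough form

       (plus (plus (mult (reg:SI bx) (const_int 4)) (reg:SI ax))
	     (const_int 12))

   i.e. the operand 12(%eax,%ebx,4), decomposes into base = %eax,
   index = %ebx, scale = 4, disp = 12 and the generic address space.  */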
10243
10244 bool
10245 ix86_decompose_address (rtx addr, struct ix86_address *out)
10246 {
10247 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10248 rtx base_reg, index_reg;
10249 HOST_WIDE_INT scale = 1;
10250 rtx scale_rtx = NULL_RTX;
10251 rtx tmp;
10252 addr_space_t seg = ADDR_SPACE_GENERIC;
10253
10254 /* Allow zero-extended SImode addresses,
10255 they will be emitted with addr32 prefix. */
10256 if (TARGET_64BIT && GET_MODE (addr) == DImode)
10257 {
10258 if (GET_CODE (addr) == ZERO_EXTEND
10259 && GET_MODE (XEXP (addr, 0)) == SImode)
10260 {
10261 addr = XEXP (addr, 0);
10262 if (CONST_INT_P (addr))
10263 return false;
10264 }
10265 else if (GET_CODE (addr) == AND
10266 && const_32bit_mask (XEXP (addr, 1), DImode))
10267 {
10268 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
10269 if (addr == NULL_RTX)
10270 return false;
10271
10272 if (CONST_INT_P (addr))
10273 return false;
10274 }
10275 else if (GET_CODE (addr) == AND)
10276 {
10277 /* For an ASHIFT inside an AND, combine will not generate the
10278 canonical zero-extend. Merge the AND mask with the shift count
10279 to check whether it is a canonical zero-extend. */
10280 tmp = XEXP (addr, 0);
10281 rtx mask = XEXP (addr, 1);
10282 if (tmp && GET_CODE(tmp) == ASHIFT)
10283 {
10284 rtx shift_val = XEXP (tmp, 1);
10285 if (CONST_INT_P (mask) && CONST_INT_P (shift_val)
10286 && (((unsigned HOST_WIDE_INT) INTVAL(mask)
10287 | ((HOST_WIDE_INT_1U << INTVAL(shift_val)) - 1))
10288 == 0xffffffff))
10289 {
10290 addr = lowpart_subreg (SImode, XEXP (addr, 0),
10291 DImode);
10292 }
10293 }
10294
10295 }
10296 }
10297
10298 /* Allow SImode subregs of DImode addresses,
10299 they will be emitted with addr32 prefix. */
10300 if (TARGET_64BIT && GET_MODE (addr) == SImode)
10301 {
10302 if (SUBREG_P (addr)
10303 && GET_MODE (SUBREG_REG (addr)) == DImode)
10304 {
10305 addr = SUBREG_REG (addr);
10306 if (CONST_INT_P (addr))
10307 return false;
10308 }
10309 }
10310
10311 if (REG_P (addr))
10312 base = addr;
10313 else if (SUBREG_P (addr))
10314 {
10315 if (REG_P (SUBREG_REG (addr)))
10316 base = addr;
10317 else
10318 return false;
10319 }
10320 else if (GET_CODE (addr) == PLUS)
10321 {
10322 rtx addends[4], op;
10323 int n = 0, i;
10324
10325 op = addr;
10326 do
10327 {
10328 if (n >= 4)
10329 return false;
10330 addends[n++] = XEXP (op, 1);
10331 op = XEXP (op, 0);
10332 }
10333 while (GET_CODE (op) == PLUS);
10334 if (n >= 4)
10335 return false;
10336 addends[n] = op;
10337
10338 for (i = n; i >= 0; --i)
10339 {
10340 op = addends[i];
10341 switch (GET_CODE (op))
10342 {
10343 case MULT:
10344 if (index)
10345 return false;
10346 index = XEXP (op, 0);
10347 scale_rtx = XEXP (op, 1);
10348 break;
10349
10350 case ASHIFT:
10351 if (index)
10352 return false;
10353 index = XEXP (op, 0);
10354 tmp = XEXP (op, 1);
10355 if (!CONST_INT_P (tmp))
10356 return false;
10357 scale = INTVAL (tmp);
10358 if ((unsigned HOST_WIDE_INT) scale > 3)
10359 return false;
10360 scale = 1 << scale;
10361 break;
10362
10363 case ZERO_EXTEND:
10364 op = XEXP (op, 0);
10365 if (GET_CODE (op) != UNSPEC)
10366 return false;
10367 /* FALLTHRU */
10368
10369 case UNSPEC:
10370 if (XINT (op, 1) == UNSPEC_TP
10371 && TARGET_TLS_DIRECT_SEG_REFS
10372 && seg == ADDR_SPACE_GENERIC)
10373 seg = DEFAULT_TLS_SEG_REG;
10374 else
10375 return false;
10376 break;
10377
10378 case SUBREG:
10379 if (!REG_P (SUBREG_REG (op)))
10380 return false;
10381 /* FALLTHRU */
10382
10383 case REG:
10384 if (!base)
10385 base = op;
10386 else if (!index)
10387 index = op;
10388 else
10389 return false;
10390 break;
10391
10392 case CONST:
10393 case CONST_INT:
10394 case SYMBOL_REF:
10395 case LABEL_REF:
10396 if (disp)
10397 return false;
10398 disp = op;
10399 break;
10400
10401 default:
10402 return false;
10403 }
10404 }
10405 }
10406 else if (GET_CODE (addr) == MULT)
10407 {
10408 index = XEXP (addr, 0); /* index*scale */
10409 scale_rtx = XEXP (addr, 1);
10410 }
10411 else if (GET_CODE (addr) == ASHIFT)
10412 {
10413 /* We're called for lea too, which implements ashift on occasion. */
10414 index = XEXP (addr, 0);
10415 tmp = XEXP (addr, 1);
10416 if (!CONST_INT_P (tmp))
10417 return false;
10418 scale = INTVAL (tmp);
10419 if ((unsigned HOST_WIDE_INT) scale > 3)
10420 return false;
10421 scale = 1 << scale;
10422 }
10423 else
10424 disp = addr; /* displacement */
10425
10426 if (index)
10427 {
10428 if (REG_P (index))
10429 ;
10430 else if (SUBREG_P (index)
10431 && REG_P (SUBREG_REG (index)))
10432 ;
10433 else
10434 return false;
10435 }
10436
10437 /* Extract the integral value of scale. */
10438 if (scale_rtx)
10439 {
10440 if (!CONST_INT_P (scale_rtx))
10441 return false;
10442 scale = INTVAL (scale_rtx);
10443 }
10444
10445 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
10446 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
10447
10448 /* Avoid useless 0 displacement. */
10449 if (disp == const0_rtx && (base || index))
10450 disp = NULL_RTX;
10451
10452 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10453 if (base_reg && index_reg && scale == 1
10454 && (REGNO (index_reg) == ARG_POINTER_REGNUM
10455 || REGNO (index_reg) == FRAME_POINTER_REGNUM
10456 || REGNO (index_reg) == SP_REG))
10457 {
10458 std::swap (base, index);
10459 std::swap (base_reg, index_reg);
10460 }
10461
10462 /* Special case: %ebp cannot be encoded as a base without a displacement.
10463 Similarly %r13. */
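  /* (Roughly speaking: in the ModR/M and SIB encodings, the base value
     that would name %ebp or %r13 with mod == 00 instead selects a
     disp32-only form, so a zero displacement must be spelled out.)  */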
10464 if (!disp && base_reg
10465 && (REGNO (base_reg) == ARG_POINTER_REGNUM
10466 || REGNO (base_reg) == FRAME_POINTER_REGNUM
10467 || REGNO (base_reg) == BP_REG
10468 || REGNO (base_reg) == R13_REG))
10469 disp = const0_rtx;
10470
10471 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
10472 Avoid this by transforming to [%esi+0].
10473 Reload calls address legitimization without cfun defined, so we need
10474 to test cfun for being non-NULL. */
10475 if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
10476 && base_reg && !index_reg && !disp
10477 && REGNO (base_reg) == SI_REG)
10478 disp = const0_rtx;
10479
10480 /* Special case: encode reg+reg instead of reg*2. */
10481 if (!base && index && scale == 2)
10482 base = index, base_reg = index_reg, scale = 1;
10483
10484 /* Special case: scaling cannot be encoded without base or displacement. */
10485 if (!base && !disp && index && scale != 1)
10486 disp = const0_rtx;
10487
10488 out->base = base;
10489 out->index = index;
10490 out->disp = disp;
10491 out->scale = scale;
10492 out->seg = seg;
10493
10494 return true;
10495 }
10496 \f
10497 /* Return cost of the memory address x.
10498 For i386, it is better to use a complex address than let gcc copy
10499 the address into a reg and make a new pseudo. But not if the address
10500 requires two regs - that would mean more pseudos with longer
10501 lifetimes. */
10502 static int
10503 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10504 {
10505 struct ix86_address parts;
10506 int cost = 1;
10507 int ok = ix86_decompose_address (x, &parts);
10508
10509 gcc_assert (ok);
10510
10511 if (parts.base && SUBREG_P (parts.base))
10512 parts.base = SUBREG_REG (parts.base);
10513 if (parts.index && SUBREG_P (parts.index))
10514 parts.index = SUBREG_REG (parts.index);
10515
10516 /* Attempt to minimize number of registers in the address by increasing
10517 address cost for each used register. We don't increase address cost
10518 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
10519 is not invariant itself it most likely means that base or index is not
10520 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
10521 which is not profitable for x86. */
10522 if (parts.base
10523 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10524 && (current_pass->type == GIMPLE_PASS
10525 || !pic_offset_table_rtx
10526 || !REG_P (parts.base)
10527 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
10528 cost++;
10529
10530 if (parts.index
10531 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10532 && (current_pass->type == GIMPLE_PASS
10533 || !pic_offset_table_rtx
10534 || !REG_P (parts.index)
10535 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
10536 cost++;
10537
10538 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10539 since its predecode logic can't detect the length of such instructions
10540 and they degenerate to vector decoding. Increase the cost of such
10541 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10542 to split such addresses or even refuse such addresses at all.
10543
10544 Following addressing modes are affected:
10545 [base+scale*index]
10546 [scale*index+disp]
10547 [base+index]
10548
10549 The first and last cases may be avoidable by explicitly coding the zero in
10550 the memory address, but I don't have an AMD K6 machine handy to check this
10551 theory. */
10552
10553 if (TARGET_CPU_P (K6)
10554 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10555 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10556 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10557 cost += 10;
10558
10559 return cost;
10560 }
10561 \f
10562 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10563 this is used to form addresses to local data when -fPIC is in
10564 use. */
10565
10566 static bool
10567 darwin_local_data_pic (rtx disp)
10568 {
10569 return (GET_CODE (disp) == UNSPEC
10570 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10571 }
10572
10573 /* True if the function symbol operand X should be loaded from GOT.
10574 If CALL_P is true, X is a call operand.
10575
10576 NB: -mno-direct-extern-access doesn't force load from GOT for
10577 call.
10578
10579 NB: In 32-bit mode, only non-PIC is allowed in inline assembly
10580 statements, since a PIC register could not be available at the
10581 call site. */
10582
10583 bool
10584 ix86_force_load_from_GOT_p (rtx x, bool call_p)
10585 {
10586 return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
10587 && !TARGET_PECOFF && !TARGET_MACHO
10588 && (!flag_pic || this_is_asm_operands)
10589 && ix86_cmodel != CM_LARGE
10590 && ix86_cmodel != CM_LARGE_PIC
10591 && GET_CODE (x) == SYMBOL_REF
10592 && ((!call_p
10593 && (!ix86_direct_extern_access
10594 || (SYMBOL_REF_DECL (x)
10595 && lookup_attribute ("nodirect_extern_access",
10596 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
10597 || (SYMBOL_REF_FUNCTION_P (x)
10598 && (!flag_plt
10599 || (SYMBOL_REF_DECL (x)
10600 && lookup_attribute ("noplt",
10601 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
10602 && !SYMBOL_REF_LOCAL_P (x));
10603 }
10604
10605 /* Determine if a given RTX is a valid constant. We already know this
10606 satisfies CONSTANT_P. */
10607
10608 static bool
10609 ix86_legitimate_constant_p (machine_mode mode, rtx x)
10610 {
10611 switch (GET_CODE (x))
10612 {
10613 case CONST:
10614 x = XEXP (x, 0);
10615
10616 if (GET_CODE (x) == PLUS)
10617 {
10618 if (!CONST_INT_P (XEXP (x, 1)))
10619 return false;
10620 x = XEXP (x, 0);
10621 }
10622
10623 if (TARGET_MACHO && darwin_local_data_pic (x))
10624 return true;
10625
10626 /* Only some unspecs are valid as "constants". */
10627 if (GET_CODE (x) == UNSPEC)
10628 switch (XINT (x, 1))
10629 {
10630 case UNSPEC_GOT:
10631 case UNSPEC_GOTOFF:
10632 case UNSPEC_PLTOFF:
10633 return TARGET_64BIT;
10634 case UNSPEC_TPOFF:
10635 case UNSPEC_NTPOFF:
10636 x = XVECEXP (x, 0, 0);
10637 return (GET_CODE (x) == SYMBOL_REF
10638 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10639 case UNSPEC_DTPOFF:
10640 x = XVECEXP (x, 0, 0);
10641 return (GET_CODE (x) == SYMBOL_REF
10642 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10643 default:
10644 return false;
10645 }
10646
10647 /* We must have drilled down to a symbol. */
10648 if (GET_CODE (x) == LABEL_REF)
10649 return true;
10650 if (GET_CODE (x) != SYMBOL_REF)
10651 return false;
10652 /* FALLTHRU */
10653
10654 case SYMBOL_REF:
10655 /* TLS symbols are never valid. */
10656 if (SYMBOL_REF_TLS_MODEL (x))
10657 return false;
10658
10659 /* DLLIMPORT symbols are never valid. */
10660 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10661 && SYMBOL_REF_DLLIMPORT_P (x))
10662 return false;
10663
10664 #if TARGET_MACHO
10665 /* mdynamic-no-pic */
10666 if (MACHO_DYNAMIC_NO_PIC_P)
10667 return machopic_symbol_defined_p (x);
10668 #endif
10669
10670 /* External function address should be loaded
10671 via the GOT slot to avoid PLT. */
10672 if (ix86_force_load_from_GOT_p (x))
10673 return false;
10674
10675 break;
10676
10677 CASE_CONST_SCALAR_INT:
10678 if (ix86_endbr_immediate_operand (x, VOIDmode))
10679 return false;
10680
10681 switch (mode)
10682 {
10683 case E_TImode:
10684 if (TARGET_64BIT)
10685 return true;
10686 /* FALLTHRU */
10687 case E_OImode:
10688 case E_XImode:
10689 if (!standard_sse_constant_p (x, mode)
10690 && GET_MODE_SIZE (TARGET_AVX512F
10691 ? XImode
10692 : (TARGET_AVX
10693 ? OImode
10694 : (TARGET_SSE2
10695 ? TImode : DImode))) < GET_MODE_SIZE (mode))
10696 return false;
10697 default:
10698 break;
10699 }
10700 break;
10701
10702 case CONST_VECTOR:
10703 if (!standard_sse_constant_p (x, mode))
10704 return false;
10705 break;
10706
10707 case CONST_DOUBLE:
10708 if (mode == E_BFmode)
10709 return false;
10710
10711 default:
10712 break;
10713 }
10714
10715 /* Otherwise we handle everything else in the move patterns. */
10716 return true;
10717 }
10718
10719 /* Determine if it's legal to put X into the constant pool. This
10720 is not possible for the address of thread-local symbols, which
10721 is checked above. */
10722
10723 static bool
10724 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
10725 {
10726 /* We can put any immediate constant in memory. */
10727 switch (GET_CODE (x))
10728 {
10729 CASE_CONST_ANY:
10730 return false;
10731
10732 default:
10733 break;
10734 }
10735
10736 return !ix86_legitimate_constant_p (mode, x);
10737 }
10738
10739 /* Nonzero if the symbol is marked as dllimport or as a stub variable,
10740 otherwise zero. */
10741
10742 static bool
10743 is_imported_p (rtx x)
10744 {
10745 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10746 || GET_CODE (x) != SYMBOL_REF)
10747 return false;
10748
10749 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
10750 }
10751
10752
10753 /* Nonzero if the constant value X is a legitimate general operand
10754 when generating PIC code. It is given that flag_pic is on and
10755 that X satisfies CONSTANT_P. */
10756
10757 bool
10758 legitimate_pic_operand_p (rtx x)
10759 {
10760 rtx inner;
10761
10762 switch (GET_CODE (x))
10763 {
10764 case CONST:
10765 inner = XEXP (x, 0);
10766 if (GET_CODE (inner) == PLUS
10767 && CONST_INT_P (XEXP (inner, 1)))
10768 inner = XEXP (inner, 0);
10769
10770 /* Only some unspecs are valid as "constants". */
10771 if (GET_CODE (inner) == UNSPEC)
10772 switch (XINT (inner, 1))
10773 {
10774 case UNSPEC_GOT:
10775 case UNSPEC_GOTOFF:
10776 case UNSPEC_PLTOFF:
10777 return TARGET_64BIT;
10778 case UNSPEC_TPOFF:
10779 x = XVECEXP (inner, 0, 0);
10780 return (GET_CODE (x) == SYMBOL_REF
10781 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10782 case UNSPEC_MACHOPIC_OFFSET:
10783 return legitimate_pic_address_disp_p (x);
10784 default:
10785 return false;
10786 }
10787 /* FALLTHRU */
10788
10789 case SYMBOL_REF:
10790 case LABEL_REF:
10791 return legitimate_pic_address_disp_p (x);
10792
10793 default:
10794 return true;
10795 }
10796 }
10797
10798 /* Determine if a given CONST RTX is a valid memory displacement
10799 in PIC mode. */
10800
10801 bool
10802 legitimate_pic_address_disp_p (rtx disp)
10803 {
10804 bool saw_plus;
10805
10806 /* In 64bit mode we can allow direct addresses of symbols and labels
10807 when they are not dynamic symbols. */
10808 if (TARGET_64BIT)
10809 {
10810 rtx op0 = disp, op1;
10811
10812 switch (GET_CODE (disp))
10813 {
10814 case LABEL_REF:
10815 return true;
10816
10817 case CONST:
10818 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10819 break;
10820 op0 = XEXP (XEXP (disp, 0), 0);
10821 op1 = XEXP (XEXP (disp, 0), 1);
10822 if (!CONST_INT_P (op1))
10823 break;
10824 if (GET_CODE (op0) == UNSPEC
10825 && (XINT (op0, 1) == UNSPEC_DTPOFF
10826 || XINT (op0, 1) == UNSPEC_NTPOFF)
10827 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10828 return true;
10829 if (INTVAL (op1) >= 16*1024*1024
10830 || INTVAL (op1) < -16*1024*1024)
10831 break;
10832 if (GET_CODE (op0) == LABEL_REF)
10833 return true;
10834 if (GET_CODE (op0) == CONST
10835 && GET_CODE (XEXP (op0, 0)) == UNSPEC
10836 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10837 return true;
10838 if (GET_CODE (op0) == UNSPEC
10839 && XINT (op0, 1) == UNSPEC_PCREL)
10840 return true;
10841 if (GET_CODE (op0) != SYMBOL_REF)
10842 break;
10843 /* FALLTHRU */
10844
10845 case SYMBOL_REF:
10846 /* TLS references should always be enclosed in UNSPEC.
10847 A dllimported symbol always needs to be resolved. */
10848 if (SYMBOL_REF_TLS_MODEL (op0)
10849 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10850 return false;
10851
10852 if (TARGET_PECOFF)
10853 {
10854 if (is_imported_p (op0))
10855 return true;
10856
10857 if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
10858 break;
10859
10860 /* Non-external-weak function symbols need to be resolved only
10861 for the large model. Non-external symbols don't need to be
10862 resolved for large and medium models. For the small model,
10863 we don't need to resolve anything here. */
10864 if ((ix86_cmodel != CM_LARGE_PIC
10865 && SYMBOL_REF_FUNCTION_P (op0)
10866 && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
10867 || !SYMBOL_REF_EXTERNAL_P (op0)
10868 || ix86_cmodel == CM_SMALL_PIC)
10869 return true;
10870 }
10871 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10872 && (SYMBOL_REF_LOCAL_P (op0)
10873 || ((ix86_direct_extern_access
10874 && !(SYMBOL_REF_DECL (op0)
10875 && lookup_attribute ("nodirect_extern_access",
10876 DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
10877 && HAVE_LD_PIE_COPYRELOC
10878 && flag_pie
10879 && !SYMBOL_REF_WEAK (op0)
10880 && !SYMBOL_REF_FUNCTION_P (op0)))
10881 && ix86_cmodel != CM_LARGE_PIC)
10882 return true;
10883 break;
10884
10885 default:
10886 break;
10887 }
10888 }
10889 if (GET_CODE (disp) != CONST)
10890 return false;
10891 disp = XEXP (disp, 0);
10892
10893 if (TARGET_64BIT)
10894 {
10895 /* It is unsafe to allow PLUS expressions here; this limits the allowed
10896 distance of GOT table references. We should not need these anyway. */
10897 if (GET_CODE (disp) != UNSPEC
10898 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10899 && XINT (disp, 1) != UNSPEC_GOTOFF
10900 && XINT (disp, 1) != UNSPEC_PCREL
10901 && XINT (disp, 1) != UNSPEC_PLTOFF))
10902 return false;
10903
10904 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10905 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10906 return false;
10907 return true;
10908 }
10909
10910 saw_plus = false;
10911 if (GET_CODE (disp) == PLUS)
10912 {
10913 if (!CONST_INT_P (XEXP (disp, 1)))
10914 return false;
10915 disp = XEXP (disp, 0);
10916 saw_plus = true;
10917 }
10918
10919 if (TARGET_MACHO && darwin_local_data_pic (disp))
10920 return true;
10921
10922 if (GET_CODE (disp) != UNSPEC)
10923 return false;
10924
10925 switch (XINT (disp, 1))
10926 {
10927 case UNSPEC_GOT:
10928 if (saw_plus)
10929 return false;
10930 /* We need to check for both symbols and labels because VxWorks loads
10931 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10932 details. */
10933 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10934 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10935 case UNSPEC_GOTOFF:
10936 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10937 While the ABI also specifies a 32bit relocation, we don't produce it
10938 in the small PIC model at all. */
10939 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10940 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10941 && !TARGET_64BIT)
10942 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10943 return false;
10944 case UNSPEC_GOTTPOFF:
10945 case UNSPEC_GOTNTPOFF:
10946 case UNSPEC_INDNTPOFF:
10947 if (saw_plus)
10948 return false;
10949 disp = XVECEXP (disp, 0, 0);
10950 return (GET_CODE (disp) == SYMBOL_REF
10951 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10952 case UNSPEC_NTPOFF:
10953 disp = XVECEXP (disp, 0, 0);
10954 return (GET_CODE (disp) == SYMBOL_REF
10955 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10956 case UNSPEC_DTPOFF:
10957 disp = XVECEXP (disp, 0, 0);
10958 return (GET_CODE (disp) == SYMBOL_REF
10959 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10960 }
10961
10962 return false;
10963 }
10964
10965 /* Determine if OP is a suitable RTX for an address register.
10966 Return the naked register if a register or a register subreg is
10967 found, otherwise return NULL_RTX. */
10968
10969 static rtx
10970 ix86_validate_address_register (rtx op)
10971 {
10972 machine_mode mode = GET_MODE (op);
10973
10974 /* Only SImode or DImode registers can form the address. */
10975 if (mode != SImode && mode != DImode)
10976 return NULL_RTX;
10977
10978 if (REG_P (op))
10979 return op;
10980 else if (SUBREG_P (op))
10981 {
10982 rtx reg = SUBREG_REG (op);
10983
10984 if (!REG_P (reg))
10985 return NULL_RTX;
10986
10987 mode = GET_MODE (reg);
10988
10989 /* Don't allow SUBREGs that span more than a word. It can
10990 lead to spill failures when the register is one word out
10991 of a two word structure. */
10992 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10993 return NULL_RTX;
10994
10995 /* Allow only SUBREGs of non-eliminable hard registers. */
10996 if (register_no_elim_operand (reg, mode))
10997 return reg;
10998 }
10999
11000 /* Op is not a register. */
11001 return NULL_RTX;
11002 }
11003
11004 /* Recognizes RTL expressions that are valid memory addresses for an
11005 instruction. The MODE argument is the machine mode for the MEM
11006 expression that wants to use this address.
11007
11008 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11009 convert common non-canonical forms to canonical form so that they will
11010 be recognized. */
11011
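/* For reference, a canonical address decomposed below has the shape
   base + index*scale + disp, e.g. (a sketch, AT&T syntax assumed):

     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12))

   which corresponds to 12(%base,%index,4).  Each part is validated
   separately by the code that follows.  */
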
11012 static bool
11013 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
11014 {
11015 struct ix86_address parts;
11016 rtx base, index, disp;
11017 HOST_WIDE_INT scale;
11018 addr_space_t seg;
11019
11020 if (ix86_decompose_address (addr, &parts) == 0)
11021 /* Decomposition failed. */
11022 return false;
11023
11024 base = parts.base;
11025 index = parts.index;
11026 disp = parts.disp;
11027 scale = parts.scale;
11028 seg = parts.seg;
11029
11030 /* Validate base register. */
11031 if (base)
11032 {
11033 rtx reg = ix86_validate_address_register (base);
11034
11035 if (reg == NULL_RTX)
11036 return false;
11037
11038 unsigned int regno = REGNO (reg);
11039 if ((strict && !REGNO_OK_FOR_BASE_P (regno))
11040 || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
11041 /* Base is not valid. */
11042 return false;
11043 }
11044
11045 /* Validate index register. */
11046 if (index)
11047 {
11048 rtx reg = ix86_validate_address_register (index);
11049
11050 if (reg == NULL_RTX)
11051 return false;
11052
11053 unsigned int regno = REGNO (reg);
11054 if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
11055 || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
11056 /* Index is not valid. */
11057 return false;
11058 }
11059
11060 /* Index and base should have the same mode. */
11061 if (base && index
11062 && GET_MODE (base) != GET_MODE (index))
11063 return false;
11064
11065 /* Address override works only on the (%reg) part of %fs:(%reg). */
11066 if (seg != ADDR_SPACE_GENERIC
11067 && ((base && GET_MODE (base) != word_mode)
11068 || (index && GET_MODE (index) != word_mode)))
11069 return false;
11070
11071 /* Validate scale factor. */
11072 if (scale != 1)
11073 {
11074 if (!index)
11075 /* Scale without index. */
11076 return false;
11077
11078 if (scale != 2 && scale != 4 && scale != 8)
11079 /* Scale is not a valid multiplier. */
11080 return false;
11081 }
11082
11083 /* Validate displacement. */
11084 if (disp)
11085 {
11086 if (ix86_endbr_immediate_operand (disp, VOIDmode))
11087 return false;
11088
11089 if (GET_CODE (disp) == CONST
11090 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11091 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11092 switch (XINT (XEXP (disp, 0), 1))
11093 {
11094 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
11095 when used. While the ABI also specifies 32bit relocations, we
11096 don't produce them at all and use IP-relative addressing instead.
11097 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
11098 should be loaded via the GOT. */
11099 case UNSPEC_GOT:
11100 if (!TARGET_64BIT
11101 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11102 goto is_legitimate_pic;
11103 /* FALLTHRU */
11104 case UNSPEC_GOTOFF:
11105 gcc_assert (flag_pic);
11106 if (!TARGET_64BIT)
11107 goto is_legitimate_pic;
11108
11109 /* 64bit address unspec. */
11110 return false;
11111
11112 case UNSPEC_GOTPCREL:
11113 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
11114 goto is_legitimate_pic;
11115 /* FALLTHRU */
11116 case UNSPEC_PCREL:
11117 gcc_assert (flag_pic);
11118 goto is_legitimate_pic;
11119
11120 case UNSPEC_GOTTPOFF:
11121 case UNSPEC_GOTNTPOFF:
11122 case UNSPEC_INDNTPOFF:
11123 case UNSPEC_NTPOFF:
11124 case UNSPEC_DTPOFF:
11125 break;
11126
11127 default:
11128 /* Invalid address unspec. */
11129 return false;
11130 }
11131
11132 else if (SYMBOLIC_CONST (disp)
11133 && (flag_pic
11134 #if TARGET_MACHO
11135 || (MACHOPIC_INDIRECT
11136 && !machopic_operand_p (disp))
11137 #endif
11138 ))
11139 {
11140
11141 is_legitimate_pic:
11142 if (TARGET_64BIT && (index || base))
11143 {
11144 /* foo@dtpoff(%rX) is ok. */
11145 if (GET_CODE (disp) != CONST
11146 || GET_CODE (XEXP (disp, 0)) != PLUS
11147 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11148 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11149 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11150 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11151 /* Non-constant pic memory reference. */
11152 return false;
11153 }
11154 else if ((!TARGET_MACHO || flag_pic)
11155 && ! legitimate_pic_address_disp_p (disp))
11156 /* Displacement is an invalid pic construct. */
11157 return false;
11158 #if TARGET_MACHO
11159 else if (MACHO_DYNAMIC_NO_PIC_P
11160 && !ix86_legitimate_constant_p (Pmode, disp))
11161 /* The displacement must be referenced via a non_lazy_pointer. */
11162 return false;
11163 #endif
11164
11165 /* This code used to verify that a symbolic pic displacement
11166 includes the pic_offset_table_rtx register.
11167
11168 While this is a good idea, unfortunately these constructs may
11169 be created by the "adds using lea" optimization for incorrect
11170 code like:
11171 
11172 int a;
11173 int foo(int i)
11174 {
11175 return *(&a+i);
11176 }
11177 
11178 This code is nonsensical, but results in addressing the
11179 GOT table with a pic_offset_table_rtx base. We can't
11180 just refuse it easily, since it gets matched by the
11181 "addsi3" pattern, which later gets split to an lea when
11182 the output register differs from the input. While this
11183 could be handled by a separate addsi pattern for this case
11184 that never results in an lea, disabling this test seems to
11185 be the easier and correct fix for the crash. */
11186 }
11187 else if (GET_CODE (disp) != LABEL_REF
11188 && !CONST_INT_P (disp)
11189 && (GET_CODE (disp) != CONST
11190 || !ix86_legitimate_constant_p (Pmode, disp))
11191 && (GET_CODE (disp) != SYMBOL_REF
11192 || !ix86_legitimate_constant_p (Pmode, disp)))
11193 /* Displacement is not constant. */
11194 return false;
11195 else if (TARGET_64BIT
11196 && !x86_64_immediate_operand (disp, VOIDmode))
11197 /* Displacement is out of range. */
11198 return false;
11199 /* In x32 mode, constant addresses are sign extended to 64bit, so
11200 we have to prevent addresses from 0x80000000 to 0xffffffff. */
11201 else if (TARGET_X32 && !(index || base)
11202 && CONST_INT_P (disp)
11203 && val_signbit_known_set_p (SImode, INTVAL (disp)))
11204 return false;
11205 }
11206
11207 /* Everything looks valid. */
11208 return true;
11209 }
11210
11211 /* Determine if a given RTX is a valid constant address. */
11212
11213 bool
11214 constant_address_p (rtx x)
11215 {
11216 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11217 }
11218 \f
11219 /* Return a unique alias set for the GOT. */
11220
11221 alias_set_type
11222 ix86_GOT_alias_set (void)
11223 {
11224 static alias_set_type set = -1;
11225 if (set == -1)
11226 set = new_alias_set ();
11227 return set;
11228 }
11229
11230 /* Return a legitimate reference for ORIG (an address) using the
11231 register REG. If REG is 0, a new pseudo is generated.
11232
11233 There are two types of references that must be handled:
11234
11235 1. Global data references must load the address from the GOT, via
11236 the PIC reg. An insn is emitted to do this load, and the reg is
11237 returned.
11238
11239 2. Static data references, constant pool addresses, and code labels
11240 compute the address as an offset from the GOT, whose base is in
11241 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11242 differentiate them from global data objects. The returned
11243 address is the PIC reg + an unspec constant.
11244
11245 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11246 reg also appears in the address. */
11247
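/* A rough sketch of the two shapes produced here (32bit ELF PIC assumed):

     global data:  (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))
                   i.e. a load of sym@GOT(%ebx)
     local data:   (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))
                   i.e. the address %ebx + sym@GOTOFF

   On 64bit ELF the global case instead becomes a load from
   (const (unspec [sym] UNSPEC_GOTPCREL)), i.e. sym@GOTPCREL(%rip).  */
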
11248 rtx
11249 legitimize_pic_address (rtx orig, rtx reg)
11250 {
11251 rtx addr = orig;
11252 rtx new_rtx = orig;
11253
11254 #if TARGET_MACHO
11255 if (TARGET_MACHO && !TARGET_64BIT)
11256 {
11257 if (reg == 0)
11258 reg = gen_reg_rtx (Pmode);
11259 /* Use the generic Mach-O PIC machinery. */
11260 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11261 }
11262 #endif
11263
11264 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11265 {
11266 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11267 if (tmp)
11268 return tmp;
11269 }
11270
11271 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11272 new_rtx = addr;
11273 else if ((!TARGET_64BIT
11274 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
11275 && !TARGET_PECOFF
11276 && gotoff_operand (addr, Pmode))
11277 {
11278 /* This symbol may be referenced via a displacement
11279 from the PIC base address (@GOTOFF). */
11280 if (GET_CODE (addr) == CONST)
11281 addr = XEXP (addr, 0);
11282
11283 if (GET_CODE (addr) == PLUS)
11284 {
11285 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11286 UNSPEC_GOTOFF);
11287 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11288 }
11289 else
11290 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11291
11292 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11293
11294 if (TARGET_64BIT)
11295 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11296
11297 if (reg != 0)
11298 {
11299 gcc_assert (REG_P (reg));
11300 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11301 new_rtx, reg, 1, OPTAB_DIRECT);
11302 }
11303 else
11304 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11305 }
11306 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11307 /* We can't always use @GOTOFF for text labels
11308 on VxWorks, see gotoff_operand. */
11309 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11310 {
11311 rtx tmp = legitimize_pe_coff_symbol (addr, true);
11312 if (tmp)
11313 return tmp;
11314
11315 /* For x64 PE-COFF there is no GOT table,
11316 so we use the address directly. */
11317 if (TARGET_64BIT && TARGET_PECOFF)
11318 {
11319 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
11320 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11321 }
11322 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11323 {
11324 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
11325 UNSPEC_GOTPCREL);
11326 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11327 new_rtx = gen_const_mem (Pmode, new_rtx);
11328 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11329 }
11330 else
11331 {
11332 /* This symbol must be referenced via a load
11333 from the Global Offset Table (@GOT). */
11334 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11335 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11336
11337 if (TARGET_64BIT)
11338 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11339
11340 if (reg != 0)
11341 {
11342 gcc_assert (REG_P (reg));
11343 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
11344 new_rtx, reg, 1, OPTAB_DIRECT);
11345 }
11346 else
11347 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11348
11349 new_rtx = gen_const_mem (Pmode, new_rtx);
11350 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11351 }
11352
11353 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11354 }
11355 else
11356 {
11357 if (CONST_INT_P (addr)
11358 && !x86_64_immediate_operand (addr, VOIDmode))
11359 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
11360 else if (GET_CODE (addr) == CONST)
11361 {
11362 addr = XEXP (addr, 0);
11363
11364 /* We must match stuff we generate before. Assume the only
11365 unspecs that can get here are ours. Not that we could do
11366 anything with them anyway.... */
11367 if (GET_CODE (addr) == UNSPEC
11368 || (GET_CODE (addr) == PLUS
11369 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11370 return orig;
11371 gcc_assert (GET_CODE (addr) == PLUS);
11372 }
11373
11374 if (GET_CODE (addr) == PLUS)
11375 {
11376 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11377
11378 /* Check first to see if this is a constant
11379 offset from a @GOTOFF symbol reference. */
11380 if (!TARGET_PECOFF
11381 && gotoff_operand (op0, Pmode)
11382 && CONST_INT_P (op1))
11383 {
11384 if (!TARGET_64BIT)
11385 {
11386 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11387 UNSPEC_GOTOFF);
11388 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11389 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11390
11391 if (reg != 0)
11392 {
11393 gcc_assert (REG_P (reg));
11394 new_rtx = expand_simple_binop (Pmode, PLUS,
11395 pic_offset_table_rtx,
11396 new_rtx, reg, 1,
11397 OPTAB_DIRECT);
11398 }
11399 else
11400 new_rtx
11401 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11402 }
11403 else
11404 {
11405 if (INTVAL (op1) < -16*1024*1024
11406 || INTVAL (op1) >= 16*1024*1024)
11407 {
11408 if (!x86_64_immediate_operand (op1, Pmode))
11409 op1 = force_reg (Pmode, op1);
11410
11411 new_rtx
11412 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11413 }
11414 }
11415 }
11416 else
11417 {
11418 rtx base = legitimize_pic_address (op0, reg);
11419 machine_mode mode = GET_MODE (base);
11420 new_rtx
11421 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
11422
11423 if (CONST_INT_P (new_rtx))
11424 {
11425 if (INTVAL (new_rtx) < -16*1024*1024
11426 || INTVAL (new_rtx) >= 16*1024*1024)
11427 {
11428 if (!x86_64_immediate_operand (new_rtx, mode))
11429 new_rtx = force_reg (mode, new_rtx);
11430
11431 new_rtx
11432 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
11433 }
11434 else
11435 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
11436 }
11437 else
11438 {
11439 /* For %rip addressing, we have to use
11440 just disp32, not base nor index. */
11441 if (TARGET_64BIT
11442 && (GET_CODE (base) == SYMBOL_REF
11443 || GET_CODE (base) == LABEL_REF))
11444 base = force_reg (mode, base);
11445 if (GET_CODE (new_rtx) == PLUS
11446 && CONSTANT_P (XEXP (new_rtx, 1)))
11447 {
11448 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
11449 new_rtx = XEXP (new_rtx, 1);
11450 }
11451 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
11452 }
11453 }
11454 }
11455 }
11456 return new_rtx;
11457 }
11458 \f
11459 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11460
11461 static rtx
11462 get_thread_pointer (machine_mode tp_mode, bool to_reg)
11463 {
11464 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11465
11466 if (GET_MODE (tp) != tp_mode)
11467 {
11468 gcc_assert (GET_MODE (tp) == SImode);
11469 gcc_assert (tp_mode == DImode);
11470
11471 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
11472 }
11473
11474 if (to_reg)
11475 tp = copy_to_mode_reg (tp_mode, tp);
11476
11477 return tp;
11478 }
11479
11480 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11481
11482 static GTY(()) rtx ix86_tls_symbol;
11483
11484 static rtx
11485 ix86_tls_get_addr (void)
11486 {
11487 if (!ix86_tls_symbol)
11488 {
11489 const char *sym
11490 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
11491 ? "___tls_get_addr" : "__tls_get_addr");
11492
11493 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
11494 }
11495
11496 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
11497 {
11498 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
11499 UNSPEC_PLTOFF);
11500 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
11501 gen_rtx_CONST (Pmode, unspec));
11502 }
11503
11504 return ix86_tls_symbol;
11505 }
11506
11507 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
11508
11509 static GTY(()) rtx ix86_tls_module_base_symbol;
11510
11511 rtx
11512 ix86_tls_module_base (void)
11513 {
11514 if (!ix86_tls_module_base_symbol)
11515 {
11516 ix86_tls_module_base_symbol
11517 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
11518
11519 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
11520 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
11521 }
11522
11523 return ix86_tls_module_base_symbol;
11524 }
11525
11526 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11527 false if we expect this to be used for a memory address and true if
11528 we expect to load the address into a register. */
11529
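/* As a hedged illustration of what the models below typically expand to
   on 64bit ELF/GNU targets (exact sequences depend on options and the
   assembler), for "__thread int x;":

     global dynamic:  leaq x@tlsgd(%rip), %rdi
                      call __tls_get_addr@PLT
     initial exec:    movq x@gottpoff(%rip), %rax
                      movl %fs:(%rax), %eax
     local exec:      movl %fs:x@tpoff, %eax

   The local dynamic model is like global dynamic but resolves
   _TLS_MODULE_BASE_ once and adds x@dtpoff offsets to it.  */
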
11530 rtx
11531 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
11532 {
11533 rtx dest, base, off;
11534 rtx pic = NULL_RTX, tp = NULL_RTX;
11535 machine_mode tp_mode = Pmode;
11536 int type;
11537
11538 /* Fall back to global dynamic model if tool chain cannot support local
11539 dynamic. */
11540 if (TARGET_SUN_TLS && !TARGET_64BIT
11541 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
11542 && model == TLS_MODEL_LOCAL_DYNAMIC)
11543 model = TLS_MODEL_GLOBAL_DYNAMIC;
11544
11545 switch (model)
11546 {
11547 case TLS_MODEL_GLOBAL_DYNAMIC:
11548 if (!TARGET_64BIT)
11549 {
11550 if (flag_pic && !TARGET_PECOFF)
11551 pic = pic_offset_table_rtx;
11552 else
11553 {
11554 pic = gen_reg_rtx (Pmode);
11555 emit_insn (gen_set_got (pic));
11556 }
11557 }
11558
11559 if (TARGET_GNU2_TLS)
11560 {
11561 dest = gen_reg_rtx (ptr_mode);
11562 if (TARGET_64BIT)
11563 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
11564 else
11565 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
11566
11567 tp = get_thread_pointer (ptr_mode, true);
11568 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11569 if (GET_MODE (dest) != Pmode)
11570 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11571 dest = force_reg (Pmode, dest);
11572
11573 if (GET_MODE (x) != Pmode)
11574 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11575
11576 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11577 }
11578 else
11579 {
11580 rtx caddr = ix86_tls_get_addr ();
11581
11582 dest = gen_reg_rtx (Pmode);
11583 if (TARGET_64BIT)
11584 {
11585 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11586 rtx_insn *insns;
11587
11588 start_sequence ();
11589 emit_call_insn
11590 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
11591 insns = get_insns ();
11592 end_sequence ();
11593
11594 if (GET_MODE (x) != Pmode)
11595 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11596
11597 RTL_CONST_CALL_P (insns) = 1;
11598 emit_libcall_block (insns, dest, rax, x);
11599 }
11600 else
11601 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
11602 }
11603 break;
11604
11605 case TLS_MODEL_LOCAL_DYNAMIC:
11606 if (!TARGET_64BIT)
11607 {
11608 if (flag_pic)
11609 pic = pic_offset_table_rtx;
11610 else
11611 {
11612 pic = gen_reg_rtx (Pmode);
11613 emit_insn (gen_set_got (pic));
11614 }
11615 }
11616
11617 if (TARGET_GNU2_TLS)
11618 {
11619 rtx tmp = ix86_tls_module_base ();
11620
11621 base = gen_reg_rtx (ptr_mode);
11622 if (TARGET_64BIT)
11623 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
11624 else
11625 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
11626
11627 tp = get_thread_pointer (ptr_mode, true);
11628 if (GET_MODE (base) != Pmode)
11629 base = gen_rtx_ZERO_EXTEND (Pmode, base);
11630 base = force_reg (Pmode, base);
11631 }
11632 else
11633 {
11634 rtx caddr = ix86_tls_get_addr ();
11635
11636 base = gen_reg_rtx (Pmode);
11637 if (TARGET_64BIT)
11638 {
11639 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11640 rtx_insn *insns;
11641 rtx eqv;
11642
11643 start_sequence ();
11644 emit_call_insn
11645 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
11646 insns = get_insns ();
11647 end_sequence ();
11648
11649 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11650 share the LD_BASE result with other LD model accesses. */
11651 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11652 UNSPEC_TLS_LD_BASE);
11653
11654 RTL_CONST_CALL_P (insns) = 1;
11655 emit_libcall_block (insns, base, rax, eqv);
11656 }
11657 else
11658 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
11659 }
11660
11661 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11662 off = gen_rtx_CONST (Pmode, off);
11663
11664 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11665
11666 if (TARGET_GNU2_TLS)
11667 {
11668 if (GET_MODE (tp) != Pmode)
11669 {
11670 dest = lowpart_subreg (ptr_mode, dest, Pmode);
11671 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11672 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11673 }
11674 else
11675 dest = gen_rtx_PLUS (Pmode, tp, dest);
11676 dest = force_reg (Pmode, dest);
11677
11678 if (GET_MODE (x) != Pmode)
11679 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11680
11681 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11682 }
11683 break;
11684
11685 case TLS_MODEL_INITIAL_EXEC:
11686 if (TARGET_64BIT)
11687 {
11688 if (TARGET_SUN_TLS && !TARGET_X32)
11689 {
11690 /* The Sun linker took the AMD64 TLS spec literally
11691 and can only handle %rax as the destination of the
11692 initial-exec code sequence. */
11693
11694 dest = gen_reg_rtx (DImode);
11695 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
11696 return dest;
11697 }
11698
11699 /* Generate DImode references to avoid %fs:(%reg32)
11700 problems and linker IE->LE relaxation bug. */
11701 tp_mode = DImode;
11702 pic = NULL;
11703 type = UNSPEC_GOTNTPOFF;
11704 }
11705 else if (flag_pic)
11706 {
11707 pic = pic_offset_table_rtx;
11708 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11709 }
11710 else if (!TARGET_ANY_GNU_TLS)
11711 {
11712 pic = gen_reg_rtx (Pmode);
11713 emit_insn (gen_set_got (pic));
11714 type = UNSPEC_GOTTPOFF;
11715 }
11716 else
11717 {
11718 pic = NULL;
11719 type = UNSPEC_INDNTPOFF;
11720 }
11721
11722 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
11723 off = gen_rtx_CONST (tp_mode, off);
11724 if (pic)
11725 off = gen_rtx_PLUS (tp_mode, pic, off);
11726 off = gen_const_mem (tp_mode, off);
11727 set_mem_alias_set (off, ix86_GOT_alias_set ());
11728
11729 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11730 {
11731 base = get_thread_pointer (tp_mode,
11732 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11733 off = force_reg (tp_mode, off);
11734 dest = gen_rtx_PLUS (tp_mode, base, off);
11735 if (tp_mode != Pmode)
11736 dest = convert_to_mode (Pmode, dest, 1);
11737 }
11738 else
11739 {
11740 base = get_thread_pointer (Pmode, true);
11741 dest = gen_reg_rtx (Pmode);
11742 emit_insn (gen_sub3_insn (dest, base, off));
11743 }
11744 break;
11745
11746 case TLS_MODEL_LOCAL_EXEC:
11747 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11748 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11749 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11750 off = gen_rtx_CONST (Pmode, off);
11751
11752 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11753 {
11754 base = get_thread_pointer (Pmode,
11755 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11756 return gen_rtx_PLUS (Pmode, base, off);
11757 }
11758 else
11759 {
11760 base = get_thread_pointer (Pmode, true);
11761 dest = gen_reg_rtx (Pmode);
11762 emit_insn (gen_sub3_insn (dest, base, off));
11763 }
11764 break;
11765
11766 default:
11767 gcc_unreachable ();
11768 }
11769
11770 return dest;
11771 }
11772
11773 /* Return true if the TLS address requires an insn using integer registers.
11774 It's used to prevent KMOV/VMOV in TLS code sequences which require integer
11775 MOV instructions; refer to PR103275. */
11776 bool
11777 ix86_gpr_tls_address_pattern_p (rtx mem)
11778 {
11779 gcc_assert (MEM_P (mem));
11780
11781 rtx addr = XEXP (mem, 0);
11782 subrtx_var_iterator::array_type array;
11783 FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
11784 {
11785 rtx op = *iter;
11786 if (GET_CODE (op) == UNSPEC)
11787 switch (XINT (op, 1))
11788 {
11789 case UNSPEC_GOTNTPOFF:
11790 return true;
11791 case UNSPEC_TPOFF:
11792 if (!TARGET_64BIT)
11793 return true;
11794 break;
11795 default:
11796 break;
11797 }
11798 }
11799
11800 return false;
11801 }
11802
11803 /* Return true if OP refers to a TLS address. */
11804 bool
11805 ix86_tls_address_pattern_p (rtx op)
11806 {
11807 subrtx_var_iterator::array_type array;
11808 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
11809 {
11810 rtx op = *iter;
11811 if (MEM_P (op))
11812 {
11813 rtx *x = &XEXP (op, 0);
11814 while (GET_CODE (*x) == PLUS)
11815 {
11816 int i;
11817 for (i = 0; i < 2; i++)
11818 {
11819 rtx u = XEXP (*x, i);
11820 if (GET_CODE (u) == ZERO_EXTEND)
11821 u = XEXP (u, 0);
11822 if (GET_CODE (u) == UNSPEC
11823 && XINT (u, 1) == UNSPEC_TP)
11824 return true;
11825 }
11826 x = &XEXP (*x, 0);
11827 }
11828
11829 iter.skip_subrtxes ();
11830 }
11831 }
11832
11833 return false;
11834 }
11835
11836 /* Rewrite *LOC so that it refers to the default TLS address space. */
11837 void
11838 ix86_rewrite_tls_address_1 (rtx *loc)
11839 {
11840 subrtx_ptr_iterator::array_type array;
11841 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11842 {
11843 rtx *loc = *iter;
11844 if (MEM_P (*loc))
11845 {
11846 rtx addr = XEXP (*loc, 0);
11847 rtx *x = &addr;
11848 while (GET_CODE (*x) == PLUS)
11849 {
11850 int i;
11851 for (i = 0; i < 2; i++)
11852 {
11853 rtx u = XEXP (*x, i);
11854 if (GET_CODE (u) == ZERO_EXTEND)
11855 u = XEXP (u, 0);
11856 if (GET_CODE (u) == UNSPEC
11857 && XINT (u, 1) == UNSPEC_TP)
11858 {
11859 addr_space_t as = DEFAULT_TLS_SEG_REG;
11860
11861 *x = XEXP (*x, 1 - i);
11862
11863 *loc = replace_equiv_address_nv (*loc, addr, true);
11864 set_mem_addr_space (*loc, as);
11865 return;
11866 }
11867 }
11868 x = &XEXP (*x, 0);
11869 }
11870
11871 iter.skip_subrtxes ();
11872 }
11873 }
11874 }
11875
11876 /* Rewrite an instruction pattern involving a TLS address
11877 so that it refers to the default TLS address space. */
11878 rtx
11879 ix86_rewrite_tls_address (rtx pattern)
11880 {
11881 pattern = copy_insn (pattern);
11882 ix86_rewrite_tls_address_1 (&pattern);
11883 return pattern;
11884 }
11885
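/* A small sketch of the rewrite done above: an address such as

     (plus (unspec [(const_int 0)] UNSPEC_TP) (reg))

   loses the UNSPEC_TP term and the containing MEM is given the
   DEFAULT_TLS_SEG_REG address space instead, so it prints as
   %fs:(%reg) (or %gs: on 32bit targets).  */
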
11886 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11887 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11888 unique refptr-DECL symbol corresponding to symbol DECL. */
11889
11890 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11891 {
11892 static inline hashval_t hash (tree_map *m) { return m->hash; }
11893 static inline bool
11894 equal (tree_map *a, tree_map *b)
11895 {
11896 return a->base.from == b->base.from;
11897 }
11898
11899 static int
11900 keep_cache_entry (tree_map *&m)
11901 {
11902 return ggc_marked_p (m->base.from);
11903 }
11904 };
11905
11906 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11907
11908 static tree
11909 get_dllimport_decl (tree decl, bool beimport)
11910 {
11911 struct tree_map *h, in;
11912 const char *name;
11913 const char *prefix;
11914 size_t namelen, prefixlen;
11915 char *imp_name;
11916 tree to;
11917 rtx rtl;
11918
11919 if (!dllimport_map)
11920 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11921
11922 in.hash = htab_hash_pointer (decl);
11923 in.base.from = decl;
11924 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11925 h = *loc;
11926 if (h)
11927 return h->to;
11928
11929 *loc = h = ggc_alloc<tree_map> ();
11930 h->hash = in.hash;
11931 h->base.from = decl;
11932 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11933 VAR_DECL, NULL, ptr_type_node);
11934 DECL_ARTIFICIAL (to) = 1;
11935 DECL_IGNORED_P (to) = 1;
11936 DECL_EXTERNAL (to) = 1;
11937 TREE_READONLY (to) = 1;
11938
11939 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11940 name = targetm.strip_name_encoding (name);
11941 if (beimport)
11942 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11943 ? "*__imp_" : "*__imp__";
11944 else
11945 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11946 namelen = strlen (name);
11947 prefixlen = strlen (prefix);
11948 imp_name = (char *) alloca (namelen + prefixlen + 1);
11949 memcpy (imp_name, prefix, prefixlen);
11950 memcpy (imp_name + prefixlen, name, namelen + 1);
11951
11952 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11953 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11954 SET_SYMBOL_REF_DECL (rtl, to);
11955 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11956 if (!beimport)
11957 {
11958 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11959 #ifdef SUB_TARGET_RECORD_STUB
11960 SUB_TARGET_RECORD_STUB (name);
11961 #endif
11962 }
11963
11964 rtl = gen_const_mem (Pmode, rtl);
11965 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11966
11967 SET_DECL_RTL (to, rtl);
11968 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11969
11970 return to;
11971 }
11972
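/* For illustration (MinGW-style naming assumed): a reference to a
   variable declared as

     __declspec(dllimport) extern int x;

   is rewritten to go through the import pointer created here, roughly

     movq __imp_x(%rip), %rax
     movl (%rax), %eax

   on x86_64, while the refptr case instead creates a local .refptr.DECL
   indirection for far external symbols.  */
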
11973 /* Expand SYMBOL into its corresponding far-address symbol.
11974 WANT_REG is true if we require the result be a register. */
11975
11976 static rtx
11977 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11978 {
11979 tree imp_decl;
11980 rtx x;
11981
11982 gcc_assert (SYMBOL_REF_DECL (symbol));
11983 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11984
11985 x = DECL_RTL (imp_decl);
11986 if (want_reg)
11987 x = force_reg (Pmode, x);
11988 return x;
11989 }
11990
11991 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11992 true if we require the result be a register. */
11993
11994 static rtx
11995 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11996 {
11997 tree imp_decl;
11998 rtx x;
11999
12000 gcc_assert (SYMBOL_REF_DECL (symbol));
12001 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
12002
12003 x = DECL_RTL (imp_decl);
12004 if (want_reg)
12005 x = force_reg (Pmode, x);
12006 return x;
12007 }
12008
12009 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
12010 is true if we require the result be a register. */
12011
12012 rtx
12013 legitimize_pe_coff_symbol (rtx addr, bool inreg)
12014 {
12015 if (!TARGET_PECOFF)
12016 return NULL_RTX;
12017
12018 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12019 {
12020 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
12021 return legitimize_dllimport_symbol (addr, inreg);
12022 if (GET_CODE (addr) == CONST
12023 && GET_CODE (XEXP (addr, 0)) == PLUS
12024 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12025 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
12026 {
12027 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
12028 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12029 }
12030 }
12031
12032 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
12033 return NULL_RTX;
12034 if (GET_CODE (addr) == SYMBOL_REF
12035 && !is_imported_p (addr)
12036 && SYMBOL_REF_EXTERNAL_P (addr)
12037 && SYMBOL_REF_DECL (addr))
12038 return legitimize_pe_coff_extern_decl (addr, inreg);
12039
12040 if (GET_CODE (addr) == CONST
12041 && GET_CODE (XEXP (addr, 0)) == PLUS
12042 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
12043 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
12044 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
12045 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
12046 {
12047 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
12048 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
12049 }
12050 return NULL_RTX;
12051 }
12052
12053 /* Try machine-dependent ways of modifying an illegitimate address
12054 to be legitimate. If we find one, return the new, valid address.
12055 This macro is used in only one place: `memory_address' in explow.cc.
12056
12057 OLDX is the address as it was before break_out_memory_refs was called.
12058 In some cases it is useful to look at this to decide what needs to be done.
12059
12060 It is always safe for this macro to do nothing. It exists to recognize
12061 opportunities to optimize the output.
12062
12063 For the 80386, we handle X+REG by loading X into a register R and
12064 using R+REG. R will go in a general reg and indexing will be used.
12065 However, if REG is a broken-out memory address or multiplication,
12066 nothing needs to be done because REG can certainly go in a general reg.
12067
12068 When -fpic is used, special handling is needed for symbolic references.
12069 See comments by legitimize_pic_address in i386.cc for details. */
12070
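/* A minimal sketch of the canonicalizations applied below: a shift used
   as an index, e.g.

     (plus (ashift (reg) (const_int 2)) (reg))

   is rewritten as (plus (mult (reg) (const_int 4)) (reg)) so that it
   matches the base + index*scale addressing form, and nested PLUS/MULT
   combinations are reassociated into that same shape.  */
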
12071 static rtx
12072 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
12073 {
12074 bool changed = false;
12075 unsigned log;
12076
12077 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
12078 if (log)
12079 return legitimize_tls_address (x, (enum tls_model) log, false);
12080 if (GET_CODE (x) == CONST
12081 && GET_CODE (XEXP (x, 0)) == PLUS
12082 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
12083 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
12084 {
12085 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
12086 (enum tls_model) log, false);
12087 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
12088 }
12089
12090 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
12091 {
12092 rtx tmp = legitimize_pe_coff_symbol (x, true);
12093 if (tmp)
12094 return tmp;
12095 }
12096
12097 if (flag_pic && SYMBOLIC_CONST (x))
12098 return legitimize_pic_address (x, 0);
12099
12100 #if TARGET_MACHO
12101 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
12102 return machopic_indirect_data_reference (x, 0);
12103 #endif
12104
12105 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12106 if (GET_CODE (x) == ASHIFT
12107 && CONST_INT_P (XEXP (x, 1))
12108 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
12109 {
12110 changed = true;
12111 log = INTVAL (XEXP (x, 1));
12112 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
12113 GEN_INT (1 << log));
12114 }
12115
12116 if (GET_CODE (x) == PLUS)
12117 {
12118 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
12119
12120 if (GET_CODE (XEXP (x, 0)) == ASHIFT
12121 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12122 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
12123 {
12124 changed = true;
12125 log = INTVAL (XEXP (XEXP (x, 0), 1));
12126 XEXP (x, 0) = gen_rtx_MULT (Pmode,
12127 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
12128 GEN_INT (1 << log));
12129 }
12130
12131 if (GET_CODE (XEXP (x, 1)) == ASHIFT
12132 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
12133 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
12134 {
12135 changed = true;
12136 log = INTVAL (XEXP (XEXP (x, 1), 1));
12137 XEXP (x, 1) = gen_rtx_MULT (Pmode,
12138 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
12139 GEN_INT (1 << log));
12140 }
12141
12142 /* Put multiply first if it isn't already. */
12143 if (GET_CODE (XEXP (x, 1)) == MULT)
12144 {
12145 std::swap (XEXP (x, 0), XEXP (x, 1));
12146 changed = true;
12147 }
12148
12149 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12150 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12151 created by virtual register instantiation, register elimination, and
12152 similar optimizations. */
12153 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12154 {
12155 changed = true;
12156 x = gen_rtx_PLUS (Pmode,
12157 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12158 XEXP (XEXP (x, 1), 0)),
12159 XEXP (XEXP (x, 1), 1));
12160 }
12161
12162 /* Canonicalize
12163 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12164 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12165 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12166 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12167 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12168 && CONSTANT_P (XEXP (x, 1)))
12169 {
12170 rtx constant;
12171 rtx other = NULL_RTX;
12172
12173 if (CONST_INT_P (XEXP (x, 1)))
12174 {
12175 constant = XEXP (x, 1);
12176 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12177 }
12178 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12179 {
12180 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12181 other = XEXP (x, 1);
12182 }
12183 else
12184 constant = 0;
12185
12186 if (constant)
12187 {
12188 changed = true;
12189 x = gen_rtx_PLUS (Pmode,
12190 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12191 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12192 plus_constant (Pmode, other,
12193 INTVAL (constant)));
12194 }
12195 }
12196
12197 if (changed && ix86_legitimate_address_p (mode, x, false))
12198 return x;
12199
12200 if (GET_CODE (XEXP (x, 0)) == MULT)
12201 {
12202 changed = true;
12203 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
12204 }
12205
12206 if (GET_CODE (XEXP (x, 1)) == MULT)
12207 {
12208 changed = true;
12209 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
12210 }
12211
12212 if (changed
12213 && REG_P (XEXP (x, 1))
12214 && REG_P (XEXP (x, 0)))
12215 return x;
12216
12217 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12218 {
12219 changed = true;
12220 x = legitimize_pic_address (x, 0);
12221 }
12222
12223 if (changed && ix86_legitimate_address_p (mode, x, false))
12224 return x;
12225
12226 if (REG_P (XEXP (x, 0)))
12227 {
12228 rtx temp = gen_reg_rtx (Pmode);
12229 rtx val = force_operand (XEXP (x, 1), temp);
12230 if (val != temp)
12231 {
12232 val = convert_to_mode (Pmode, val, 1);
12233 emit_move_insn (temp, val);
12234 }
12235
12236 XEXP (x, 1) = temp;
12237 return x;
12238 }
12239
12240 else if (REG_P (XEXP (x, 1)))
12241 {
12242 rtx temp = gen_reg_rtx (Pmode);
12243 rtx val = force_operand (XEXP (x, 0), temp);
12244 if (val != temp)
12245 {
12246 val = convert_to_mode (Pmode, val, 1);
12247 emit_move_insn (temp, val);
12248 }
12249
12250 XEXP (x, 0) = temp;
12251 return x;
12252 }
12253 }
12254
12255 return x;
12256 }
12257 \f
12258 /* Print an integer constant expression in assembler syntax. Addition
12259 and subtraction are the only arithmetic that may appear in these
12260 expressions. FILE is the stdio stream to write to, X is the rtx, and
12261 CODE is the operand print code from the output string. */
12262
12263 static void
12264 output_pic_addr_const (FILE *file, rtx x, int code)
12265 {
12266 char buf[256];
12267
12268 switch (GET_CODE (x))
12269 {
12270 case PC:
12271 gcc_assert (flag_pic);
12272 putc ('.', file);
12273 break;
12274
12275 case SYMBOL_REF:
12276 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
12277 output_addr_const (file, x);
12278 else
12279 {
12280 const char *name = XSTR (x, 0);
12281
12282 /* Mark the decl as referenced so that cgraph will
12283 output the function. */
12284 if (SYMBOL_REF_DECL (x))
12285 mark_decl_referenced (SYMBOL_REF_DECL (x));
12286
12287 #if TARGET_MACHO
12288 if (MACHOPIC_INDIRECT
12289 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12290 name = machopic_indirection_name (x, /*stub_p=*/true);
12291 #endif
12292 assemble_name (file, name);
12293 }
12294 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
12295 && code == 'P' && ix86_call_use_plt_p (x))
12296 fputs ("@PLT", file);
12297 break;
12298
12299 case LABEL_REF:
12300 x = XEXP (x, 0);
12301 /* FALLTHRU */
12302 case CODE_LABEL:
12303 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12304 assemble_name (asm_out_file, buf);
12305 break;
12306
12307 case CONST_INT:
12308 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12309 break;
12310
12311 case CONST:
12312 /* This used to output parentheses around the expression,
12313 but that does not work on the 386 (either ATT or BSD assembler). */
12314 output_pic_addr_const (file, XEXP (x, 0), code);
12315 break;
12316
12317 case CONST_DOUBLE:
12318 /* We can't handle floating point constants;
12319 TARGET_PRINT_OPERAND must handle them. */
12320 output_operand_lossage ("floating constant misused");
12321 break;
12322
12323 case PLUS:
12324 /* Some assemblers need integer constants to appear first. */
12325 if (CONST_INT_P (XEXP (x, 0)))
12326 {
12327 output_pic_addr_const (file, XEXP (x, 0), code);
12328 putc ('+', file);
12329 output_pic_addr_const (file, XEXP (x, 1), code);
12330 }
12331 else
12332 {
12333 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12334 output_pic_addr_const (file, XEXP (x, 1), code);
12335 putc ('+', file);
12336 output_pic_addr_const (file, XEXP (x, 0), code);
12337 }
12338 break;
12339
12340 case MINUS:
12341 if (!TARGET_MACHO)
12342 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12343 output_pic_addr_const (file, XEXP (x, 0), code);
12344 putc ('-', file);
12345 output_pic_addr_const (file, XEXP (x, 1), code);
12346 if (!TARGET_MACHO)
12347 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12348 break;
12349
12350 case UNSPEC:
12351 gcc_assert (XVECLEN (x, 0) == 1);
12352 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12353 switch (XINT (x, 1))
12354 {
12355 case UNSPEC_GOT:
12356 fputs ("@GOT", file);
12357 break;
12358 case UNSPEC_GOTOFF:
12359 fputs ("@GOTOFF", file);
12360 break;
12361 case UNSPEC_PLTOFF:
12362 fputs ("@PLTOFF", file);
12363 break;
12364 case UNSPEC_PCREL:
12365 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12366 "(%rip)" : "[rip]", file);
12367 break;
12368 case UNSPEC_GOTPCREL:
12369 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12370 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12371 break;
12372 case UNSPEC_GOTTPOFF:
12373 /* FIXME: This might be @TPOFF in Sun ld too. */
12374 fputs ("@gottpoff", file);
12375 break;
12376 case UNSPEC_TPOFF:
12377 fputs ("@tpoff", file);
12378 break;
12379 case UNSPEC_NTPOFF:
12380 if (TARGET_64BIT)
12381 fputs ("@tpoff", file);
12382 else
12383 fputs ("@ntpoff", file);
12384 break;
12385 case UNSPEC_DTPOFF:
12386 fputs ("@dtpoff", file);
12387 break;
12388 case UNSPEC_GOTNTPOFF:
12389 if (TARGET_64BIT)
12390 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12391 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12392 else
12393 fputs ("@gotntpoff", file);
12394 break;
12395 case UNSPEC_INDNTPOFF:
12396 fputs ("@indntpoff", file);
12397 break;
12398 #if TARGET_MACHO
12399 case UNSPEC_MACHOPIC_OFFSET:
12400 putc ('-', file);
12401 machopic_output_function_base_name (file);
12402 break;
12403 #endif
12404 default:
12405 output_operand_lossage ("invalid UNSPEC as operand");
12406 break;
12407 }
12408 break;
12409
12410 default:
12411 output_operand_lossage ("invalid expression as operand");
12412 }
12413 }
12414
12415 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12416 We need to emit DTP-relative relocations. */
12417
12418 static void ATTRIBUTE_UNUSED
12419 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12420 {
12421 fputs (ASM_LONG, file);
12422 output_addr_const (file, x);
12423 fputs ("@dtpoff", file);
12424 switch (size)
12425 {
12426 case 4:
12427 break;
12428 case 8:
12429 fputs (", 0", file);
12430 break;
12431 default:
12432 gcc_unreachable ();
12433 }
12434 }
12435
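/* For example (directive spelling may vary with the target assembler),
   for a symbol "x" this emits roughly

     .long x@dtpoff        for SIZE == 4
     .long x@dtpoff, 0     for SIZE == 8

   i.e. a DTP-relative value padded to 8 bytes in the 64bit case.  */
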
12436 /* Return true if X is a representation of the PIC register. This copes
12437 with calls from ix86_find_base_term, where the register might have
12438 been replaced by a cselib value. */
12439
12440 static bool
12441 ix86_pic_register_p (rtx x)
12442 {
12443 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12444 return (pic_offset_table_rtx
12445 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12446 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
12447 return true;
12448 else if (!REG_P (x))
12449 return false;
12450 else if (pic_offset_table_rtx)
12451 {
12452 if (REGNO (x) == REGNO (pic_offset_table_rtx))
12453 return true;
12454 if (HARD_REGISTER_P (x)
12455 && !HARD_REGISTER_P (pic_offset_table_rtx)
12456 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
12457 return true;
12458 return false;
12459 }
12460 else
12461 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12462 }
12463
12464 /* Helper function for ix86_delegitimize_address.
12465 Attempt to delegitimize TLS local-exec accesses. */
12466
12467 static rtx
12468 ix86_delegitimize_tls_address (rtx orig_x)
12469 {
12470 rtx x = orig_x, unspec;
12471 struct ix86_address addr;
12472
12473 if (!TARGET_TLS_DIRECT_SEG_REFS)
12474 return orig_x;
12475 if (MEM_P (x))
12476 x = XEXP (x, 0);
12477 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12478 return orig_x;
12479 if (ix86_decompose_address (x, &addr) == 0
12480 || addr.seg != DEFAULT_TLS_SEG_REG
12481 || addr.disp == NULL_RTX
12482 || GET_CODE (addr.disp) != CONST)
12483 return orig_x;
12484 unspec = XEXP (addr.disp, 0);
12485 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12486 unspec = XEXP (unspec, 0);
12487 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12488 return orig_x;
12489 x = XVECEXP (unspec, 0, 0);
12490 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12491 if (unspec != XEXP (addr.disp, 0))
12492 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12493 if (addr.index)
12494 {
12495 rtx idx = addr.index;
12496 if (addr.scale != 1)
12497 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12498 x = gen_rtx_PLUS (Pmode, idx, x);
12499 }
12500 if (addr.base)
12501 x = gen_rtx_PLUS (Pmode, addr.base, x);
12502 if (MEM_P (orig_x))
12503 x = replace_equiv_address_nv (orig_x, x);
12504 return x;
12505 }
12506
12507 /* In the name of slightly smaller debug output, and to cater to
12508 general assembler lossage, recognize PIC+GOTOFF and turn it back
12509 into a direct symbol reference.
12510
12511 On Darwin, this is necessary to avoid a crash, because Darwin
12512 has a different PIC label for each routine but the DWARF debugging
12513 information is not associated with any particular routine, so it's
12514 necessary to remove references to the PIC label from RTL stored by
12515 the DWARF output code.
12516
12517 This helper is used in the normal ix86_delegitimize_address
12518 entrypoint (e.g. used in the target delegitimization hook) and
12519 in ix86_find_base_term. As a compile-time memory optimization, we
12520 avoid allocating rtxes that will not change anything in the outcome
12521 of the callers (find_base_value and find_base_term). */
12522
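/* A sketch of the typical 32bit case handled here: an address like

     (plus (reg:SI ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   i.e. the address used in foo@GOTOFF(%ebx), is turned back into plain
   (symbol_ref "foo"), with any register or constant addend re-applied
   around the result.  */
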
12523 static inline rtx
12524 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
12525 {
12526 rtx orig_x = delegitimize_mem_from_attrs (x);
12527 /* addend is NULL or some rtx if x is something+GOTOFF where
12528 something doesn't include the PIC register. */
12529 rtx addend = NULL_RTX;
12530 /* reg_addend is NULL or a multiple of some register. */
12531 rtx reg_addend = NULL_RTX;
12532 /* const_addend is NULL or a const_int. */
12533 rtx const_addend = NULL_RTX;
12534 /* This is the result, or NULL. */
12535 rtx result = NULL_RTX;
12536
12537 x = orig_x;
12538
12539 if (MEM_P (x))
12540 x = XEXP (x, 0);
12541
12542 if (TARGET_64BIT)
12543 {
12544 if (GET_CODE (x) == CONST
12545 && GET_CODE (XEXP (x, 0)) == PLUS
12546 && GET_MODE (XEXP (x, 0)) == Pmode
12547 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12548 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
12549 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
12550 {
12551 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
12552 base. A CONST can't be arg_pointer_rtx based. */
12553 if (base_term_p && MEM_P (orig_x))
12554 return orig_x;
12555 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
12556 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
12557 if (MEM_P (orig_x))
12558 x = replace_equiv_address_nv (orig_x, x);
12559 return x;
12560 }
12561
12562 if (GET_CODE (x) == CONST
12563 && GET_CODE (XEXP (x, 0)) == UNSPEC
12564 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
12565 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
12566 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
12567 {
12568 x = XVECEXP (XEXP (x, 0), 0, 0);
12569 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
12570 {
12571 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
12572 if (x == NULL_RTX)
12573 return orig_x;
12574 }
12575 return x;
12576 }
12577
12578 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
12579 return ix86_delegitimize_tls_address (orig_x);
12580
12581 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12582 and -mcmodel=medium -fpic. */
12583 }
12584
12585 if (GET_CODE (x) != PLUS
12586 || GET_CODE (XEXP (x, 1)) != CONST)
12587 return ix86_delegitimize_tls_address (orig_x);
12588
12589 if (ix86_pic_register_p (XEXP (x, 0)))
12590 /* %ebx + GOT/GOTOFF */
12591 ;
12592 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12593 {
12594 /* %ebx + %reg * scale + GOT/GOTOFF */
12595 reg_addend = XEXP (x, 0);
12596 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12597 reg_addend = XEXP (reg_addend, 1);
12598 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12599 reg_addend = XEXP (reg_addend, 0);
12600 else
12601 {
12602 reg_addend = NULL_RTX;
12603 addend = XEXP (x, 0);
12604 }
12605 }
12606 else
12607 addend = XEXP (x, 0);
12608
12609 x = XEXP (XEXP (x, 1), 0);
12610 if (GET_CODE (x) == PLUS
12611 && CONST_INT_P (XEXP (x, 1)))
12612 {
12613 const_addend = XEXP (x, 1);
12614 x = XEXP (x, 0);
12615 }
12616
12617 if (GET_CODE (x) == UNSPEC
12618 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12619 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
12620 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
12621 && !MEM_P (orig_x) && !addend)))
12622 result = XVECEXP (x, 0, 0);
12623
12624 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
12625 && !MEM_P (orig_x))
12626 result = XVECEXP (x, 0, 0);
12627
12628 if (! result)
12629 return ix86_delegitimize_tls_address (orig_x);
12630
12631 /* For (PLUS something CONST_INT) both find_base_{value,term} just
12632 recurse on the first operand. */
12633 if (const_addend && !base_term_p)
12634 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12635 if (reg_addend)
12636 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12637 if (addend)
12638 {
12639 /* If the rest of original X doesn't involve the PIC register, add
12640 addend and subtract pic_offset_table_rtx. This can happen e.g.
12641 for code like:
12642 leal (%ebx, %ecx, 4), %ecx
12643 ...
12644 movl foo@GOTOFF(%ecx), %edx
12645 in which case we return (%ecx - %ebx) + foo
12646 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12647 and reload has completed. Don't do the latter for debug,
12648 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
12649 if (pic_offset_table_rtx
12650 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
12651 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12652 pic_offset_table_rtx),
12653 result);
12654 else if (base_term_p
12655 && pic_offset_table_rtx
12656 && !TARGET_MACHO
12657 && !TARGET_VXWORKS_RTP)
12658 {
12659 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
12660 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
12661 result = gen_rtx_PLUS (Pmode, tmp, result);
12662 }
12663 else
12664 return orig_x;
12665 }
12666 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12667 {
12668 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
12669 if (result == NULL_RTX)
12670 return orig_x;
12671 }
12672 return result;
12673 }
12674
12675 /* The normal instantiation of the above template. */
12676
12677 static rtx
12678 ix86_delegitimize_address (rtx x)
12679 {
12680 return ix86_delegitimize_address_1 (x, false);
12681 }
12682
12683 /* If X is a machine specific address (i.e. a symbol or label being
12684 referenced as a displacement from the GOT implemented using an
12685 UNSPEC), then return the base term. Otherwise return X. */
12686
12687 rtx
12688 ix86_find_base_term (rtx x)
12689 {
12690 rtx term;
12691
12692 if (TARGET_64BIT)
12693 {
12694 if (GET_CODE (x) != CONST)
12695 return x;
12696 term = XEXP (x, 0);
12697 if (GET_CODE (term) == PLUS
12698 && CONST_INT_P (XEXP (term, 1)))
12699 term = XEXP (term, 0);
12700 if (GET_CODE (term) != UNSPEC
12701 || (XINT (term, 1) != UNSPEC_GOTPCREL
12702 && XINT (term, 1) != UNSPEC_PCREL))
12703 return x;
12704
12705 return XVECEXP (term, 0, 0);
12706 }
12707
12708 return ix86_delegitimize_address_1 (x, true);
12709 }
12710
12711 /* Return true if X shouldn't be emitted into the debug info.
12712 Disallow UNSPECs other than @gotoff - we can't emit the
12713 _GLOBAL_OFFSET_TABLE_ symbol easily into the .debug_info section, so
12714 instead of delegitimizing we assemble it as @gotoff.
12715 Disallow a _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
12716 assembles that as a _GLOBAL_OFFSET_TABLE_-. expression. */
12717
12718 static bool
12719 ix86_const_not_ok_for_debug_p (rtx x)
12720 {
12721 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
12722 return true;
12723
12724 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
12725 return true;
12726
12727 return false;
12728 }
12729 \f
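/* Output the condition-code suffix for CODE in MODE to FILE; a sketch of
   a few of the mappings implemented below:

     (GT, CCmode)    -> "g"     as in jg / setg
     (LTU, CCmode)   -> "b"     as in jb / setb
     (GE, CCGOCmode) -> "ns"    sign flag only, as in jns

   REVERSE inverts the condition first, and FP selects the floating-point
   spellings (e.g. "nbe", "u") where they differ.  */
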
12730 static void
12731 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
12732 bool fp, FILE *file)
12733 {
12734 const char *suffix;
12735
12736 if (mode == CCFPmode)
12737 {
12738 code = ix86_fp_compare_code_to_integer (code);
12739 mode = CCmode;
12740 }
12741 if (reverse)
12742 code = reverse_condition (code);
12743
12744 switch (code)
12745 {
12746 case EQ:
12747 gcc_assert (mode != CCGZmode);
12748 switch (mode)
12749 {
12750 case E_CCAmode:
12751 suffix = "a";
12752 break;
12753 case E_CCCmode:
12754 suffix = "c";
12755 break;
12756 case E_CCOmode:
12757 suffix = "o";
12758 break;
12759 case E_CCPmode:
12760 suffix = "p";
12761 break;
12762 case E_CCSmode:
12763 suffix = "s";
12764 break;
12765 default:
12766 suffix = "e";
12767 break;
12768 }
12769 break;
12770 case NE:
12771 gcc_assert (mode != CCGZmode);
12772 switch (mode)
12773 {
12774 case E_CCAmode:
12775 suffix = "na";
12776 break;
12777 case E_CCCmode:
12778 suffix = "nc";
12779 break;
12780 case E_CCOmode:
12781 suffix = "no";
12782 break;
12783 case E_CCPmode:
12784 suffix = "np";
12785 break;
12786 case E_CCSmode:
12787 suffix = "ns";
12788 break;
12789 default:
12790 suffix = "ne";
12791 break;
12792 }
12793 break;
12794 case GT:
12795 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12796 suffix = "g";
12797 break;
12798 case GTU:
12799 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12800 Those same assemblers have the same but opposite lossage on cmov. */
12801 if (mode == CCmode)
12802 suffix = fp ? "nbe" : "a";
12803 else
12804 gcc_unreachable ();
12805 break;
12806 case LT:
12807 switch (mode)
12808 {
12809 case E_CCNOmode:
12810 case E_CCGOCmode:
12811 suffix = "s";
12812 break;
12813
12814 case E_CCmode:
12815 case E_CCGCmode:
12816 case E_CCGZmode:
12817 suffix = "l";
12818 break;
12819
12820 default:
12821 gcc_unreachable ();
12822 }
12823 break;
12824 case LTU:
12825 if (mode == CCmode || mode == CCGZmode)
12826 suffix = "b";
12827 else if (mode == CCCmode)
12828 suffix = fp ? "b" : "c";
12829 else
12830 gcc_unreachable ();
12831 break;
12832 case GE:
12833 switch (mode)
12834 {
12835 case E_CCNOmode:
12836 case E_CCGOCmode:
12837 suffix = "ns";
12838 break;
12839
12840 case E_CCmode:
12841 case E_CCGCmode:
12842 case E_CCGZmode:
12843 suffix = "ge";
12844 break;
12845
12846 default:
12847 gcc_unreachable ();
12848 }
12849 break;
12850 case GEU:
12851 if (mode == CCmode || mode == CCGZmode)
12852 suffix = "nb";
12853 else if (mode == CCCmode)
12854 suffix = fp ? "nb" : "nc";
12855 else
12856 gcc_unreachable ();
12857 break;
12858 case LE:
12859 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12860 suffix = "le";
12861 break;
12862 case LEU:
12863 if (mode == CCmode)
12864 suffix = "be";
12865 else
12866 gcc_unreachable ();
12867 break;
12868 case UNORDERED:
12869 suffix = fp ? "u" : "p";
12870 break;
12871 case ORDERED:
12872 suffix = fp ? "nu" : "np";
12873 break;
12874 default:
12875 gcc_unreachable ();
12876 }
12877 fputs (suffix, file);
12878 }
12879
12880 /* Print the name of register X to FILE based on its machine mode and number.
12881 If CODE is 'w', pretend the mode is HImode.
12882 If CODE is 'b', pretend the mode is QImode.
12883 If CODE is 'k', pretend the mode is SImode.
12884 If CODE is 'q', pretend the mode is DImode.
12885 If CODE is 'x', pretend the mode is V4SFmode.
12886 If CODE is 't', pretend the mode is V8SFmode.
12887 If CODE is 'g', pretend the mode is V16SFmode.
12888 If CODE is 'h', pretend the reg is the 'high' byte register.
12889 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
12890 If CODE is 'd', duplicate the operand for an AVX instruction.
12891 If CODE is 'V', print naked full integer register name without %.
12892 */
12893
12894 void
12895 print_reg (rtx x, int code, FILE *file)
12896 {
12897 const char *reg;
12898 int msize;
12899 unsigned int regno;
12900 bool duplicated;
12901
12902 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12903 putc ('%', file);
12904
12905 if (x == pc_rtx)
12906 {
12907 gcc_assert (TARGET_64BIT);
12908 fputs ("rip", file);
12909 return;
12910 }
12911
12912 if (code == 'y' && STACK_TOP_P (x))
12913 {
12914 fputs ("st(0)", file);
12915 return;
12916 }
12917
12918 if (code == 'w')
12919 msize = 2;
12920 else if (code == 'b')
12921 msize = 1;
12922 else if (code == 'k')
12923 msize = 4;
12924 else if (code == 'q')
12925 msize = 8;
12926 else if (code == 'h')
12927 msize = 0;
12928 else if (code == 'x')
12929 msize = 16;
12930 else if (code == 't')
12931 msize = 32;
12932 else if (code == 'g')
12933 msize = 64;
12934 else
12935 msize = GET_MODE_SIZE (GET_MODE (x));
12936
12937 regno = REGNO (x);
12938
12939 if (regno == ARG_POINTER_REGNUM
12940 || regno == FRAME_POINTER_REGNUM
12941 || regno == FPSR_REG)
12942 {
12943 output_operand_lossage
12944 ("invalid use of register '%s'", reg_names[regno]);
12945 return;
12946 }
12947 else if (regno == FLAGS_REG)
12948 {
12949 output_operand_lossage ("invalid use of asm flag output");
12950 return;
12951 }
12952
12953 if (code == 'V')
12954 {
12955 if (GENERAL_REGNO_P (regno))
12956 msize = GET_MODE_SIZE (word_mode);
12957 else
12958 error ("%<V%> modifier on non-integer register");
12959 }
12960
12961 duplicated = code == 'd' && TARGET_AVX;
12962
12963 switch (msize)
12964 {
12965 case 16:
12966 case 12:
12967 case 8:
12968 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12969 warning (0, "unsupported size for integer register");
12970 /* FALLTHRU */
12971 case 4:
12972 if (LEGACY_INT_REGNO_P (regno))
12973 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12974 /* FALLTHRU */
12975 case 2:
12976 normal:
12977 reg = hi_reg_name[regno];
12978 break;
12979 case 1:
12980 if (regno >= ARRAY_SIZE (qi_reg_name))
12981 goto normal;
12982 if (!ANY_QI_REGNO_P (regno))
12983 error ("unsupported size for integer register");
12984 reg = qi_reg_name[regno];
12985 break;
12986 case 0:
12987 if (regno >= ARRAY_SIZE (qi_high_reg_name))
12988 goto normal;
12989 reg = qi_high_reg_name[regno];
12990 break;
12991 case 32:
12992 case 64:
12993 if (SSE_REGNO_P (regno))
12994 {
12995 gcc_assert (!duplicated);
12996 putc (msize == 32 ? 'y' : 'z', file);
12997 reg = hi_reg_name[regno] + 1;
12998 break;
12999 }
13000 goto normal;
13001 default:
13002 gcc_unreachable ();
13003 }
13004
13005 fputs (reg, file);
13006
13007 /* Irritatingly, AMD extended registers use a
13008 different naming convention: "r%d[bwd]". */
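/* For instance, register number 8 is printed as %r8b, %r8w, %r8d
or %r8 for 1-, 2-, 4- and 8-byte operands respectively. */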
13009 if (REX_INT_REGNO_P (regno))
13010 {
13011 gcc_assert (TARGET_64BIT);
13012 switch (msize)
13013 {
13014 case 0:
13015 error ("extended registers have no high halves");
13016 break;
13017 case 1:
13018 putc ('b', file);
13019 break;
13020 case 2:
13021 putc ('w', file);
13022 break;
13023 case 4:
13024 putc ('d', file);
13025 break;
13026 case 8:
13027 /* no suffix */
13028 break;
13029 default:
13030 error ("unsupported operand size for extended register");
13031 break;
13032 }
13033 return;
13034 }
13035
13036 if (duplicated)
13037 {
13038 if (ASSEMBLER_DIALECT == ASM_ATT)
13039 fprintf (file, ", %%%s", reg);
13040 else
13041 fprintf (file, ", %s", reg);
13042 }
13043 }
13044
13045 /* Meaning of CODE:
13046 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
13047 C -- print opcode suffix for set/cmov insn.
13048 c -- like C, but print reversed condition
13049 F,f -- likewise, but for floating-point.
13050 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
13051 otherwise nothing
13052 R -- print embedded rounding and sae.
13053 r -- print only sae.
13054 z -- print the opcode suffix for the size of the current operand.
13055 Z -- likewise, with special suffixes for x87 instructions.
13056 * -- print a star (in certain assembler syntax)
13057 A -- print an absolute memory reference.
13058 E -- print address with DImode register names if TARGET_64BIT.
13059 w -- print the operand as if it's a "word" (HImode) even if it isn't.
13060 s -- print a shift double count, followed by the assembler's argument
13061 delimiter.
13062 b -- print the QImode name of the register for the indicated operand.
13063 %b0 would print %al if operands[0] is reg 0.
13064 w -- likewise, print the HImode name of the register.
13065 k -- likewise, print the SImode name of the register.
13066 q -- likewise, print the DImode name of the register.
13067 x -- likewise, print the V4SFmode name of the register.
13068 t -- likewise, print the V8SFmode name of the register.
13069 g -- likewise, print the V16SFmode name of the register.
13070 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
13071 y -- print "st(0)" instead of "st" as a register.
13072 d -- print duplicated register operand for AVX instruction.
13073 D -- print condition for SSE cmp instruction.
13074 P -- if PIC, print an @PLT suffix. For -fno-plt, load function
13075 address from GOT.
13076 p -- print raw symbol name.
13077 X -- don't print any sort of PIC '@' suffix for a symbol.
13078 & -- print some in-use local-dynamic symbol name.
13079 H -- print a memory address offset by 8; used for sse high-parts
13080 Y -- print condition for XOP pcom* instruction.
13081 V -- print naked full integer register name without %.
13082 + -- print a branch hint as 'cs' or 'ds' prefix
13083 ; -- print a semicolon (after prefixes, due to a bug in older gas).
13084 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
13085 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
13086 M -- print addr32 prefix for TARGET_X32 with VSIB address.
13087 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
13088 N -- print maskz if it's constant 0 operand.
13089 */
13090
13091 void
13092 ix86_print_operand (FILE *file, rtx x, int code)
13093 {
13094 if (code)
13095 {
13096 switch (code)
13097 {
13098 case 'A':
13099 switch (ASSEMBLER_DIALECT)
13100 {
13101 case ASM_ATT:
13102 putc ('*', file);
13103 break;
13104
13105 case ASM_INTEL:
13106 /* Intel syntax. For absolute addresses, registers should not
13107 be surrounded by brackets. */
13108 if (!REG_P (x))
13109 {
13110 putc ('[', file);
13111 ix86_print_operand (file, x, 0);
13112 putc (']', file);
13113 return;
13114 }
13115 break;
13116
13117 default:
13118 gcc_unreachable ();
13119 }
13120
13121 ix86_print_operand (file, x, 0);
13122 return;
13123
13124 case 'E':
13125 /* Wrap address in an UNSPEC to declare special handling. */
13126 if (TARGET_64BIT)
13127 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
13128
13129 output_address (VOIDmode, x);
13130 return;
13131
13132 case 'L':
13133 if (ASSEMBLER_DIALECT == ASM_ATT)
13134 putc ('l', file);
13135 return;
13136
13137 case 'W':
13138 if (ASSEMBLER_DIALECT == ASM_ATT)
13139 putc ('w', file);
13140 return;
13141
13142 case 'B':
13143 if (ASSEMBLER_DIALECT == ASM_ATT)
13144 putc ('b', file);
13145 return;
13146
13147 case 'Q':
13148 if (ASSEMBLER_DIALECT == ASM_ATT)
13149 putc ('l', file);
13150 return;
13151
13152 case 'S':
13153 if (ASSEMBLER_DIALECT == ASM_ATT)
13154 putc ('s', file);
13155 return;
13156
13157 case 'T':
13158 if (ASSEMBLER_DIALECT == ASM_ATT)
13159 putc ('t', file);
13160 return;
13161
13162 case 'O':
13163 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13164 if (ASSEMBLER_DIALECT != ASM_ATT)
13165 return;
13166
13167 switch (GET_MODE_SIZE (GET_MODE (x)))
13168 {
13169 case 2:
13170 putc ('w', file);
13171 break;
13172
13173 case 4:
13174 putc ('l', file);
13175 break;
13176
13177 case 8:
13178 putc ('q', file);
13179 break;
13180
13181 default:
13182 output_operand_lossage ("invalid operand size for operand "
13183 "code 'O'");
13184 return;
13185 }
13186
13187 putc ('.', file);
13188 #endif
13189 return;
13190
13191 case 'z':
13192 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13193 {
13194 /* Opcodes don't get size suffixes when using Intel syntax. */
13195 if (ASSEMBLER_DIALECT == ASM_INTEL)
13196 return;
13197
13198 switch (GET_MODE_SIZE (GET_MODE (x)))
13199 {
13200 case 1:
13201 putc ('b', file);
13202 return;
13203
13204 case 2:
13205 putc ('w', file);
13206 return;
13207
13208 case 4:
13209 putc ('l', file);
13210 return;
13211
13212 case 8:
13213 putc ('q', file);
13214 return;
13215
13216 default:
13217 output_operand_lossage ("invalid operand size for operand "
13218 "code 'z'");
13219 return;
13220 }
13221 }
13222
13223 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13224 {
13225 if (this_is_asm_operands)
13226 warning_for_asm (this_is_asm_operands,
13227 "non-integer operand used with operand code %<z%>");
13228 else
13229 warning (0, "non-integer operand used with operand code %<z%>");
13230 }
13231 /* FALLTHRU */
13232
13233 case 'Z':
13234 /* 387 opcodes don't get size suffixes when using Intel syntax. */
13235 if (ASSEMBLER_DIALECT == ASM_INTEL)
13236 return;
13237
13238 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
13239 {
13240 switch (GET_MODE_SIZE (GET_MODE (x)))
13241 {
13242 case 2:
13243 #ifdef HAVE_AS_IX86_FILDS
13244 putc ('s', file);
13245 #endif
13246 return;
13247
13248 case 4:
13249 putc ('l', file);
13250 return;
13251
13252 case 8:
13253 #ifdef HAVE_AS_IX86_FILDQ
13254 putc ('q', file);
13255 #else
13256 fputs ("ll", file);
13257 #endif
13258 return;
13259
13260 default:
13261 break;
13262 }
13263 }
13264 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13265 {
13266 /* 387 opcodes don't get size suffixes
13267 if the operands are registers. */
13268 if (STACK_REG_P (x))
13269 return;
13270
13271 switch (GET_MODE_SIZE (GET_MODE (x)))
13272 {
13273 case 4:
13274 putc ('s', file);
13275 return;
13276
13277 case 8:
13278 putc ('l', file);
13279 return;
13280
13281 case 12:
13282 case 16:
13283 putc ('t', file);
13284 return;
13285
13286 default:
13287 break;
13288 }
13289 }
13290 else
13291 {
13292 output_operand_lossage ("invalid operand type used with "
13293 "operand code '%c'", code);
13294 return;
13295 }
13296
13297 output_operand_lossage ("invalid operand size for operand code '%c'",
13298 code);
13299 return;
13300
13301 case 'd':
13302 case 'b':
13303 case 'w':
13304 case 'k':
13305 case 'q':
13306 case 'h':
13307 case 't':
13308 case 'g':
13309 case 'y':
13310 case 'x':
13311 case 'X':
13312 case 'P':
13313 case 'p':
13314 case 'V':
13315 break;
13316
13317 case 's':
13318 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13319 {
13320 ix86_print_operand (file, x, 0);
13321 fputs (", ", file);
13322 }
13323 return;
13324
13325 case 'Y':
13326 switch (GET_CODE (x))
13327 {
13328 case NE:
13329 fputs ("neq", file);
13330 break;
13331 case EQ:
13332 fputs ("eq", file);
13333 break;
13334 case GE:
13335 case GEU:
13336 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13337 break;
13338 case GT:
13339 case GTU:
13340 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13341 break;
13342 case LE:
13343 case LEU:
13344 fputs ("le", file);
13345 break;
13346 case LT:
13347 case LTU:
13348 fputs ("lt", file);
13349 break;
13350 case UNORDERED:
13351 fputs ("unord", file);
13352 break;
13353 case ORDERED:
13354 fputs ("ord", file);
13355 break;
13356 case UNEQ:
13357 fputs ("ueq", file);
13358 break;
13359 case UNGE:
13360 fputs ("nlt", file);
13361 break;
13362 case UNGT:
13363 fputs ("nle", file);
13364 break;
13365 case UNLE:
13366 fputs ("ule", file);
13367 break;
13368 case UNLT:
13369 fputs ("ult", file);
13370 break;
13371 case LTGT:
13372 fputs ("une", file);
13373 break;
13374 default:
13375 output_operand_lossage ("operand is not a condition code, "
13376 "invalid operand code 'Y'");
13377 return;
13378 }
13379 return;
13380
13381 case 'D':
13382 /* A little bit of brain damage here. The SSE compare instructions
13383 use completely different names for the comparisons than the
13384 fp conditional moves do. */
13385 switch (GET_CODE (x))
13386 {
13387 case UNEQ:
13388 if (TARGET_AVX)
13389 {
13390 fputs ("eq_us", file);
13391 break;
13392 }
13393 /* FALLTHRU */
13394 case EQ:
13395 fputs ("eq", file);
13396 break;
13397 case UNLT:
13398 if (TARGET_AVX)
13399 {
13400 fputs ("nge", file);
13401 break;
13402 }
13403 /* FALLTHRU */
13404 case LT:
13405 fputs ("lt", file);
13406 break;
13407 case UNLE:
13408 if (TARGET_AVX)
13409 {
13410 fputs ("ngt", file);
13411 break;
13412 }
13413 /* FALLTHRU */
13414 case LE:
13415 fputs ("le", file);
13416 break;
13417 case UNORDERED:
13418 fputs ("unord", file);
13419 break;
13420 case LTGT:
13421 if (TARGET_AVX)
13422 {
13423 fputs ("neq_oq", file);
13424 break;
13425 }
13426 /* FALLTHRU */
13427 case NE:
13428 fputs ("neq", file);
13429 break;
13430 case GE:
13431 if (TARGET_AVX)
13432 {
13433 fputs ("ge", file);
13434 break;
13435 }
13436 /* FALLTHRU */
13437 case UNGE:
13438 fputs ("nlt", file);
13439 break;
13440 case GT:
13441 if (TARGET_AVX)
13442 {
13443 fputs ("gt", file);
13444 break;
13445 }
13446 /* FALLTHRU */
13447 case UNGT:
13448 fputs ("nle", file);
13449 break;
13450 case ORDERED:
13451 fputs ("ord", file);
13452 break;
13453 default:
13454 output_operand_lossage ("operand is not a condition code, "
13455 "invalid operand code 'D'");
13456 return;
13457 }
13458 return;
13459
13460 case 'F':
13461 case 'f':
13462 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13463 if (ASSEMBLER_DIALECT == ASM_ATT)
13464 putc ('.', file);
13465 gcc_fallthrough ();
13466 #endif
13467
13468 case 'C':
13469 case 'c':
13470 if (!COMPARISON_P (x))
13471 {
13472 output_operand_lossage ("operand is not a condition code, "
13473 "invalid operand code '%c'", code);
13474 return;
13475 }
13476 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
13477 code == 'c' || code == 'f',
13478 code == 'F' || code == 'f',
13479 file);
13480 return;
13481
13482 case 'H':
13483 if (!offsettable_memref_p (x))
13484 {
13485 output_operand_lossage ("operand is not an offsettable memory "
13486 "reference, invalid operand code 'H'");
13487 return;
13488 }
13489 /* It doesn't actually matter what mode we use here, as we're
13490 only going to use this for printing. */
13491 x = adjust_address_nv (x, DImode, 8);
13492 /* Output 'qword ptr' for intel assembler dialect. */
13493 if (ASSEMBLER_DIALECT == ASM_INTEL)
13494 code = 'q';
13495 break;
13496
13497 case 'K':
13498 if (!CONST_INT_P (x))
13499 {
13500 output_operand_lossage ("operand is not an integer, invalid "
13501 "operand code 'K'");
13502 return;
13503 }
13504
13505 if (INTVAL (x) & IX86_HLE_ACQUIRE)
13506 #ifdef HAVE_AS_IX86_HLE
13507 fputs ("xacquire ", file);
13508 #else
13509 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
13510 #endif
13511 else if (INTVAL (x) & IX86_HLE_RELEASE)
13512 #ifdef HAVE_AS_IX86_HLE
13513 fputs ("xrelease ", file);
13514 #else
13515 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
13516 #endif
13517 /* We do not want to print the value of the operand. */
13518 return;
13519
13520 case 'N':
13521 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
13522 fputs ("{z}", file);
13523 return;
13524
13525 case 'r':
13526 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
13527 {
13528 output_operand_lossage ("operand is not a specific integer, "
13529 "invalid operand code 'r'");
13530 return;
13531 }
13532
13533 if (ASSEMBLER_DIALECT == ASM_INTEL)
13534 fputs (", ", file);
13535
13536 fputs ("{sae}", file);
13537
13538 if (ASSEMBLER_DIALECT == ASM_ATT)
13539 fputs (", ", file);
13540
13541 return;
13542
13543 case 'R':
13544 if (!CONST_INT_P (x))
13545 {
13546 output_operand_lossage ("operand is not an integer, invalid "
13547 "operand code 'R'");
13548 return;
13549 }
13550
13551 if (ASSEMBLER_DIALECT == ASM_INTEL)
13552 fputs (", ", file);
13553
13554 switch (INTVAL (x))
13555 {
13556 case ROUND_NEAREST_INT | ROUND_SAE:
13557 fputs ("{rn-sae}", file);
13558 break;
13559 case ROUND_NEG_INF | ROUND_SAE:
13560 fputs ("{rd-sae}", file);
13561 break;
13562 case ROUND_POS_INF | ROUND_SAE:
13563 fputs ("{ru-sae}", file);
13564 break;
13565 case ROUND_ZERO | ROUND_SAE:
13566 fputs ("{rz-sae}", file);
13567 break;
13568 default:
13569 output_operand_lossage ("operand is not a specific integer, "
13570 "invalid operand code 'R'");
13571 }
13572
13573 if (ASSEMBLER_DIALECT == ASM_ATT)
13574 fputs (", ", file);
13575
13576 return;
13577
13578 case '*':
13579 if (ASSEMBLER_DIALECT == ASM_ATT)
13580 putc ('*', file);
13581 return;
13582
13583 case '&':
13584 {
13585 const char *name = get_some_local_dynamic_name ();
13586 if (name == NULL)
13587 output_operand_lossage ("'%%&' used without any "
13588 "local dynamic TLS references");
13589 else
13590 assemble_name (file, name);
13591 return;
13592 }
13593
13594 case '+':
13595 {
13596 rtx x;
13597
13598 if (!optimize
13599 || optimize_function_for_size_p (cfun)
13600 || !TARGET_BRANCH_PREDICTION_HINTS)
13601 return;
13602
13603 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13604 if (x)
13605 {
13606 int pred_val = profile_probability::from_reg_br_prob_note
13607 (XINT (x, 0)).to_reg_br_prob_base ();
13608
13609 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13610 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13611 {
13612 bool taken = pred_val > REG_BR_PROB_BASE / 2;
13613 bool cputaken
13614 = final_forward_branch_p (current_output_insn) == 0;
13615
13616 /* Emit hints only when the default branch prediction
13617 heuristics would fail. */
13618 if (taken != cputaken)
13619 {
13620 /* We use 3e (DS) prefix for taken branches and
13621 2e (CS) prefix for not taken branches. */
13622 if (taken)
13623 fputs ("ds ; ", file);
13624 else
13625 fputs ("cs ; ", file);
13626 }
13627 }
13628 }
13629 return;
13630 }
13631
13632 case ';':
13633 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13634 putc (';', file);
13635 #endif
13636 return;
13637
13638 case '~':
13639 putc (TARGET_AVX2 ? 'i' : 'f', file);
13640 return;
13641
13642 case 'M':
13643 if (TARGET_X32)
13644 {
13645 /* NB: 32-bit indices in a VSIB address are sign-extended
13646 to 64 bits. In x32, the 32-bit address 0xf7fa3010 is
13647 sign-extended to 0xfffffffff7fa3010, which is an invalid
13648 address. Add the addr32 prefix if there is neither a base
13649 register nor a symbol. */
13650 bool ok;
13651 struct ix86_address parts;
13652 ok = ix86_decompose_address (x, &parts);
13653 gcc_assert (ok && parts.index == NULL_RTX);
13654 if (parts.base == NULL_RTX
13655 && (parts.disp == NULL_RTX
13656 || !symbolic_operand (parts.disp,
13657 GET_MODE (parts.disp))))
13658 fputs ("addr32 ", file);
13659 }
13660 return;
13661
13662 case '^':
13663 if (TARGET_64BIT && Pmode != word_mode)
13664 fputs ("addr32 ", file);
13665 return;
13666
13667 case '!':
13668 if (ix86_notrack_prefixed_insn_p (current_output_insn))
13669 fputs ("notrack ", file);
13670 return;
13671
13672 default:
13673 output_operand_lossage ("invalid operand code '%c'", code);
13674 }
13675 }
13676
13677 if (REG_P (x))
13678 print_reg (x, code, file);
13679
13680 else if (MEM_P (x))
13681 {
13682 rtx addr = XEXP (x, 0);
13683
13684 /* No `byte ptr' prefix for call instructions ... */
13685 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
13686 {
13687 machine_mode mode = GET_MODE (x);
13688 const char *size;
13689
13690 /* Check for explicit size override codes. */
13691 if (code == 'b')
13692 size = "BYTE";
13693 else if (code == 'w')
13694 size = "WORD";
13695 else if (code == 'k')
13696 size = "DWORD";
13697 else if (code == 'q')
13698 size = "QWORD";
13699 else if (code == 'x')
13700 size = "XMMWORD";
13701 else if (code == 't')
13702 size = "YMMWORD";
13703 else if (code == 'g')
13704 size = "ZMMWORD";
13705 else if (mode == BLKmode)
13706 /* ... or BLKmode operands, when not overridden. */
13707 size = NULL;
13708 else
13709 switch (GET_MODE_SIZE (mode))
13710 {
13711 case 1: size = "BYTE"; break;
13712 case 2: size = "WORD"; break;
13713 case 4: size = "DWORD"; break;
13714 case 8: size = "QWORD"; break;
13715 case 12: size = "TBYTE"; break;
13716 case 16:
13717 if (mode == XFmode)
13718 size = "TBYTE";
13719 else
13720 size = "XMMWORD";
13721 break;
13722 case 32: size = "YMMWORD"; break;
13723 case 64: size = "ZMMWORD"; break;
13724 default:
13725 gcc_unreachable ();
13726 }
13727 if (size)
13728 {
13729 fputs (size, file);
13730 fputs (" PTR ", file);
13731 }
13732 }
13733
13734 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13735 output_operand_lossage ("invalid constraints for operand");
13736 else
13737 ix86_print_operand_address_as
13738 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
13739 }
13740
13741 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
13742 {
13743 long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
13744 REAL_MODE_FORMAT (HFmode));
13745 if (ASSEMBLER_DIALECT == ASM_ATT)
13746 putc ('$', file);
13747 fprintf (file, "0x%04x", (unsigned int) l);
13748 }
13749
13750 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
13751 {
13752 long l;
13753
13754 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13755
13756 if (ASSEMBLER_DIALECT == ASM_ATT)
13757 putc ('$', file);
13758 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13759 if (code == 'q')
13760 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
13761 (unsigned long long) (int) l);
13762 else
13763 fprintf (file, "0x%08x", (unsigned int) l);
13764 }
13765
13766 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
13767 {
13768 long l[2];
13769
13770 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13771
13772 if (ASSEMBLER_DIALECT == ASM_ATT)
13773 putc ('$', file);
13774 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13775 }
13776
13777 /* These float cases don't actually occur as immediate operands. */
13778 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
13779 {
13780 char dstr[30];
13781
13782 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13783 fputs (dstr, file);
13784 }
13785
13786 /* Print bcst_mem_operand. */
13787 else if (GET_CODE (x) == VEC_DUPLICATE)
13788 {
13789 machine_mode vmode = GET_MODE (x);
13790 /* Must be a bcst_mem_operand. */
13791 gcc_assert (bcst_mem_operand (x, vmode));
13792
13793 rtx mem = XEXP (x, 0);
13794 ix86_print_operand (file, mem, 0);
13795
13796 switch (vmode)
13797 {
13798 case E_V2DImode:
13799 case E_V2DFmode:
13800 fputs ("{1to2}", file);
13801 break;
13802 case E_V4SImode:
13803 case E_V4SFmode:
13804 case E_V4DImode:
13805 case E_V4DFmode:
13806 fputs ("{1to4}", file);
13807 break;
13808 case E_V8SImode:
13809 case E_V8SFmode:
13810 case E_V8DFmode:
13811 case E_V8DImode:
13812 case E_V8HFmode:
13813 fputs ("{1to8}", file);
13814 break;
13815 case E_V16SFmode:
13816 case E_V16SImode:
13817 case E_V16HFmode:
13818 fputs ("{1to16}", file);
13819 break;
13820 case E_V32HFmode:
13821 fputs ("{1to32}", file);
13822 break;
13823 default:
13824 gcc_unreachable ();
13825 }
13826 }
13827
13828 else
13829 {
13830 /* We have patterns that allow zero sets of memory, for instance.
13831 In 64-bit mode, we should probably support all 8-byte vectors,
13832 since we can in fact encode that into an immediate. */
13833 if (GET_CODE (x) == CONST_VECTOR)
13834 {
13835 if (x != CONST0_RTX (GET_MODE (x)))
13836 output_operand_lossage ("invalid vector immediate");
13837 x = const0_rtx;
13838 }
13839
13840 if (code == 'P')
13841 {
13842 if (ix86_force_load_from_GOT_p (x, true))
13843 {
13844 /* For an inline assembly statement, load the function address
13845 from the GOT with the 'P' operand modifier to avoid the PLT. */
13846 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13847 (TARGET_64BIT
13848 ? UNSPEC_GOTPCREL
13849 : UNSPEC_GOT));
13850 x = gen_rtx_CONST (Pmode, x);
13851 x = gen_const_mem (Pmode, x);
13852 ix86_print_operand (file, x, 'A');
13853 return;
13854 }
13855 }
13856 else if (code != 'p')
13857 {
13858 if (CONST_INT_P (x))
13859 {
13860 if (ASSEMBLER_DIALECT == ASM_ATT)
13861 putc ('$', file);
13862 }
13863 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13864 || GET_CODE (x) == LABEL_REF)
13865 {
13866 if (ASSEMBLER_DIALECT == ASM_ATT)
13867 putc ('$', file);
13868 else
13869 fputs ("OFFSET FLAT:", file);
13870 }
13871 }
13872 if (CONST_INT_P (x))
13873 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13874 else if (flag_pic || MACHOPIC_INDIRECT)
13875 output_pic_addr_const (file, x, code);
13876 else
13877 output_addr_const (file, x);
13878 }
13879 }
13880
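/* Return true if CODE is one of the punctuation characters that
ix86_print_operand handles on its own ('*', '+', '&', ';', '~',
'^' and '!' above). */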
13881 static bool
13882 ix86_print_operand_punct_valid_p (unsigned char code)
13883 {
13884 return (code == '*' || code == '+' || code == '&' || code == ';'
13885 || code == '~' || code == '^' || code == '!');
13886 }
13887 \f
13888 /* Print a memory operand whose address is ADDR. */
13889
13890 static void
13891 ix86_print_operand_address_as (FILE *file, rtx addr,
13892 addr_space_t as, bool raw)
13893 {
13894 struct ix86_address parts;
13895 rtx base, index, disp;
13896 int scale;
13897 int ok;
13898 bool vsib = false;
13899 int code = 0;
13900
13901 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13902 {
13903 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13904 gcc_assert (parts.index == NULL_RTX);
13905 parts.index = XVECEXP (addr, 0, 1);
13906 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13907 addr = XVECEXP (addr, 0, 0);
13908 vsib = true;
13909 }
13910 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13911 {
13912 gcc_assert (TARGET_64BIT);
13913 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13914 code = 'q';
13915 }
13916 else
13917 ok = ix86_decompose_address (addr, &parts);
13918
13919 gcc_assert (ok);
13920
13921 base = parts.base;
13922 index = parts.index;
13923 disp = parts.disp;
13924 scale = parts.scale;
13925
13926 if (ADDR_SPACE_GENERIC_P (as))
13927 as = parts.seg;
13928 else
13929 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13930
13931 if (!ADDR_SPACE_GENERIC_P (as) && !raw)
13932 {
13933 if (ASSEMBLER_DIALECT == ASM_ATT)
13934 putc ('%', file);
13935
13936 switch (as)
13937 {
13938 case ADDR_SPACE_SEG_FS:
13939 fputs ("fs:", file);
13940 break;
13941 case ADDR_SPACE_SEG_GS:
13942 fputs ("gs:", file);
13943 break;
13944 default:
13945 gcc_unreachable ();
13946 }
13947 }
13948
13949 /* Use one byte shorter RIP relative addressing for 64bit mode. */
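/* E.g. a symbol reference is printed as "foo(%rip)" rather than as
an absolute 32-bit address. */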
13950 if (TARGET_64BIT && !base && !index && !raw)
13951 {
13952 rtx symbol = disp;
13953
13954 if (GET_CODE (disp) == CONST
13955 && GET_CODE (XEXP (disp, 0)) == PLUS
13956 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13957 symbol = XEXP (XEXP (disp, 0), 0);
13958
13959 if (GET_CODE (symbol) == LABEL_REF
13960 || (GET_CODE (symbol) == SYMBOL_REF
13961 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13962 base = pc_rtx;
13963 }
13964
13965 if (!base && !index)
13966 {
13967 /* A displacement-only address requires special attention. */
13968 if (CONST_INT_P (disp))
13969 {
13970 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13971 fputs ("ds:", file);
13972 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13973 }
13974 /* Load the external function address via the GOT slot to avoid PLT. */
13975 else if (GET_CODE (disp) == CONST
13976 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13977 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13978 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13979 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13980 output_pic_addr_const (file, disp, 0);
13981 else if (flag_pic)
13982 output_pic_addr_const (file, disp, 0);
13983 else
13984 output_addr_const (file, disp);
13985 }
13986 else
13987 {
13988 /* Print SImode register names to force addr32 prefix. */
13989 if (SImode_address_operand (addr, VOIDmode))
13990 {
13991 if (flag_checking)
13992 {
13993 gcc_assert (TARGET_64BIT);
13994 switch (GET_CODE (addr))
13995 {
13996 case SUBREG:
13997 gcc_assert (GET_MODE (addr) == SImode);
13998 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13999 break;
14000 case ZERO_EXTEND:
14001 case AND:
14002 gcc_assert (GET_MODE (addr) == DImode);
14003 break;
14004 default:
14005 gcc_unreachable ();
14006 }
14007 }
14008 gcc_assert (!code);
14009 code = 'k';
14010 }
14011 else if (code == 0
14012 && TARGET_X32
14013 && disp
14014 && CONST_INT_P (disp)
14015 && INTVAL (disp) < -16*1024*1024)
14016 {
14017 /* X32 runs in 64-bit mode, where the displacement DISP in
14018 address DISP(%r64) is encoded as a 32-bit immediate
14019 sign-extended to 64 bits. For -0x40000300(%r64), the
14020 address is %r64 + 0xffffffffbffffd00. When %r64 <
14021 0x40000300, e.g. 0x37ffe064, the address is 0xfffffffff7ffdd64,
14022 which is invalid for x32. The correct address is %r64
14023 - 0x40000300 == 0xf7ffdd64. To properly encode
14024 -0x40000300(%r64) for x32, we zero-extend the negative
14025 displacement by forcing the addr32 prefix, which truncates
14026 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
14027 zero-extend all negative displacements, including -1(%rsp).
14028 However, for small negative displacements, sign-extension
14029 won't cause overflow. We only zero-extend negative
14030 displacements if they are < -16*1024*1024, which is also the bound
14031 used to check legitimate address displacements for PIC. */
14032 code = 'k';
14033 }
14034
14035 /* Since the upper 32 bits of RSP are always zero for x32,
14036 we can encode %esp as %rsp to avoid 0x67 prefix if
14037 there is no index register. */
14038 if (TARGET_X32 && Pmode == SImode
14039 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
14040 code = 'q';
14041
14042 if (ASSEMBLER_DIALECT == ASM_ATT)
14043 {
14044 if (disp)
14045 {
14046 if (flag_pic)
14047 output_pic_addr_const (file, disp, 0);
14048 else if (GET_CODE (disp) == LABEL_REF)
14049 output_asm_label (disp);
14050 else
14051 output_addr_const (file, disp);
14052 }
14053
14054 putc ('(', file);
14055 if (base)
14056 print_reg (base, code, file);
14057 if (index)
14058 {
14059 putc (',', file);
14060 print_reg (index, vsib ? 0 : code, file);
14061 if (scale != 1 || vsib)
14062 fprintf (file, ",%d", scale);
14063 }
14064 putc (')', file);
14065 }
14066 else
14067 {
14068 rtx offset = NULL_RTX;
14069
14070 if (disp)
14071 {
14072 /* Pull out the offset of a symbol; print any symbol itself. */
14073 if (GET_CODE (disp) == CONST
14074 && GET_CODE (XEXP (disp, 0)) == PLUS
14075 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
14076 {
14077 offset = XEXP (XEXP (disp, 0), 1);
14078 disp = gen_rtx_CONST (VOIDmode,
14079 XEXP (XEXP (disp, 0), 0));
14080 }
14081
14082 if (flag_pic)
14083 output_pic_addr_const (file, disp, 0);
14084 else if (GET_CODE (disp) == LABEL_REF)
14085 output_asm_label (disp);
14086 else if (CONST_INT_P (disp))
14087 offset = disp;
14088 else
14089 output_addr_const (file, disp);
14090 }
14091
14092 putc ('[', file);
14093 if (base)
14094 {
14095 print_reg (base, code, file);
14096 if (offset)
14097 {
14098 if (INTVAL (offset) >= 0)
14099 putc ('+', file);
14100 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14101 }
14102 }
14103 else if (offset)
14104 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
14105 else
14106 putc ('0', file);
14107
14108 if (index)
14109 {
14110 putc ('+', file);
14111 print_reg (index, vsib ? 0 : code, file);
14112 if (scale != 1 || vsib)
14113 fprintf (file, "*%d", scale);
14114 }
14115 putc (']', file);
14116 }
14117 }
14118 }
14119
14120 static void
14121 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
14122 {
14123 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
14124 output_operand_lossage ("invalid constraints for operand");
14125 else
14126 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
14127 }
14128
14129 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14130
14131 static bool
14132 i386_asm_output_addr_const_extra (FILE *file, rtx x)
14133 {
14134 rtx op;
14135
14136 if (GET_CODE (x) != UNSPEC)
14137 return false;
14138
14139 op = XVECEXP (x, 0, 0);
14140 switch (XINT (x, 1))
14141 {
14142 case UNSPEC_GOTOFF:
14143 output_addr_const (file, op);
14144 fputs ("@gotoff", file);
14145 break;
14146 case UNSPEC_GOTTPOFF:
14147 output_addr_const (file, op);
14148 /* FIXME: This might be @TPOFF in Sun ld. */
14149 fputs ("@gottpoff", file);
14150 break;
14151 case UNSPEC_TPOFF:
14152 output_addr_const (file, op);
14153 fputs ("@tpoff", file);
14154 break;
14155 case UNSPEC_NTPOFF:
14156 output_addr_const (file, op);
14157 if (TARGET_64BIT)
14158 fputs ("@tpoff", file);
14159 else
14160 fputs ("@ntpoff", file);
14161 break;
14162 case UNSPEC_DTPOFF:
14163 output_addr_const (file, op);
14164 fputs ("@dtpoff", file);
14165 break;
14166 case UNSPEC_GOTNTPOFF:
14167 output_addr_const (file, op);
14168 if (TARGET_64BIT)
14169 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14170 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
14171 else
14172 fputs ("@gotntpoff", file);
14173 break;
14174 case UNSPEC_INDNTPOFF:
14175 output_addr_const (file, op);
14176 fputs ("@indntpoff", file);
14177 break;
14178 #if TARGET_MACHO
14179 case UNSPEC_MACHOPIC_OFFSET:
14180 output_addr_const (file, op);
14181 putc ('-', file);
14182 machopic_output_function_base_name (file);
14183 break;
14184 #endif
14185
14186 default:
14187 return false;
14188 }
14189
14190 return true;
14191 }
14192 \f
14193 \f
14194 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
14195 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
14196 is the expression of the binary operation. The output may either be
14197 emitted here, or returned to the caller, like all output_* functions.
14198
14199 There is no guarantee that the operands have the same mode, as they
14200 might be within FLOAT or FLOAT_EXTEND expressions. */
14201
14202 #ifndef SYSV386_COMPAT
14203 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
14204 wants to fix the assemblers because that causes incompatibility
14205 with gcc. No-one wants to fix gcc because that causes
14206 incompatibility with assemblers... You can use the option of
14207 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
14208 #define SYSV386_COMPAT 1
14209 #endif
14210
14211 const char *
14212 output_387_binary_op (rtx_insn *insn, rtx *operands)
14213 {
14214 static char buf[40];
14215 const char *p;
14216 bool is_sse
14217 = (SSE_REG_P (operands[0])
14218 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
14219
14220 if (is_sse)
14221 p = "%v";
14222 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
14223 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
14224 p = "fi";
14225 else
14226 p = "f";
14227
14228 strcpy (buf, p);
14229
14230 switch (GET_CODE (operands[3]))
14231 {
14232 case PLUS:
14233 p = "add"; break;
14234 case MINUS:
14235 p = "sub"; break;
14236 case MULT:
14237 p = "mul"; break;
14238 case DIV:
14239 p = "div"; break;
14240 default:
14241 gcc_unreachable ();
14242 }
14243
14244 strcat (buf, p);
14245
14246 if (is_sse)
14247 {
14248 p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
14249 strcat (buf, p);
14250
14251 if (TARGET_AVX)
14252 p = "\t{%2, %1, %0|%0, %1, %2}";
14253 else
14254 p = "\t{%2, %0|%0, %2}";
14255
14256 strcat (buf, p);
14257 return buf;
14258 }
14259
14260 /* Even if we do not want to check the inputs, this documents the input
14261 constraints, which helps in understanding the following code. */
14262 if (flag_checking)
14263 {
14264 if (STACK_REG_P (operands[0])
14265 && ((REG_P (operands[1])
14266 && REGNO (operands[0]) == REGNO (operands[1])
14267 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
14268 || (REG_P (operands[2])
14269 && REGNO (operands[0]) == REGNO (operands[2])
14270 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
14271 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
14272 ; /* ok */
14273 else
14274 gcc_unreachable ();
14275 }
14276
14277 switch (GET_CODE (operands[3]))
14278 {
14279 case MULT:
14280 case PLUS:
14281 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
14282 std::swap (operands[1], operands[2]);
14283
14284 /* Now we know operands[0] == operands[1]. */
14285
14286 if (MEM_P (operands[2]))
14287 {
14288 p = "%Z2\t%2";
14289 break;
14290 }
14291
14292 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14293 {
14294 if (STACK_TOP_P (operands[0]))
14295 /* How is it that we are storing to a dead operand[2]?
14296 Well, presumably operands[1] is dead too. We can't
14297 store the result to st(0) as st(0) gets popped on this
14298 instruction. Instead store to operands[2] (which I
14299 think has to be st(1)). st(1) will be popped later.
14300 gcc <= 2.8.1 didn't have this check and generated
14301 assembly code that the Unixware assembler rejected. */
14302 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14303 else
14304 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14305 break;
14306 }
14307
14308 if (STACK_TOP_P (operands[0]))
14309 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14310 else
14311 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14312 break;
14313
14314 case MINUS:
14315 case DIV:
14316 if (MEM_P (operands[1]))
14317 {
14318 p = "r%Z1\t%1";
14319 break;
14320 }
14321
14322 if (MEM_P (operands[2]))
14323 {
14324 p = "%Z2\t%2";
14325 break;
14326 }
14327
14328 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
14329 {
14330 #if SYSV386_COMPAT
14331 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
14332 derived assemblers, confusingly reverse the direction of
14333 the operation for fsub{r} and fdiv{r} when the
14334 destination register is not st(0). The Intel assembler
14335 doesn't have this brain damage. Read !SYSV386_COMPAT to
14336 figure out what the hardware really does. */
14337 if (STACK_TOP_P (operands[0]))
14338 p = "{p\t%0, %2|rp\t%2, %0}";
14339 else
14340 p = "{rp\t%2, %0|p\t%0, %2}";
14341 #else
14342 if (STACK_TOP_P (operands[0]))
14343 /* As above for fmul/fadd, we can't store to st(0). */
14344 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
14345 else
14346 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
14347 #endif
14348 break;
14349 }
14350
14351 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
14352 {
14353 #if SYSV386_COMPAT
14354 if (STACK_TOP_P (operands[0]))
14355 p = "{rp\t%0, %1|p\t%1, %0}";
14356 else
14357 p = "{p\t%1, %0|rp\t%0, %1}";
14358 #else
14359 if (STACK_TOP_P (operands[0]))
14360 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
14361 else
14362 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
14363 #endif
14364 break;
14365 }
14366
14367 if (STACK_TOP_P (operands[0]))
14368 {
14369 if (STACK_TOP_P (operands[1]))
14370 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
14371 else
14372 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
14373 break;
14374 }
14375 else if (STACK_TOP_P (operands[1]))
14376 {
14377 #if SYSV386_COMPAT
14378 p = "{\t%1, %0|r\t%0, %1}";
14379 #else
14380 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
14381 #endif
14382 }
14383 else
14384 {
14385 #if SYSV386_COMPAT
14386 p = "{r\t%2, %0|\t%0, %2}";
14387 #else
14388 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
14389 #endif
14390 }
14391 break;
14392
14393 default:
14394 gcc_unreachable ();
14395 }
14396
14397 strcat (buf, p);
14398 return buf;
14399 }
14400
14401 /* Return needed mode for entity in optimize_mode_switching pass. */
14402
14403 static int
14404 ix86_dirflag_mode_needed (rtx_insn *insn)
14405 {
14406 if (CALL_P (insn))
14407 {
14408 if (cfun->machine->func_type == TYPE_NORMAL)
14409 return X86_DIRFLAG_ANY;
14410 else
14411 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
14412 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
14413 }
14414
14415 if (recog_memoized (insn) < 0)
14416 return X86_DIRFLAG_ANY;
14417
14418 if (get_attr_type (insn) == TYPE_STR)
14419 {
14420 /* Emit cld instruction if stringops are used in the function. */
14421 if (cfun->machine->func_type == TYPE_NORMAL)
14422 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
14423 else
14424 return X86_DIRFLAG_RESET;
14425 }
14426
14427 return X86_DIRFLAG_ANY;
14428 }
14429
14430 /* Check if a 256bit or 512bit AVX register is referenced inside of EXP. */
14431
14432 static bool
14433 ix86_check_avx_upper_register (const_rtx exp)
14434 {
14435 return (SSE_REG_P (exp)
14436 && !EXT_REX_SSE_REG_P (exp)
14437 && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
14438 }
14439
14440 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
14441
14442 static void
14443 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
14444 {
14445 if (ix86_check_avx_upper_register (dest))
14446 {
14447 bool *used = (bool *) data;
14448 *used = true;
14449 }
14450 }
14451
14452 /* Return needed mode for entity in optimize_mode_switching pass. */
14453
14454 static int
14455 ix86_avx_u128_mode_needed (rtx_insn *insn)
14456 {
14457 if (DEBUG_INSN_P (insn))
14458 return AVX_U128_ANY;
14459
14460 if (CALL_P (insn))
14461 {
14462 rtx link;
14463
14464 /* Needed mode is set to AVX_U128_CLEAN if there are
14465 no 256bit or 512bit modes used in function arguments. */
14466 for (link = CALL_INSN_FUNCTION_USAGE (insn);
14467 link;
14468 link = XEXP (link, 1))
14469 {
14470 if (GET_CODE (XEXP (link, 0)) == USE)
14471 {
14472 rtx arg = XEXP (XEXP (link, 0), 0);
14473
14474 if (ix86_check_avx_upper_register (arg))
14475 return AVX_U128_DIRTY;
14476 }
14477 }
14478
14479 /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
14480 or 512bit registers used in the function return register. */
14481 bool avx_upper_reg_found = false;
14482 note_stores (insn, ix86_check_avx_upper_stores,
14483 &avx_upper_reg_found);
14484 if (avx_upper_reg_found)
14485 return AVX_U128_DIRTY;
14486
14487 /* If the function is known to preserve some SSE registers,
14488 RA and previous passes can legitimately rely on that for
14489 modes wider than 256 bits. It's only safe to issue a
14490 vzeroupper if all SSE registers are clobbered. */
14491 const function_abi &abi = insn_callee_abi (insn);
14492 if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
14493 abi.mode_clobbers (V4DImode)))
14494 return AVX_U128_ANY;
14495
14496 return AVX_U128_CLEAN;
14497 }
14498
14499 subrtx_iterator::array_type array;
14500
14501 rtx set = single_set (insn);
14502 if (set)
14503 {
14504 rtx dest = SET_DEST (set);
14505 rtx src = SET_SRC (set);
14506 if (ix86_check_avx_upper_register (dest))
14507 {
14508 /* This is a YMM/ZMM load. Return AVX_U128_DIRTY if the
14509 source isn't zero. */
14510 if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
14511 return AVX_U128_DIRTY;
14512 else
14513 return AVX_U128_ANY;
14514 }
14515 else
14516 {
14517 FOR_EACH_SUBRTX (iter, array, src, NONCONST)
14518 if (ix86_check_avx_upper_register (*iter))
14519 return AVX_U128_DIRTY;
14520 }
14521
14522 /* This isn't a YMM/ZMM load/store. */
14523 return AVX_U128_ANY;
14524 }
14525
14526 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
14527 Hardware changes state only when a 256bit register is written to,
14528 but we need to prevent the compiler from moving the optimal insertion
14529 point above an eventual read from a 256bit or 512bit register. */
14530 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14531 if (ix86_check_avx_upper_register (*iter))
14532 return AVX_U128_DIRTY;
14533
14534 return AVX_U128_ANY;
14535 }
14536
14537 /* Return mode that i387 must be switched into
14538 prior to the execution of insn. */
14539
14540 static int
14541 ix86_i387_mode_needed (int entity, rtx_insn *insn)
14542 {
14543 enum attr_i387_cw mode;
14544
14545 /* The mode UNINITIALIZED is used to store the control word after a
14546 function call or ASM pattern. The mode ANY specifies that the function
14547 has no requirements on the control word and makes no changes to the
14548 bits we are interested in. */
14549
14550 if (CALL_P (insn)
14551 || (NONJUMP_INSN_P (insn)
14552 && (asm_noperands (PATTERN (insn)) >= 0
14553 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14554 return I387_CW_UNINITIALIZED;
14555
14556 if (recog_memoized (insn) < 0)
14557 return I387_CW_ANY;
14558
14559 mode = get_attr_i387_cw (insn);
14560
14561 switch (entity)
14562 {
14563 case I387_ROUNDEVEN:
14564 if (mode == I387_CW_ROUNDEVEN)
14565 return mode;
14566 break;
14567
14568 case I387_TRUNC:
14569 if (mode == I387_CW_TRUNC)
14570 return mode;
14571 break;
14572
14573 case I387_FLOOR:
14574 if (mode == I387_CW_FLOOR)
14575 return mode;
14576 break;
14577
14578 case I387_CEIL:
14579 if (mode == I387_CW_CEIL)
14580 return mode;
14581 break;
14582
14583 default:
14584 gcc_unreachable ();
14585 }
14586
14587 return I387_CW_ANY;
14588 }
14589
14590 /* Return mode that entity must be switched into
14591 prior to the execution of insn. */
14592
14593 static int
14594 ix86_mode_needed (int entity, rtx_insn *insn)
14595 {
14596 switch (entity)
14597 {
14598 case X86_DIRFLAG:
14599 return ix86_dirflag_mode_needed (insn);
14600 case AVX_U128:
14601 return ix86_avx_u128_mode_needed (insn);
14602 case I387_ROUNDEVEN:
14603 case I387_TRUNC:
14604 case I387_FLOOR:
14605 case I387_CEIL:
14606 return ix86_i387_mode_needed (entity, insn);
14607 default:
14608 gcc_unreachable ();
14609 }
14610 return 0;
14611 }
14612
14613 /* Calculate mode of upper 128bit AVX registers after the insn. */
14614
14615 static int
14616 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
14617 {
14618 rtx pat = PATTERN (insn);
14619
14620 if (vzeroupper_pattern (pat, VOIDmode)
14621 || vzeroall_pattern (pat, VOIDmode))
14622 return AVX_U128_CLEAN;
14623
14624 /* We know that the state is clean after a CALL insn if there are no
14625 256bit or 512bit registers used in the function return register. */
14626 if (CALL_P (insn))
14627 {
14628 bool avx_upper_reg_found = false;
14629 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
14630
14631 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
14632 }
14633
14634 /* Otherwise, return current mode. Remember that if insn
14635 references AVX 256bit or 512bit registers, the mode was already
14636 changed to DIRTY from MODE_NEEDED. */
14637 return mode;
14638 }
14639
14640 /* Return the mode that an insn results in. */
14641
14642 static int
14643 ix86_mode_after (int entity, int mode, rtx_insn *insn)
14644 {
14645 switch (entity)
14646 {
14647 case X86_DIRFLAG:
14648 return mode;
14649 case AVX_U128:
14650 return ix86_avx_u128_mode_after (mode, insn);
14651 case I387_ROUNDEVEN:
14652 case I387_TRUNC:
14653 case I387_FLOOR:
14654 case I387_CEIL:
14655 return mode;
14656 default:
14657 gcc_unreachable ();
14658 }
14659 }
14660
14661 static int
14662 ix86_dirflag_mode_entry (void)
14663 {
14664 /* With TARGET_CLD or in an interrupt handler we can't assume the
14665 direction flag state at function entry. */
14666 if (TARGET_CLD
14667 || cfun->machine->func_type != TYPE_NORMAL)
14668 return X86_DIRFLAG_ANY;
14669
14670 return X86_DIRFLAG_RESET;
14671 }
14672
14673 static int
14674 ix86_avx_u128_mode_entry (void)
14675 {
14676 tree arg;
14677
14678 /* Entry mode is set to AVX_U128_DIRTY if there are
14679 256bit or 512bit modes used in function arguments. */
14680 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
14681 arg = TREE_CHAIN (arg))
14682 {
14683 rtx incoming = DECL_INCOMING_RTL (arg);
14684
14685 if (incoming && ix86_check_avx_upper_register (incoming))
14686 return AVX_U128_DIRTY;
14687 }
14688
14689 return AVX_U128_CLEAN;
14690 }
14691
14692 /* Return a mode that ENTITY is assumed to be
14693 switched to at function entry. */
14694
14695 static int
14696 ix86_mode_entry (int entity)
14697 {
14698 switch (entity)
14699 {
14700 case X86_DIRFLAG:
14701 return ix86_dirflag_mode_entry ();
14702 case AVX_U128:
14703 return ix86_avx_u128_mode_entry ();
14704 case I387_ROUNDEVEN:
14705 case I387_TRUNC:
14706 case I387_FLOOR:
14707 case I387_CEIL:
14708 return I387_CW_ANY;
14709 default:
14710 gcc_unreachable ();
14711 }
14712 }
14713
14714 static int
14715 ix86_avx_u128_mode_exit (void)
14716 {
14717 rtx reg = crtl->return_rtx;
14718
14719 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14720 or 512bit modes used in the function return register. */
14721 if (reg && ix86_check_avx_upper_register (reg))
14722 return AVX_U128_DIRTY;
14723
14724 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14725 modes used in function arguments, otherwise return AVX_U128_CLEAN.
14726 */
14727 return ix86_avx_u128_mode_entry ();
14728 }
14729
14730 /* Return a mode that ENTITY is assumed to be
14731 switched to at function exit. */
14732
14733 static int
14734 ix86_mode_exit (int entity)
14735 {
14736 switch (entity)
14737 {
14738 case X86_DIRFLAG:
14739 return X86_DIRFLAG_ANY;
14740 case AVX_U128:
14741 return ix86_avx_u128_mode_exit ();
14742 case I387_ROUNDEVEN:
14743 case I387_TRUNC:
14744 case I387_FLOOR:
14745 case I387_CEIL:
14746 return I387_CW_ANY;
14747 default:
14748 gcc_unreachable ();
14749 }
14750 }
14751
14752 static int
14753 ix86_mode_priority (int, int n)
14754 {
14755 return n;
14756 }
14757
14758 /* Output code to initialize control word copies used by trunc?f?i and
14759 rounding patterns. CURRENT_MODE is set to current control word,
14760 while NEW_MODE is set to new control word. */
14761
14762 static void
14763 emit_i387_cw_initialization (int mode)
14764 {
14765 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14766 rtx new_mode;
14767
14768 enum ix86_stack_slot slot;
14769
14770 rtx reg = gen_reg_rtx (HImode);
14771
14772 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14773 emit_move_insn (reg, copy_rtx (stored_mode));
14774
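/* Bits 10-11 of the x87 control word select the rounding mode:
00 = round to nearest even, 01 = round down, 10 = round up,
11 = round toward zero; hence the 0x0c00, 0x0400 and 0x0800
masks used below. */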
14775 switch (mode)
14776 {
14777 case I387_CW_ROUNDEVEN:
14778 /* round to nearest */
14779 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14780 slot = SLOT_CW_ROUNDEVEN;
14781 break;
14782
14783 case I387_CW_TRUNC:
14784 /* round toward zero (truncate) */
14785 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14786 slot = SLOT_CW_TRUNC;
14787 break;
14788
14789 case I387_CW_FLOOR:
14790 /* round down toward -oo */
14791 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14792 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14793 slot = SLOT_CW_FLOOR;
14794 break;
14795
14796 case I387_CW_CEIL:
14797 /* round up toward +oo */
14798 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14799 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14800 slot = SLOT_CW_CEIL;
14801 break;
14802
14803 default:
14804 gcc_unreachable ();
14805 }
14806
14807 gcc_assert (slot < MAX_386_STACK_LOCALS);
14808
14809 new_mode = assign_386_stack_local (HImode, slot);
14810 emit_move_insn (new_mode, reg);
14811 }
14812
14813 /* Generate one or more insns to set ENTITY to MODE. */
14814
14815 static void
14816 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
14817 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14818 {
14819 switch (entity)
14820 {
14821 case X86_DIRFLAG:
14822 if (mode == X86_DIRFLAG_RESET)
14823 emit_insn (gen_cld ());
14824 break;
14825 case AVX_U128:
14826 if (mode == AVX_U128_CLEAN)
14827 ix86_expand_avx_vzeroupper ();
14828 break;
14829 case I387_ROUNDEVEN:
14830 case I387_TRUNC:
14831 case I387_FLOOR:
14832 case I387_CEIL:
14833 if (mode != I387_CW_ANY
14834 && mode != I387_CW_UNINITIALIZED)
14835 emit_i387_cw_initialization (mode);
14836 break;
14837 default:
14838 gcc_unreachable ();
14839 }
14840 }
14841
14842 /* Output code for INSN to convert a float to a signed int. OPERANDS
14843 are the insn operands. The output may be [HSD]Imode and the input
14844 operand may be [SDX]Fmode. */
14845
14846 const char *
14847 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
14848 {
14849 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14850 bool dimode_p = GET_MODE (operands[0]) == DImode;
14851 int round_mode = get_attr_i387_cw (insn);
14852
14853 static char buf[40];
14854 const char *p;
14855
14856 /* Jump through a hoop or two for DImode, since the hardware has no
14857 non-popping instruction. We used to do this a different way, but
14858 that was somewhat fragile and broke with post-reload splitters. */
14859 if ((dimode_p || fisttp) && !stack_top_dies)
14860 output_asm_insn ("fld\t%y1", operands);
14861
14862 gcc_assert (STACK_TOP_P (operands[1]));
14863 gcc_assert (MEM_P (operands[0]));
14864 gcc_assert (GET_MODE (operands[1]) != TFmode);
14865
14866 if (fisttp)
14867 return "fisttp%Z0\t%0";
14868
14869 strcpy (buf, "fist");
14870
14871 if (round_mode != I387_CW_ANY)
14872 output_asm_insn ("fldcw\t%3", operands);
14873
14874 p = "p%Z0\t%0";
14875 strcat (buf, p + !(stack_top_dies || dimode_p));
14876
14877 output_asm_insn (buf, operands);
14878
14879 if (round_mode != I387_CW_ANY)
14880 output_asm_insn ("fldcw\t%2", operands);
14881
14882 return "";
14883 }
14884
14885 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14886 have the values zero or one, indicates the ffreep insn's operand
14887 from the OPERANDS array. */
14888
14889 static const char *
14890 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14891 {
14892 if (TARGET_USE_FFREEP)
14893 #ifdef HAVE_AS_IX86_FFREEP
14894 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14895 #else
14896 {
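/* The assembler lacks the ffreep mnemonic, so emit the raw
encoding: ffreep %st(N) is the two bytes 0xdf 0xc0+N, written
here as a little-endian .word 0xc<N>df. */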
14897 static char retval[32];
14898 int regno = REGNO (operands[opno]);
14899
14900 gcc_assert (STACK_REGNO_P (regno));
14901
14902 regno -= FIRST_STACK_REG;
14903
14904 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
14905 return retval;
14906 }
14907 #endif
14908
14909 return opno ? "fstp\t%y1" : "fstp\t%y0";
14910 }
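
/* Note (added commentary): when the assembler does not understand "ffreep",
   the opcode bytes are emitted directly.  "ffreep %st(N)" encodes as
   DF C0+N, and since ASM_SHORT emits a little-endian 16-bit word, e.g.
   ".value 0xc3df" (for N == 3) produces exactly the bytes DF C3.  */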
14911
14912
14913 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14914 should be used. UNORDERED_P is true when fucom should be used. */
14915
14916 const char *
14917 output_fp_compare (rtx_insn *insn, rtx *operands,
14918 bool eflags_p, bool unordered_p)
14919 {
14920 rtx *xops = eflags_p ? &operands[0] : &operands[1];
14921 bool stack_top_dies;
14922
14923 static char buf[40];
14924 const char *p;
14925
14926 gcc_assert (STACK_TOP_P (xops[0]));
14927
14928 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14929
14930 if (eflags_p)
14931 {
14932 p = unordered_p ? "fucomi" : "fcomi";
14933 strcpy (buf, p);
14934
14935 p = "p\t{%y1, %0|%0, %y1}";
14936 strcat (buf, p + !stack_top_dies);
14937
14938 return buf;
14939 }
14940
14941 if (STACK_REG_P (xops[1])
14942 && stack_top_dies
14943 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
14944 {
14945 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
14946
14947 /* If the top of the 387 stack dies, and the other operand
14948 is also a stack register that dies, then this must be a
14949 `fcompp' float compare. */
14950 p = unordered_p ? "fucompp" : "fcompp";
14951 strcpy (buf, p);
14952 }
14953 else if (const0_operand (xops[1], VOIDmode))
14954 {
14955 gcc_assert (!unordered_p);
14956 strcpy (buf, "ftst");
14957 }
14958 else
14959 {
14960 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
14961 {
14962 gcc_assert (!unordered_p);
14963 p = "ficom";
14964 }
14965 else
14966 p = unordered_p ? "fucom" : "fcom";
14967
14968 strcpy (buf, p);
14969
14970 p = "p%Z2\t%y2";
14971 strcat (buf, p + !stack_top_dies);
14972 }
14973
14974 output_asm_insn (buf, operands);
14975 return "fnstsw\t%0";
14976 }
14977
14978 void
14979 ix86_output_addr_vec_elt (FILE *file, int value)
14980 {
14981 const char *directive = ASM_LONG;
14982
14983 #ifdef ASM_QUAD
14984 if (TARGET_LP64)
14985 directive = ASM_QUAD;
14986 #else
14987 gcc_assert (!TARGET_64BIT);
14988 #endif
14989
14990 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14991 }
14992
14993 void
14994 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14995 {
14996 const char *directive = ASM_LONG;
14997
14998 #ifdef ASM_QUAD
14999 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
15000 directive = ASM_QUAD;
15001 #else
15002 gcc_assert (!TARGET_64BIT);
15003 #endif
15004 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
15005 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
15006 fprintf (file, "%s%s%d-%s%d\n",
15007 directive, LPREFIX, value, LPREFIX, rel);
15008 #if TARGET_MACHO
15009 else if (TARGET_MACHO)
15010 {
15011 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
15012 machopic_output_function_base_name (file);
15013 putc ('\n', file);
15014 }
15015 #endif
15016 else if (HAVE_AS_GOTOFF_IN_DATA)
15017 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
15018 else
15019 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
15020 GOT_SYMBOL_NAME, LPREFIX, value);
15021 }
15022 \f
15023 #define LEA_MAX_STALL (3)
15024 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
15025
15026 /* Increase given DISTANCE in half-cycles according to
15027 dependencies between PREV and NEXT instructions.
15028 Add 1 half-cycle if there is no dependency and
15029 go to the next cycle if there is some dependency. */
15030
15031 static unsigned int
15032 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
15033 {
15034 df_ref def, use;
15035
15036 if (!prev || !next)
15037 return distance + (distance & 1) + 2;
15038
15039 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
15040 return distance + 1;
15041
15042 FOR_EACH_INSN_USE (use, next)
15043 FOR_EACH_INSN_DEF (def, prev)
15044 if (!DF_REF_IS_ARTIFICIAL (def)
15045 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
15046 return distance + (distance & 1) + 2;
15047
15048 return distance + 1;
15049 }
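
/* Worked example (added commentary): distances are kept in half-cycles.
   With no dependency between PREV and NEXT the distance grows by one
   half-cycle; with a dependency (or at a block boundary) it is rounded up
   to a full cycle and a further cycle is added, e.g. a distance of 3
   (1.5 cycles) becomes 3 + (3 & 1) + 2 = 6 (3 cycles).  */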
15050
15051 /* Function checks if instruction INSN defines register number
15052 REGNO1 or REGNO2. */
15053
15054 bool
15055 insn_defines_reg (unsigned int regno1, unsigned int regno2,
15056 rtx_insn *insn)
15057 {
15058 df_ref def;
15059
15060 FOR_EACH_INSN_DEF (def, insn)
15061 if (DF_REF_REG_DEF_P (def)
15062 && !DF_REF_IS_ARTIFICIAL (def)
15063 && (regno1 == DF_REF_REGNO (def)
15064 || regno2 == DF_REF_REGNO (def)))
15065 return true;
15066
15067 return false;
15068 }
15069
15070 /* Function checks if instruction INSN uses register number
15071 REGNO as a part of address expression. */
15072
15073 static bool
15074 insn_uses_reg_mem (unsigned int regno, rtx insn)
15075 {
15076 df_ref use;
15077
15078 FOR_EACH_INSN_USE (use, insn)
15079 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
15080 return true;
15081
15082 return false;
15083 }
15084
15085 /* Search backward for non-agu definition of register number REGNO1
15086 or register number REGNO2 in basic block starting from instruction
15087 START up to head of basic block or instruction INSN.
15088
15089 Function puts true value into *FOUND var if definition was found
15090 and false otherwise.
15091
15092 Distance in half-cycles between START and found instruction or head
15093 of BB is added to DISTANCE and returned. */
15094
15095 static int
15096 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
15097 rtx_insn *insn, int distance,
15098 rtx_insn *start, bool *found)
15099 {
15100 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
15101 rtx_insn *prev = start;
15102 rtx_insn *next = NULL;
15103
15104 *found = false;
15105
15106 while (prev
15107 && prev != insn
15108 && distance < LEA_SEARCH_THRESHOLD)
15109 {
15110 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
15111 {
15112 distance = increase_distance (prev, next, distance);
15113 if (insn_defines_reg (regno1, regno2, prev))
15114 {
15115 if (recog_memoized (prev) < 0
15116 || get_attr_type (prev) != TYPE_LEA)
15117 {
15118 *found = true;
15119 return distance;
15120 }
15121 }
15122
15123 next = prev;
15124 }
15125 if (prev == BB_HEAD (bb))
15126 break;
15127
15128 prev = PREV_INSN (prev);
15129 }
15130
15131 return distance;
15132 }
15133
15134 /* Search backward for non-agu definition of register number REGNO1
15135 or register number REGNO2 in INSN's basic block until
15136 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15137 2. Reach neighbor BBs boundary, or
15138 3. Reach agu definition.
15139 Returns the distance between the non-agu definition point and INSN.
15140 If no definition point, returns -1. */
15141
15142 static int
15143 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15144 rtx_insn *insn)
15145 {
15146 basic_block bb = BLOCK_FOR_INSN (insn);
15147 int distance = 0;
15148 bool found = false;
15149
15150 if (insn != BB_HEAD (bb))
15151 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
15152 distance, PREV_INSN (insn),
15153 &found);
15154
15155 if (!found && distance < LEA_SEARCH_THRESHOLD)
15156 {
15157 edge e;
15158 edge_iterator ei;
15159 bool simple_loop = false;
15160
15161 FOR_EACH_EDGE (e, ei, bb->preds)
15162 if (e->src == bb)
15163 {
15164 simple_loop = true;
15165 break;
15166 }
15167
15168 if (simple_loop)
15169 distance = distance_non_agu_define_in_bb (regno1, regno2,
15170 insn, distance,
15171 BB_END (bb), &found);
15172 else
15173 {
15174 int shortest_dist = -1;
15175 bool found_in_bb = false;
15176
15177 FOR_EACH_EDGE (e, ei, bb->preds)
15178 {
15179 int bb_dist
15180 = distance_non_agu_define_in_bb (regno1, regno2,
15181 insn, distance,
15182 BB_END (e->src),
15183 &found_in_bb);
15184 if (found_in_bb)
15185 {
15186 if (shortest_dist < 0)
15187 shortest_dist = bb_dist;
15188 else if (bb_dist > 0)
15189 shortest_dist = MIN (bb_dist, shortest_dist);
15190
15191 found = true;
15192 }
15193 }
15194
15195 distance = shortest_dist;
15196 }
15197 }
15198
15199 if (!found)
15200 return -1;
15201
15202 return distance >> 1;
15203 }
15204
15205 /* Return the distance in half-cycles between INSN and the next
15206 insn that uses register number REGNO in a memory address, added
15207 to DISTANCE. Return -1 if REGNO is set.
15208
15209 Put true value into *FOUND if register usage was found and
15210 false otherwise.
15211 Put true value into *REDEFINED if register redefinition was
15212 found and false otherwise. */
15213
15214 static int
15215 distance_agu_use_in_bb (unsigned int regno,
15216 rtx_insn *insn, int distance, rtx_insn *start,
15217 bool *found, bool *redefined)
15218 {
15219 basic_block bb = NULL;
15220 rtx_insn *next = start;
15221 rtx_insn *prev = NULL;
15222
15223 *found = false;
15224 *redefined = false;
15225
15226 if (start != NULL_RTX)
15227 {
15228 bb = BLOCK_FOR_INSN (start);
15229 if (start != BB_HEAD (bb))
15230 /* If insn and start belong to the same bb, set prev to insn,
15231 so the call to increase_distance will increase the distance
15232 between insns by 1. */
15233 prev = insn;
15234 }
15235
15236 while (next
15237 && next != insn
15238 && distance < LEA_SEARCH_THRESHOLD)
15239 {
15240 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
15241 {
15242 distance = increase_distance(prev, next, distance);
15243 if (insn_uses_reg_mem (regno, next))
15244 {
15245 /* Return DISTANCE if OP0 is used in memory
15246 address in NEXT. */
15247 *found = true;
15248 return distance;
15249 }
15250
15251 if (insn_defines_reg (regno, INVALID_REGNUM, next))
15252 {
15253 /* Return -1 if OP0 is set in NEXT. */
15254 *redefined = true;
15255 return -1;
15256 }
15257
15258 prev = next;
15259 }
15260
15261 if (next == BB_END (bb))
15262 break;
15263
15264 next = NEXT_INSN (next);
15265 }
15266
15267 return distance;
15268 }
15269
15270 /* Return the distance between INSN and the next insn that uses
15271 register number REGNO0 in a memory address. Return -1 if no such
15272 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
15273
15274 static int
15275 distance_agu_use (unsigned int regno0, rtx_insn *insn)
15276 {
15277 basic_block bb = BLOCK_FOR_INSN (insn);
15278 int distance = 0;
15279 bool found = false;
15280 bool redefined = false;
15281
15282 if (insn != BB_END (bb))
15283 distance = distance_agu_use_in_bb (regno0, insn, distance,
15284 NEXT_INSN (insn),
15285 &found, &redefined);
15286
15287 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
15288 {
15289 edge e;
15290 edge_iterator ei;
15291 bool simple_loop = false;
15292
15293 FOR_EACH_EDGE (e, ei, bb->succs)
15294 if (e->dest == bb)
15295 {
15296 simple_loop = true;
15297 break;
15298 }
15299
15300 if (simple_loop)
15301 distance = distance_agu_use_in_bb (regno0, insn,
15302 distance, BB_HEAD (bb),
15303 &found, &redefined);
15304 else
15305 {
15306 int shortest_dist = -1;
15307 bool found_in_bb = false;
15308 bool redefined_in_bb = false;
15309
15310 FOR_EACH_EDGE (e, ei, bb->succs)
15311 {
15312 int bb_dist
15313 = distance_agu_use_in_bb (regno0, insn,
15314 distance, BB_HEAD (e->dest),
15315 &found_in_bb, &redefined_in_bb);
15316 if (found_in_bb)
15317 {
15318 if (shortest_dist < 0)
15319 shortest_dist = bb_dist;
15320 else if (bb_dist > 0)
15321 shortest_dist = MIN (bb_dist, shortest_dist);
15322
15323 found = true;
15324 }
15325 }
15326
15327 distance = shortest_dist;
15328 }
15329 }
15330
15331 if (!found || redefined)
15332 return -1;
15333
15334 return distance >> 1;
15335 }
15336
15337 /* Define this macro to tune LEA priority vs ADD; it takes effect when
15338 there is a dilemma of choosing LEA or ADD:
15339 Negative value: ADD is preferred over LEA
15340 Zero: Neutral
15341 Positive value: LEA is preferred over ADD. */
15342 #define IX86_LEA_PRIORITY 0
15343
15344 /* Return true if using lea INSN has a performance advantage
15345 over a sequence of instructions. The instruction sequence has
15346 SPLIT_COST cycles higher latency than the lea itself. */
15347
15348 static bool
15349 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
15350 unsigned int regno2, int split_cost, bool has_scale)
15351 {
15352 int dist_define, dist_use;
15353
15354 /* For Atom processors newer than Bonnell, if using a 2-source or
15355 3-source LEA for non-destructive destination purposes, or due to
15356 wanting ability to use SCALE, the use of LEA is justified. */
15357 if (!TARGET_CPU_P (BONNELL))
15358 {
15359 if (has_scale)
15360 return true;
15361 if (split_cost < 1)
15362 return false;
15363 if (regno0 == regno1 || regno0 == regno2)
15364 return false;
15365 return true;
15366 }
15367
15368 /* Remember recog_data content. */
15369 struct recog_data_d recog_data_save = recog_data;
15370
15371 dist_define = distance_non_agu_define (regno1, regno2, insn);
15372 dist_use = distance_agu_use (regno0, insn);
15373
15374 /* distance_non_agu_define can call get_attr_type which can call
15375 recog_memoized, restore recog_data back to previous content. */
15376 recog_data = recog_data_save;
15377
15378 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
15379 {
15380 /* If there is no non-AGU operand definition, no AGU
15381 operand usage and the split cost is 0, then both the lea
15382 and non-lea variants have the same priority. Currently
15383 we prefer lea for 64-bit code and non-lea for 32-bit
15384 code. */
15385 if (dist_use < 0 && split_cost == 0)
15386 return TARGET_64BIT || IX86_LEA_PRIORITY;
15387 else
15388 return true;
15389 }
15390
15391 /* The longer the distance to the definition, the more preferable
15392 lea becomes. Adjust the distance here to take the splitting cost
15393 and lea priority into account. */
15394 dist_define += split_cost + IX86_LEA_PRIORITY;
15395
15396 /* If there is no use in a memory address then we just check
15397 that the split cost exceeds the AGU stall. */
15398 if (dist_use < 0)
15399 return dist_define > LEA_MAX_STALL;
15400
15401 /* If this insn has both a backward non-AGU dependence and a forward
15402 AGU dependence, the one with the shorter distance takes effect. */
15403 return dist_define >= dist_use;
15404 }
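
/* Worked example (added commentary, Bonnell tuning): suppose the lea's
   inputs were defined by a non-LEA insn one cycle back (dist_define == 1),
   the result feeds an address three cycles ahead (dist_use == 3), and
   split_cost is 1.  The adjusted definition distance is
   1 + 1 + IX86_LEA_PRIORITY = 2, which is less than dist_use, so the
   function returns false and the caller splits the lea.  */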
15405
15406 /* Return true if we need to split op0 = op1 + op2 into a sequence of
15407 move and add to avoid AGU stalls. */
15408
15409 bool
15410 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
15411 {
15412 unsigned int regno0, regno1, regno2;
15413
15414 /* Check if we need to optimize. */
15415 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15416 return false;
15417
15418 regno0 = true_regnum (operands[0]);
15419 regno1 = true_regnum (operands[1]);
15420 regno2 = true_regnum (operands[2]);
15421
15422 /* We only need to split adds with a non-destructive
15423 destination operand. */
15424 if (regno0 == regno1 || regno0 == regno2)
15425 return false;
15426 else
15427 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
15428 }
15429
15430 /* Return true if we should emit lea instruction instead of mov
15431 instruction. */
15432
15433 bool
15434 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
15435 {
15436 unsigned int regno0, regno1;
15437
15438 /* Check if we need to optimize. */
15439 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15440 return false;
15441
15442 /* Use lea for reg to reg moves only. */
15443 if (!REG_P (operands[0]) || !REG_P (operands[1]))
15444 return false;
15445
15446 regno0 = true_regnum (operands[0]);
15447 regno1 = true_regnum (operands[1]);
15448
15449 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
15450 }
15451
15452 /* Return true if we need to split lea into a sequence of
15453 instructions to avoid AGU stalls during peephole2. */
15454
15455 bool
15456 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
15457 {
15458 unsigned int regno0, regno1, regno2;
15459 int split_cost;
15460 struct ix86_address parts;
15461 int ok;
15462
15463 /* The "at least two components" test below might not catch simple
15464 move or zero extension insns if parts.base is non-NULL and parts.disp
15465 is const0_rtx as the only components in the address, e.g. if the
15466 register is %rbp or %r13. As this test is much cheaper and moves or
15467 zero extensions are the common case, do this check first. */
15468 if (REG_P (operands[1])
15469 || (SImode_address_operand (operands[1], VOIDmode)
15470 && REG_P (XEXP (operands[1], 0))))
15471 return false;
15472
15473 ok = ix86_decompose_address (operands[1], &parts);
15474 gcc_assert (ok);
15475
15476 /* There should be at least two components in the address. */
15477 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
15478 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
15479 return false;
15480
15481 /* We should not split into add if a non-legitimate PIC
15482 operand is used as the displacement. */
15483 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
15484 return false;
15485
15486 regno0 = true_regnum (operands[0]);
15487 regno1 = INVALID_REGNUM;
15488 regno2 = INVALID_REGNUM;
15489
15490 if (parts.base)
15491 regno1 = true_regnum (parts.base);
15492 if (parts.index)
15493 regno2 = true_regnum (parts.index);
15494
15495 /* Use add for a = a + b and a = b + a since it is faster and shorter
15496 than lea for most processors. For the processors like BONNELL, if
15497 the destination register of LEA holds an actual address which will
15498 be used soon, LEA is better and otherwise ADD is better. */
15499 if (!TARGET_CPU_P (BONNELL)
15500 && parts.scale == 1
15501 && (!parts.disp || parts.disp == const0_rtx)
15502 && (regno0 == regno1 || regno0 == regno2))
15503 return true;
15504
15505 /* Check we need to optimize. */
15506 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
15507 return false;
15508
15509 split_cost = 0;
15510
15511 /* Compute how many cycles we will add to execution time
15512 if split lea into a sequence of instructions. */
15513 if (parts.base || parts.index)
15514 {
15515 /* Have to use a mov instruction if the non-destructive
15516 destination form is used. */
15517 if (regno1 != regno0 && regno2 != regno0)
15518 split_cost += 1;
15519
15520 /* Have to add index to base if both exist. */
15521 if (parts.base && parts.index)
15522 split_cost += 1;
15523
15524 /* Have to use shift and adds if scale is 2 or greater. */
15525 if (parts.scale > 1)
15526 {
15527 if (regno0 != regno1)
15528 split_cost += 1;
15529 else if (regno2 == regno0)
15530 split_cost += 4;
15531 else
15532 split_cost += parts.scale;
15533 }
15534
15535 /* Have to use add instruction with immediate if
15536 disp is non zero. */
15537 if (parts.disp && parts.disp != const0_rtx)
15538 split_cost += 1;
15539
15540 /* Subtract the price of lea. */
15541 split_cost -= 1;
15542 }
15543
15544 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
15545 parts.scale > 1);
15546 }
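
/* Worked example (added commentary): for "lea 4(%rbx,%rcx,2), %rax" the
   split cost above is 1 (mov, non-destructive destination) + 1 (add of
   base and index) + 1 (shift for scale 2, regno0 != regno1) + 1 (add of
   the displacement) - 1 (the lea itself) = 3.  On non-Bonnell tuning the
   scale > 1 argument then makes ix86_lea_outperforms return true, so the
   lea is kept.  */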
15547
15548 /* Return true if it is ok to optimize an ADD operation to LEA
15549 operation to avoid flag register consumption. For most processors,
15550 ADD is faster than LEA. For the processors like BONNELL, if the
15551 destination register of LEA holds an actual address which will be
15552 used soon, LEA is better and otherwise ADD is better. */
15553
15554 bool
15555 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
15556 {
15557 unsigned int regno0 = true_regnum (operands[0]);
15558 unsigned int regno1 = true_regnum (operands[1]);
15559 unsigned int regno2 = true_regnum (operands[2]);
15560
15561 /* If a = b + c with a != b and a != c, we must use the lea form. */
15562 if (regno0 != regno1 && regno0 != regno2)
15563 return true;
15564
15565 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15566 return false;
15567
15568 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
15569 }
15570
15571 /* Return true if destination reg of SET_BODY is shift count of
15572 USE_BODY. */
15573
15574 static bool
15575 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15576 {
15577 rtx set_dest;
15578 rtx shift_rtx;
15579 int i;
15580
15581 /* Retrieve destination of SET_BODY. */
15582 switch (GET_CODE (set_body))
15583 {
15584 case SET:
15585 set_dest = SET_DEST (set_body);
15586 if (!set_dest || !REG_P (set_dest))
15587 return false;
15588 break;
15589 case PARALLEL:
15590 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15591 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15592 use_body))
15593 return true;
15594 /* FALLTHROUGH */
15595 default:
15596 return false;
15597 }
15598
15599 /* Retrieve shift count of USE_BODY. */
15600 switch (GET_CODE (use_body))
15601 {
15602 case SET:
15603 shift_rtx = XEXP (use_body, 1);
15604 break;
15605 case PARALLEL:
15606 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15607 if (ix86_dep_by_shift_count_body (set_body,
15608 XVECEXP (use_body, 0, i)))
15609 return true;
15610 /* FALLTHROUGH */
15611 default:
15612 return false;
15613 }
15614
15615 if (shift_rtx
15616 && (GET_CODE (shift_rtx) == ASHIFT
15617 || GET_CODE (shift_rtx) == LSHIFTRT
15618 || GET_CODE (shift_rtx) == ASHIFTRT
15619 || GET_CODE (shift_rtx) == ROTATE
15620 || GET_CODE (shift_rtx) == ROTATERT))
15621 {
15622 rtx shift_count = XEXP (shift_rtx, 1);
15623
15624 /* Return true if shift count is dest of SET_BODY. */
15625 if (REG_P (shift_count))
15626 {
15627 /* Add check since it can be invoked before register
15628 allocation in pre-reload schedule. */
15629 if (reload_completed
15630 && true_regnum (set_dest) == true_regnum (shift_count))
15631 return true;
15632 else if (REGNO(set_dest) == REGNO(shift_count))
15633 return true;
15634 }
15635 }
15636
15637 return false;
15638 }
15639
15640 /* Return true if destination reg of SET_INSN is shift count of
15641 USE_INSN. */
15642
15643 bool
15644 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15645 {
15646 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15647 PATTERN (use_insn));
15648 }
15649
15650 /* Return TRUE or FALSE depending on whether the unary operator meets the
15651 appropriate constraints. */
15652
15653 bool
15654 ix86_unary_operator_ok (enum rtx_code,
15655 machine_mode,
15656 rtx operands[2])
15657 {
15658 /* If one of operands is memory, source and destination must match. */
15659 if ((MEM_P (operands[0])
15660 || MEM_P (operands[1]))
15661 && ! rtx_equal_p (operands[0], operands[1]))
15662 return false;
15663 return true;
15664 }
15665
15666 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15667 are ok, keeping in mind the possible movddup alternative. */
15668
15669 bool
15670 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15671 {
15672 if (MEM_P (operands[0]))
15673 return rtx_equal_p (operands[0], operands[1 + high]);
15674 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15675 return false;
15676 return true;
15677 }
15678
15679 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15680 then replicate the value for all elements of the vector
15681 register. */
15682
15683 rtx
15684 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
15685 {
15686 int i, n_elt;
15687 rtvec v;
15688 machine_mode scalar_mode;
15689
15690 switch (mode)
15691 {
15692 case E_V64QImode:
15693 case E_V32QImode:
15694 case E_V16QImode:
15695 case E_V32HImode:
15696 case E_V16HImode:
15697 case E_V8HImode:
15698 case E_V16SImode:
15699 case E_V8SImode:
15700 case E_V4SImode:
15701 case E_V2SImode:
15702 case E_V8DImode:
15703 case E_V4DImode:
15704 case E_V2DImode:
15705 gcc_assert (vect);
15706 /* FALLTHRU */
15707 case E_V8HFmode:
15708 case E_V16HFmode:
15709 case E_V32HFmode:
15710 case E_V16SFmode:
15711 case E_V8SFmode:
15712 case E_V4SFmode:
15713 case E_V2SFmode:
15714 case E_V8DFmode:
15715 case E_V4DFmode:
15716 case E_V2DFmode:
15717 n_elt = GET_MODE_NUNITS (mode);
15718 v = rtvec_alloc (n_elt);
15719 scalar_mode = GET_MODE_INNER (mode);
15720
15721 RTVEC_ELT (v, 0) = value;
15722
15723 for (i = 1; i < n_elt; ++i)
15724 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
15725
15726 return gen_rtx_CONST_VECTOR (mode, v);
15727
15728 default:
15729 gcc_unreachable ();
15730 }
15731 }
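
/* Example (added commentary): ix86_build_const_vector (V4SImode, false, x)
   yields {x, 0, 0, 0}, while VECT == true replicates x into all four
   elements.  */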
15732
15733 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15734 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15735 for an SSE register. If VECT is true, then replicate the mask for
15736 all elements of the vector register. If INVERT is true, then create
15737 a mask excluding the sign bit. */
15738
15739 rtx
15740 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
15741 {
15742 machine_mode vec_mode, imode;
15743 wide_int w;
15744 rtx mask, v;
15745
15746 switch (mode)
15747 {
15748 case E_V8HFmode:
15749 case E_V16HFmode:
15750 case E_V32HFmode:
15751 vec_mode = mode;
15752 imode = HImode;
15753 break;
15754
15755 case E_V16SImode:
15756 case E_V16SFmode:
15757 case E_V8SImode:
15758 case E_V4SImode:
15759 case E_V8SFmode:
15760 case E_V4SFmode:
15761 case E_V2SFmode:
15762 case E_V2SImode:
15763 vec_mode = mode;
15764 imode = SImode;
15765 break;
15766
15767 case E_V8DImode:
15768 case E_V4DImode:
15769 case E_V2DImode:
15770 case E_V8DFmode:
15771 case E_V4DFmode:
15772 case E_V2DFmode:
15773 vec_mode = mode;
15774 imode = DImode;
15775 break;
15776
15777 case E_TImode:
15778 case E_TFmode:
15779 vec_mode = VOIDmode;
15780 imode = TImode;
15781 break;
15782
15783 default:
15784 gcc_unreachable ();
15785 }
15786
15787 machine_mode inner_mode = GET_MODE_INNER (mode);
15788 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
15789 GET_MODE_BITSIZE (inner_mode));
15790 if (invert)
15791 w = wi::bit_not (w);
15792
15793 /* Force this value into the low part of a fp vector constant. */
15794 mask = immed_wide_int_const (w, imode);
15795 mask = gen_lowpart (inner_mode, mask);
15796
15797 if (vec_mode == VOIDmode)
15798 return force_reg (inner_mode, mask);
15799
15800 v = ix86_build_const_vector (vec_mode, vect, mask);
15801 return force_reg (vec_mode, v);
15802 }
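
/* Example (added commentary): for V4SFmode with VECT set and INVERT clear
   the result is the vector {-0.0f, -0.0f, -0.0f, -0.0f}, i.e. the bit
   pattern 0x80000000 replicated; with INVERT set it is 0x7fffffff
   replicated, suitable for masking the sign bit away in abs expansions.  */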
15803
15804 /* Return HOST_WIDE_INT for const vector OP in MODE. */
15805
15806 HOST_WIDE_INT
15807 ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
15808 {
15809 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
15810 gcc_unreachable ();
15811
15812 int nunits = GET_MODE_NUNITS (mode);
15813 wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
15814 machine_mode innermode = GET_MODE_INNER (mode);
15815 unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
15816
15817 switch (mode)
15818 {
15819 case E_V2QImode:
15820 case E_V4QImode:
15821 case E_V2HImode:
15822 case E_V8QImode:
15823 case E_V4HImode:
15824 case E_V2SImode:
15825 for (int i = 0; i < nunits; ++i)
15826 {
15827 int v = INTVAL (XVECEXP (op, 0, i));
15828 wide_int wv = wi::shwi (v, innermode_bits);
15829 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
15830 }
15831 break;
15832 case E_V2HFmode:
15833 case E_V2BFmode:
15834 case E_V4HFmode:
15835 case E_V4BFmode:
15836 case E_V2SFmode:
15837 for (int i = 0; i < nunits; ++i)
15838 {
15839 rtx x = XVECEXP (op, 0, i);
15840 int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
15841 REAL_MODE_FORMAT (innermode));
15842 wide_int wv = wi::shwi (v, innermode_bits);
15843 val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
15844 }
15845 break;
15846 default:
15847 gcc_unreachable ();
15848 }
15849
15850 return val.to_shwi ();
15851 }
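
/* Worked example (added commentary): element I is inserted at bit position
   I * GET_MODE_BITSIZE (inner mode), so the V4QImode constant {1, 2, 3, 4}
   converts to the integer 0x04030201.  */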
15852
15853 /* Return TRUE or FALSE depending on whether the first SET in INSN
15854 has source and destination with matching CC modes, and that the
15855 CC mode is at least as constrained as REQ_MODE. */
15856
15857 bool
15858 ix86_match_ccmode (rtx insn, machine_mode req_mode)
15859 {
15860 rtx set;
15861 machine_mode set_mode;
15862
15863 set = PATTERN (insn);
15864 if (GET_CODE (set) == PARALLEL)
15865 set = XVECEXP (set, 0, 0);
15866 gcc_assert (GET_CODE (set) == SET);
15867 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15868
15869 set_mode = GET_MODE (SET_DEST (set));
15870 switch (set_mode)
15871 {
15872 case E_CCNOmode:
15873 if (req_mode != CCNOmode
15874 && (req_mode != CCmode
15875 || XEXP (SET_SRC (set), 1) != const0_rtx))
15876 return false;
15877 break;
15878 case E_CCmode:
15879 if (req_mode == CCGCmode)
15880 return false;
15881 /* FALLTHRU */
15882 case E_CCGCmode:
15883 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15884 return false;
15885 /* FALLTHRU */
15886 case E_CCGOCmode:
15887 if (req_mode == CCZmode)
15888 return false;
15889 /* FALLTHRU */
15890 case E_CCZmode:
15891 break;
15892
15893 case E_CCGZmode:
15894
15895 case E_CCAmode:
15896 case E_CCCmode:
15897 case E_CCOmode:
15898 case E_CCPmode:
15899 case E_CCSmode:
15900 if (set_mode != req_mode)
15901 return false;
15902 break;
15903
15904 default:
15905 gcc_unreachable ();
15906 }
15907
15908 return GET_MODE (SET_SRC (set)) == set_mode;
15909 }
15910
15911 machine_mode
15912 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15913 {
15914 machine_mode mode = GET_MODE (op0);
15915
15916 if (SCALAR_FLOAT_MODE_P (mode))
15917 {
15918 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15919 return CCFPmode;
15920 }
15921
15922 switch (code)
15923 {
15924 /* Only zero flag is needed. */
15925 case EQ: /* ZF=0 */
15926 case NE: /* ZF!=0 */
15927 return CCZmode;
15928 /* Codes needing carry flag. */
15929 case GEU: /* CF=0 */
15930 case LTU: /* CF=1 */
15931 rtx geu;
15932 /* Detect overflow checks. They need just the carry flag. */
15933 if (GET_CODE (op0) == PLUS
15934 && (rtx_equal_p (op1, XEXP (op0, 0))
15935 || rtx_equal_p (op1, XEXP (op0, 1))))
15936 return CCCmode;
15937 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
15938 Match LTU of op0
15939 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
15940 and op1
15941 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
15942 where CC_CCC is either CC or CCC. */
15943 else if (code == LTU
15944 && GET_CODE (op0) == NEG
15945 && GET_CODE (geu = XEXP (op0, 0)) == GEU
15946 && REG_P (XEXP (geu, 0))
15947 && (GET_MODE (XEXP (geu, 0)) == CCCmode
15948 || GET_MODE (XEXP (geu, 0)) == CCmode)
15949 && REGNO (XEXP (geu, 0)) == FLAGS_REG
15950 && XEXP (geu, 1) == const0_rtx
15951 && GET_CODE (op1) == LTU
15952 && REG_P (XEXP (op1, 0))
15953 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
15954 && REGNO (XEXP (op1, 0)) == FLAGS_REG
15955 && XEXP (op1, 1) == const0_rtx)
15956 return CCCmode;
15957 else
15958 return CCmode;
15959 case GTU: /* CF=0 & ZF=0 */
15960 case LEU: /* CF=1 | ZF=1 */
15961 return CCmode;
15962 /* Codes possibly doable only with sign flag when
15963 comparing against zero. */
15964 case GE: /* SF=OF or SF=0 */
15965 case LT: /* SF<>OF or SF=1 */
15966 if (op1 == const0_rtx)
15967 return CCGOCmode;
15968 else
15969 /* For other cases Carry flag is not required. */
15970 return CCGCmode;
15971 /* Codes doable only with the sign flag when comparing
15972 against zero, but for which we lack a jump instruction,
15973 so we need to use relational tests against overflow,
15974 which thus needs to be zero. */
15975 case GT: /* ZF=0 & SF=OF */
15976 case LE: /* ZF=1 | SF<>OF */
15977 if (op1 == const0_rtx)
15978 return CCNOmode;
15979 else
15980 return CCGCmode;
15981 /* The strcmp pattern does (use flags) and combine may ask us for the
15982 proper mode. */
15983 case USE:
15984 return CCmode;
15985 default:
15986 gcc_unreachable ();
15987 }
15988 }
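
/* Examples (added commentary): (eq x 0) only needs ZF and gets CCZmode;
   (ltu (plus a b) a) is an overflow check and only needs CF, so it gets
   CCCmode; (lt x 0) can be tested with the sign flag alone and gets
   CCGOCmode; a general signed comparison falls back to CCGCmode.  */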
15989
15990 /* Return the fixed registers used for condition codes. */
15991
15992 static bool
15993 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15994 {
15995 *p1 = FLAGS_REG;
15996 *p2 = INVALID_REGNUM;
15997 return true;
15998 }
15999
16000 /* If two condition code modes are compatible, return a condition code
16001 mode which is compatible with both. Otherwise, return
16002 VOIDmode. */
16003
16004 static machine_mode
16005 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
16006 {
16007 if (m1 == m2)
16008 return m1;
16009
16010 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16011 return VOIDmode;
16012
16013 if ((m1 == CCGCmode && m2 == CCGOCmode)
16014 || (m1 == CCGOCmode && m2 == CCGCmode))
16015 return CCGCmode;
16016
16017 if ((m1 == CCNOmode && m2 == CCGOCmode)
16018 || (m1 == CCGOCmode && m2 == CCNOmode))
16019 return CCNOmode;
16020
16021 if (m1 == CCZmode
16022 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
16023 return m2;
16024 else if (m2 == CCZmode
16025 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
16026 return m1;
16027
16028 switch (m1)
16029 {
16030 default:
16031 gcc_unreachable ();
16032
16033 case E_CCmode:
16034 case E_CCGCmode:
16035 case E_CCGOCmode:
16036 case E_CCNOmode:
16037 case E_CCAmode:
16038 case E_CCCmode:
16039 case E_CCOmode:
16040 case E_CCPmode:
16041 case E_CCSmode:
16042 case E_CCZmode:
16043 switch (m2)
16044 {
16045 default:
16046 return VOIDmode;
16047
16048 case E_CCmode:
16049 case E_CCGCmode:
16050 case E_CCGOCmode:
16051 case E_CCNOmode:
16052 case E_CCAmode:
16053 case E_CCCmode:
16054 case E_CCOmode:
16055 case E_CCPmode:
16056 case E_CCSmode:
16057 case E_CCZmode:
16058 return CCmode;
16059 }
16060
16061 case E_CCFPmode:
16062 /* These are only compatible with themselves, which we already
16063 checked above. */
16064 return VOIDmode;
16065 }
16066 }
16067
16068 /* Return strategy to use for floating-point. We assume that fcomi is always
16069 preferable where available, since that is also true when looking at size
16070 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16071
16072 enum ix86_fpcmp_strategy
16073 ix86_fp_comparison_strategy (enum rtx_code)
16074 {
16075 /* Do fcomi/sahf based test when profitable. */
16076
16077 if (TARGET_CMOVE)
16078 return IX86_FPCMP_COMI;
16079
16080 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
16081 return IX86_FPCMP_SAHF;
16082
16083 return IX86_FPCMP_ARITH;
16084 }
16085
16086 /* Convert comparison codes we use to represent FP comparison to integer
16087 code that will result in proper branch. Return UNKNOWN if no such code
16088 is available. */
16089
16090 enum rtx_code
16091 ix86_fp_compare_code_to_integer (enum rtx_code code)
16092 {
16093 switch (code)
16094 {
16095 case GT:
16096 return GTU;
16097 case GE:
16098 return GEU;
16099 case ORDERED:
16100 case UNORDERED:
16101 return code;
16102 case UNEQ:
16103 return EQ;
16104 case UNLT:
16105 return LTU;
16106 case UNLE:
16107 return LEU;
16108 case LTGT:
16109 return NE;
16110 default:
16111 return UNKNOWN;
16112 }
16113 }
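
/* Rationale (added commentary): fcomi/fucomi and [u]comis[sd] set the flags
   like an unsigned integer comparison (CF = "below", ZF = "equal", with an
   unordered result setting ZF, PF and CF), hence GT maps to GTU, UNLT to
   LTU, UNEQ to EQ, LTGT to NE, and so on.  */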
16114
16115 /* Zero-extend EXP, which may be SImode, to a Pmode register. */
16116 rtx
16117 ix86_zero_extend_to_Pmode (rtx exp)
16118 {
16119 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
16120 }
16121
16122 /* Return true if the function is called via PLT. */
16123
16124 bool
16125 ix86_call_use_plt_p (rtx call_op)
16126 {
16127 if (SYMBOL_REF_LOCAL_P (call_op))
16128 {
16129 if (SYMBOL_REF_DECL (call_op)
16130 && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
16131 {
16132 /* NB: All ifunc functions must be called via PLT. */
16133 cgraph_node *node
16134 = cgraph_node::get (SYMBOL_REF_DECL (call_op));
16135 if (node && node->ifunc_resolver)
16136 return true;
16137 }
16138 return false;
16139 }
16140 return true;
16141 }
16142
16143 /* Implement TARGET_IFUNC_REF_LOCAL_OK. If this hook returns true,
16144 the PLT entry will be used as the function address for local IFUNC
16145 functions. When the PIC register is needed for PLT call, indirect
16146 call via the PLT entry will fail since the PIC register may not be
16147 set up properly for indirect call. In this case, we should return
16148 false. */
16149
16150 static bool
16151 ix86_ifunc_ref_local_ok (void)
16152 {
16153 return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
16154 }
16155
16156 /* Return true if the function being called was marked with attribute
16157 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
16158 to handle the non-PIC case in the backend because there is no easy
16159 interface for the front-end to force non-PLT calls to use the GOT.
16160 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
16161 to call the function marked "noplt" indirectly. */
16162
16163 static bool
16164 ix86_nopic_noplt_attribute_p (rtx call_op)
16165 {
16166 if (flag_pic || ix86_cmodel == CM_LARGE
16167 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
16168 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
16169 || SYMBOL_REF_LOCAL_P (call_op))
16170 return false;
16171
16172 tree symbol_decl = SYMBOL_REF_DECL (call_op);
16173
16174 if (!flag_plt
16175 || (symbol_decl != NULL_TREE
16176 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
16177 return true;
16178
16179 return false;
16180 }
16181
16182 /* Helper to output the jmp/call. */
16183 static void
16184 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
16185 {
16186 if (thunk_name != NULL)
16187 {
16188 if (REX_INT_REGNO_P (regno)
16189 && ix86_indirect_branch_cs_prefix)
16190 fprintf (asm_out_file, "\tcs\n");
16191 fprintf (asm_out_file, "\tjmp\t");
16192 assemble_name (asm_out_file, thunk_name);
16193 putc ('\n', asm_out_file);
16194 if ((ix86_harden_sls & harden_sls_indirect_jmp))
16195 fputs ("\tint3\n", asm_out_file);
16196 }
16197 else
16198 output_indirect_thunk (regno);
16199 }
16200
16201 /* Output indirect branch via a call and return thunk. CALL_OP is a
16202 register which contains the branch target. XASM is the assembly
16203 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
16204 A normal call is converted to:
16205
16206 call __x86_indirect_thunk_reg
16207
16208 and a tail call is converted to:
16209
16210 jmp __x86_indirect_thunk_reg
16211 */
16212
16213 static void
16214 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
16215 {
16216 char thunk_name_buf[32];
16217 char *thunk_name;
16218 enum indirect_thunk_prefix need_prefix
16219 = indirect_thunk_need_prefix (current_output_insn);
16220 int regno = REGNO (call_op);
16221
16222 if (cfun->machine->indirect_branch_type
16223 != indirect_branch_thunk_inline)
16224 {
16225 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16226 SET_HARD_REG_BIT (indirect_thunks_used, regno);
16227
16228 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16229 thunk_name = thunk_name_buf;
16230 }
16231 else
16232 thunk_name = NULL;
16233
16234 if (sibcall_p)
16235 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16236 else
16237 {
16238 if (thunk_name != NULL)
16239 {
16240 if (REX_INT_REGNO_P (regno)
16241 && ix86_indirect_branch_cs_prefix)
16242 fprintf (asm_out_file, "\tcs\n");
16243 fprintf (asm_out_file, "\tcall\t");
16244 assemble_name (asm_out_file, thunk_name);
16245 putc ('\n', asm_out_file);
16246 return;
16247 }
16248
16249 char indirectlabel1[32];
16250 char indirectlabel2[32];
16251
16252 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16253 INDIRECT_LABEL,
16254 indirectlabelno++);
16255 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16256 INDIRECT_LABEL,
16257 indirectlabelno++);
16258
16259 /* Jump. */
16260 fputs ("\tjmp\t", asm_out_file);
16261 assemble_name_raw (asm_out_file, indirectlabel2);
16262 fputc ('\n', asm_out_file);
16263
16264 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16265
16266 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16267
16268 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16269
16270 /* Call. */
16271 fputs ("\tcall\t", asm_out_file);
16272 assemble_name_raw (asm_out_file, indirectlabel1);
16273 fputc ('\n', asm_out_file);
16274 }
16275 }
16276
16277 /* Output indirect branch via a call and return thunk. CALL_OP is
16278 the branch target. XASM is the assembly template for CALL_OP.
16279 Branch is a tail call if SIBCALL_P is true. A normal call is
16280 converted to:
16281
16282 jmp L2
16283 L1:
16284 push CALL_OP
16285 jmp __x86_indirect_thunk
16286 L2:
16287 call L1
16288
16289 and a tail call is converted to:
16290
16291 push CALL_OP
16292 jmp __x86_indirect_thunk
16293 */
16294
16295 static void
16296 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
16297 bool sibcall_p)
16298 {
16299 char thunk_name_buf[32];
16300 char *thunk_name;
16301 char push_buf[64];
16302 enum indirect_thunk_prefix need_prefix
16303 = indirect_thunk_need_prefix (current_output_insn);
16304 int regno = -1;
16305
16306 if (cfun->machine->indirect_branch_type
16307 != indirect_branch_thunk_inline)
16308 {
16309 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
16310 indirect_thunk_needed = true;
16311 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
16312 thunk_name = thunk_name_buf;
16313 }
16314 else
16315 thunk_name = NULL;
16316
16317 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
16318 TARGET_64BIT ? 'q' : 'l', xasm);
16319
16320 if (sibcall_p)
16321 {
16322 output_asm_insn (push_buf, &call_op);
16323 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16324 }
16325 else
16326 {
16327 char indirectlabel1[32];
16328 char indirectlabel2[32];
16329
16330 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
16331 INDIRECT_LABEL,
16332 indirectlabelno++);
16333 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
16334 INDIRECT_LABEL,
16335 indirectlabelno++);
16336
16337 /* Jump. */
16338 fputs ("\tjmp\t", asm_out_file);
16339 assemble_name_raw (asm_out_file, indirectlabel2);
16340 fputc ('\n', asm_out_file);
16341
16342 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
16343
16344 /* An external function may be called via GOT, instead of PLT. */
16345 if (MEM_P (call_op))
16346 {
16347 struct ix86_address parts;
16348 rtx addr = XEXP (call_op, 0);
16349 if (ix86_decompose_address (addr, &parts)
16350 && parts.base == stack_pointer_rtx)
16351 {
16352 /* Since call will adjust stack by -UNITS_PER_WORD,
16353 we must convert "disp(stack, index, scale)" to
16354 "disp+UNITS_PER_WORD(stack, index, scale)". */
16355 if (parts.index)
16356 {
16357 addr = gen_rtx_MULT (Pmode, parts.index,
16358 GEN_INT (parts.scale));
16359 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
16360 addr);
16361 }
16362 else
16363 addr = stack_pointer_rtx;
16364
16365 rtx disp;
16366 if (parts.disp != NULL_RTX)
16367 disp = plus_constant (Pmode, parts.disp,
16368 UNITS_PER_WORD);
16369 else
16370 disp = GEN_INT (UNITS_PER_WORD);
16371
16372 addr = gen_rtx_PLUS (Pmode, addr, disp);
16373 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
16374 }
16375 }
16376
16377 output_asm_insn (push_buf, &call_op);
16378
16379 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
16380
16381 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
16382
16383 /* Call. */
16384 fputs ("\tcall\t", asm_out_file);
16385 assemble_name_raw (asm_out_file, indirectlabel1);
16386 fputc ('\n', asm_out_file);
16387 }
16388 }
16389
16390 /* Output indirect branch via a call and return thunk. CALL_OP is
16391 the branch target. XASM is the assembly template for CALL_OP.
16392 Branch is a tail call if SIBCALL_P is true. */
16393
16394 static void
16395 ix86_output_indirect_branch (rtx call_op, const char *xasm,
16396 bool sibcall_p)
16397 {
16398 if (REG_P (call_op))
16399 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
16400 else
16401 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
16402 }
16403
16404 /* Output indirect jump. CALL_OP is the jump target. */
16405
16406 const char *
16407 ix86_output_indirect_jmp (rtx call_op)
16408 {
16409 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
16410 {
16411 /* We can't have a red zone since the "call" in the indirect thunk
16412 pushes the return address onto the stack, destroying the red zone. */
16413 if (ix86_red_zone_used)
16414 gcc_unreachable ();
16415
16416 ix86_output_indirect_branch (call_op, "%0", true);
16417 }
16418 else
16419 output_asm_insn ("%!jmp\t%A0", &call_op);
16420 return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
16421 }
16422
16423 /* Output return instrumentation for current function if needed. */
16424
16425 static void
16426 output_return_instrumentation (void)
16427 {
16428 if (ix86_instrument_return != instrument_return_none
16429 && flag_fentry
16430 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
16431 {
16432 if (ix86_flag_record_return)
16433 fprintf (asm_out_file, "1:\n");
16434 switch (ix86_instrument_return)
16435 {
16436 case instrument_return_call:
16437 fprintf (asm_out_file, "\tcall\t__return__\n");
16438 break;
16439 case instrument_return_nop5:
16440 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
16441 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
16442 break;
16443 case instrument_return_none:
16444 break;
16445 }
16446
16447 if (ix86_flag_record_return)
16448 {
16449 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
16450 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
16451 fprintf (asm_out_file, "\t.previous\n");
16452 }
16453 }
16454 }
16455
16456 /* Output the function return. Add a REP prefix to RET if LONG_P is
16457 true and the function return is kept. */
16458
16459 const char *
16460 ix86_output_function_return (bool long_p)
16461 {
16462 output_return_instrumentation ();
16463
16464 if (cfun->machine->function_return_type != indirect_branch_keep)
16465 {
16466 char thunk_name[32];
16467 enum indirect_thunk_prefix need_prefix
16468 = indirect_thunk_need_prefix (current_output_insn);
16469
16470 if (cfun->machine->function_return_type
16471 != indirect_branch_thunk_inline)
16472 {
16473 bool need_thunk = (cfun->machine->function_return_type
16474 == indirect_branch_thunk);
16475 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
16476 true);
16477 indirect_return_needed |= need_thunk;
16478 fprintf (asm_out_file, "\tjmp\t");
16479 assemble_name (asm_out_file, thunk_name);
16480 putc ('\n', asm_out_file);
16481 }
16482 else
16483 output_indirect_thunk (INVALID_REGNUM);
16484
16485 return "";
16486 }
16487
16488 output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
16489 return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
16490 }
16491
16492 /* Output indirect function return. RET_OP is the function return
16493 target. */
16494
16495 const char *
16496 ix86_output_indirect_function_return (rtx ret_op)
16497 {
16498 if (cfun->machine->function_return_type != indirect_branch_keep)
16499 {
16500 char thunk_name[32];
16501 enum indirect_thunk_prefix need_prefix
16502 = indirect_thunk_need_prefix (current_output_insn);
16503 unsigned int regno = REGNO (ret_op);
16504 gcc_assert (regno == CX_REG);
16505
16506 if (cfun->machine->function_return_type
16507 != indirect_branch_thunk_inline)
16508 {
16509 bool need_thunk = (cfun->machine->function_return_type
16510 == indirect_branch_thunk);
16511 indirect_thunk_name (thunk_name, regno, need_prefix, true);
16512
16513 if (need_thunk)
16514 {
16515 indirect_return_via_cx = true;
16516 SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
16517 }
16518 fprintf (asm_out_file, "\tjmp\t");
16519 assemble_name (asm_out_file, thunk_name);
16520 putc ('\n', asm_out_file);
16521 }
16522 else
16523 output_indirect_thunk (regno);
16524 }
16525 else
16526 {
16527 output_asm_insn ("%!jmp\t%A0", &ret_op);
16528 if (ix86_harden_sls & harden_sls_indirect_jmp)
16529 fputs ("\tint3\n", asm_out_file);
16530 }
16531 return "";
16532 }
16533
16534 /* Output the assembly for a call instruction. */
16535
16536 const char *
16537 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
16538 {
16539 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
16540 bool output_indirect_p
16541 = (!TARGET_SEH
16542 && cfun->machine->indirect_branch_type != indirect_branch_keep);
16543 bool seh_nop_p = false;
16544 const char *xasm;
16545
16546 if (SIBLING_CALL_P (insn))
16547 {
16548 output_return_instrumentation ();
16549 if (direct_p)
16550 {
16551 if (ix86_nopic_noplt_attribute_p (call_op))
16552 {
16553 direct_p = false;
16554 if (TARGET_64BIT)
16555 {
16556 if (output_indirect_p)
16557 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16558 else
16559 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16560 }
16561 else
16562 {
16563 if (output_indirect_p)
16564 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16565 else
16566 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16567 }
16568 }
16569 else
16570 xasm = "%!jmp\t%P0";
16571 }
16572 /* SEH epilogue detection requires the indirect branch case
16573 to include REX.W. */
16574 else if (TARGET_SEH)
16575 xasm = "%!rex.W jmp\t%A0";
16576 else
16577 {
16578 if (output_indirect_p)
16579 xasm = "%0";
16580 else
16581 xasm = "%!jmp\t%A0";
16582 }
16583
16584 if (output_indirect_p && !direct_p)
16585 ix86_output_indirect_branch (call_op, xasm, true);
16586 else
16587 {
16588 output_asm_insn (xasm, &call_op);
16589 if (!direct_p
16590 && (ix86_harden_sls & harden_sls_indirect_jmp))
16591 return "int3";
16592 }
16593 return "";
16594 }
16595
16596 /* SEH unwinding can require an extra nop to be emitted in several
16597 circumstances. Determine if we have one of those. */
16598 if (TARGET_SEH)
16599 {
16600 rtx_insn *i;
16601
16602 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
16603 {
16604 /* Prevent a catch region from being adjacent to a jump that would
16605 be interpreted as an epilogue sequence by the unwinder. */
16606 if (JUMP_P(i) && CROSSING_JUMP_P (i))
16607 {
16608 seh_nop_p = true;
16609 break;
16610 }
16611
16612 /* If we get to another real insn, we don't need the nop. */
16613 if (INSN_P (i))
16614 break;
16615
16616 /* If we get to the epilogue note, prevent a catch region from
16617 being adjacent to the standard epilogue sequence. Note that,
16618 if non-call exceptions are enabled, we already did it during
16619 epilogue expansion, or else, if the insn can throw internally,
16620 we already did it during the reorg pass. */
16621 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
16622 && !flag_non_call_exceptions
16623 && !can_throw_internal (insn))
16624 {
16625 seh_nop_p = true;
16626 break;
16627 }
16628 }
16629
16630 /* If we didn't find a real insn following the call, prevent the
16631 unwinder from looking into the next function. */
16632 if (i == NULL)
16633 seh_nop_p = true;
16634 }
16635
16636 if (direct_p)
16637 {
16638 if (ix86_nopic_noplt_attribute_p (call_op))
16639 {
16640 direct_p = false;
16641 if (TARGET_64BIT)
16642 {
16643 if (output_indirect_p)
16644 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16645 else
16646 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16647 }
16648 else
16649 {
16650 if (output_indirect_p)
16651 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16652 else
16653 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16654 }
16655 }
16656 else
16657 xasm = "%!call\t%P0";
16658 }
16659 else
16660 {
16661 if (output_indirect_p)
16662 xasm = "%0";
16663 else
16664 xasm = "%!call\t%A0";
16665 }
16666
16667 if (output_indirect_p && !direct_p)
16668 ix86_output_indirect_branch (call_op, xasm, false);
16669 else
16670 output_asm_insn (xasm, &call_op);
16671
16672 if (seh_nop_p)
16673 return "nop";
16674
16675 return "";
16676 }
16677 \f
16678 /* Return a MEM corresponding to a stack slot with mode MODE.
16679 Allocate a new slot if necessary.
16680
16681 The RTL for a function can have several slots available: N is
16682 which slot to use. */
16683
16684 rtx
16685 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
16686 {
16687 struct stack_local_entry *s;
16688
16689 gcc_assert (n < MAX_386_STACK_LOCALS);
16690
16691 for (s = ix86_stack_locals; s; s = s->next)
16692 if (s->mode == mode && s->n == n)
16693 return validize_mem (copy_rtx (s->rtl));
16694
16695 int align = 0;
16696 /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
16697 alignment with -m32 -mpreferred-stack-boundary=2. */
16698 if (mode == DImode
16699 && !TARGET_64BIT
16700 && n == SLOT_FLOATxFDI_387
16701 && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
16702 align = 32;
16703 s = ggc_alloc<stack_local_entry> ();
16704 s->n = n;
16705 s->mode = mode;
16706 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
16707
16708 s->next = ix86_stack_locals;
16709 ix86_stack_locals = s;
16710 return validize_mem (copy_rtx (s->rtl));
16711 }
16712
16713 static void
16714 ix86_instantiate_decls (void)
16715 {
16716 struct stack_local_entry *s;
16717
16718 for (s = ix86_stack_locals; s; s = s->next)
16719 if (s->rtl != NULL_RTX)
16720 instantiate_decl_rtl (s->rtl);
16721 }
16722 \f
16723 /* Check whether x86 address PARTS is a pc-relative address. */
16724
16725 bool
16726 ix86_rip_relative_addr_p (struct ix86_address *parts)
16727 {
16728 rtx base, index, disp;
16729
16730 base = parts->base;
16731 index = parts->index;
16732 disp = parts->disp;
16733
16734 if (disp && !base && !index)
16735 {
16736 if (TARGET_64BIT)
16737 {
16738 rtx symbol = disp;
16739
16740 if (GET_CODE (disp) == CONST)
16741 symbol = XEXP (disp, 0);
16742 if (GET_CODE (symbol) == PLUS
16743 && CONST_INT_P (XEXP (symbol, 1)))
16744 symbol = XEXP (symbol, 0);
16745
16746 if (GET_CODE (symbol) == LABEL_REF
16747 || (GET_CODE (symbol) == SYMBOL_REF
16748 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
16749 || (GET_CODE (symbol) == UNSPEC
16750 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
16751 || XINT (symbol, 1) == UNSPEC_PCREL
16752 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
16753 return true;
16754 }
16755 }
16756 return false;
16757 }
16758
16759 /* Calculate the length of the memory address in the instruction encoding.
16760 Includes addr32 prefix, does not include the one-byte modrm, opcode,
16761 or other prefixes. We never generate addr32 prefix for LEA insn. */
16762
16763 int
16764 memory_address_length (rtx addr, bool lea)
16765 {
16766 struct ix86_address parts;
16767 rtx base, index, disp;
16768 int len;
16769 int ok;
16770
16771 if (GET_CODE (addr) == PRE_DEC
16772 || GET_CODE (addr) == POST_INC
16773 || GET_CODE (addr) == PRE_MODIFY
16774 || GET_CODE (addr) == POST_MODIFY)
16775 return 0;
16776
16777 ok = ix86_decompose_address (addr, &parts);
16778 gcc_assert (ok);
16779
16780 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
16781
16782 /* If this is not LEA instruction, add the length of addr32 prefix. */
16783 if (TARGET_64BIT && !lea
16784 && (SImode_address_operand (addr, VOIDmode)
16785 || (parts.base && GET_MODE (parts.base) == SImode)
16786 || (parts.index && GET_MODE (parts.index) == SImode)))
16787 len++;
16788
16789 base = parts.base;
16790 index = parts.index;
16791 disp = parts.disp;
16792
16793 if (base && SUBREG_P (base))
16794 base = SUBREG_REG (base);
16795 if (index && SUBREG_P (index))
16796 index = SUBREG_REG (index);
16797
16798 gcc_assert (base == NULL_RTX || REG_P (base));
16799 gcc_assert (index == NULL_RTX || REG_P (index));
16800
16801 /* Rule of thumb:
16802 - esp as the base always wants an index,
16803 - ebp as the base always wants a displacement,
16804 - r12 as the base always wants an index,
16805 - r13 as the base always wants a displacement. */
16806
16807 /* Register Indirect. */
16808 if (base && !index && !disp)
16809 {
16810 /* esp (for its index) and ebp (for its displacement) need
16811 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
16812 code. */
16813 if (base == arg_pointer_rtx
16814 || base == frame_pointer_rtx
16815 || REGNO (base) == SP_REG
16816 || REGNO (base) == BP_REG
16817 || REGNO (base) == R12_REG
16818 || REGNO (base) == R13_REG)
16819 len++;
16820 }
16821
16822 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
16823 is not disp32, but disp32(%rip), so for disp32
16824 SIB byte is needed, unless print_operand_address
16825 optimizes it into disp32(%rip) or (%rip) is implied
16826 by UNSPEC. */
16827 else if (disp && !base && !index)
16828 {
16829 len += 4;
16830 if (!ix86_rip_relative_addr_p (&parts))
16831 len++;
16832 }
16833 else
16834 {
16835 /* Find the length of the displacement constant. */
16836 if (disp)
16837 {
16838 if (base && satisfies_constraint_K (disp))
16839 len += 1;
16840 else
16841 len += 4;
16842 }
16843 /* ebp always wants a displacement. Similarly r13. */
16844 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
16845 len++;
16846
16847 /* An index requires the two-byte modrm form.... */
16848 if (index
16849 /* ...like esp (or r12), which always wants an index. */
16850 || base == arg_pointer_rtx
16851 || base == frame_pointer_rtx
16852 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
16853 len++;
16854 }
16855
16856 return len;
16857 }
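
/* Worked examples (added commentary, lengths exclude the opcode and the
   one-byte modrm): "(%rsp)" needs the SIB byte, length 1; "8(%rbp)" needs
   a disp8, length 1; "0x1000(,%rcx,4)" needs a SIB byte plus a disp32,
   length 5.  */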
16858
16859 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16860 is set, expect that the insn has an 8-bit immediate alternative. */
16861 int
16862 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
16863 {
16864 int len = 0;
16865 int i;
16866 extract_insn_cached (insn);
16867 for (i = recog_data.n_operands - 1; i >= 0; --i)
16868 if (CONSTANT_P (recog_data.operand[i]))
16869 {
16870 enum attr_mode mode = get_attr_mode (insn);
16871
16872 gcc_assert (!len);
16873 if (shortform && CONST_INT_P (recog_data.operand[i]))
16874 {
16875 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
16876 switch (mode)
16877 {
16878 case MODE_QI:
16879 len = 1;
16880 continue;
16881 case MODE_HI:
16882 ival = trunc_int_for_mode (ival, HImode);
16883 break;
16884 case MODE_SI:
16885 ival = trunc_int_for_mode (ival, SImode);
16886 break;
16887 default:
16888 break;
16889 }
16890 if (IN_RANGE (ival, -128, 127))
16891 {
16892 len = 1;
16893 continue;
16894 }
16895 }
16896 switch (mode)
16897 {
16898 case MODE_QI:
16899 len = 1;
16900 break;
16901 case MODE_HI:
16902 len = 2;
16903 break;
16904 case MODE_SI:
16905 len = 4;
16906 break;
16907 /* Immediates for DImode instructions are encoded
16908 as 32-bit sign-extended values. */
16909 case MODE_DI:
16910 len = 4;
16911 break;
16912 default:
16913 fatal_insn ("unknown insn mode", insn);
16914 }
16915 }
16916 return len;
16917 }
16918
16919 /* Compute default value for "length_address" attribute. */
16920 int
16921 ix86_attr_length_address_default (rtx_insn *insn)
16922 {
16923 int i;
16924
16925 if (get_attr_type (insn) == TYPE_LEA)
16926 {
16927 rtx set = PATTERN (insn), addr;
16928
16929 if (GET_CODE (set) == PARALLEL)
16930 set = XVECEXP (set, 0, 0);
16931
16932 gcc_assert (GET_CODE (set) == SET);
16933
16934 addr = SET_SRC (set);
16935
16936 return memory_address_length (addr, true);
16937 }
16938
16939 extract_insn_cached (insn);
16940 for (i = recog_data.n_operands - 1; i >= 0; --i)
16941 {
16942 rtx op = recog_data.operand[i];
16943 if (MEM_P (op))
16944 {
16945 constrain_operands_cached (insn, reload_completed);
16946 if (which_alternative != -1)
16947 {
16948 const char *constraints = recog_data.constraints[i];
16949 int alt = which_alternative;
16950
16951 while (*constraints == '=' || *constraints == '+')
16952 constraints++;
16953 while (alt-- > 0)
16954 while (*constraints++ != ',')
16955 ;
16956 /* Skip ignored operands. */
16957 if (*constraints == 'X')
16958 continue;
16959 }
16960
16961 int len = memory_address_length (XEXP (op, 0), false);
16962
16963 /* Account for segment prefix for non-default addr spaces. */
16964 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
16965 len++;
16966
16967 return len;
16968 }
16969 }
16970 return 0;
16971 }
16972
16973 /* Compute the default value for the "length_vex" attribute. It includes
16974 the 2- or 3-byte VEX prefix and 1 opcode byte. */
16975
16976 int
16977 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
16978 bool has_vex_w)
16979 {
16980 int i, reg_only = 2 + 1;
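/* reg_only is the length assuming only register operands: the 2-byte VEX
prefix plus one opcode byte, unless bumped to the 3-byte form below. */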
16981 bool has_mem = false;
16982
16983 /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX.W bit
16984 requires the 3-byte VEX prefix. */
16985 if (!has_0f_opcode || has_vex_w)
16986 return 3 + 1;
16987
16988 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
16989 if (!TARGET_64BIT)
16990 return 2 + 1;
16991
16992 extract_insn_cached (insn);
16993
16994 for (i = recog_data.n_operands - 1; i >= 0; --i)
16995 if (REG_P (recog_data.operand[i]))
16996 {
16997 /* REX.W bit uses 3 byte VEX prefix. */
16998 if (GET_MODE (recog_data.operand[i]) == DImode
16999 && GENERAL_REG_P (recog_data.operand[i]))
17000 return 3 + 1;
17001
17002 /* REX.B bit requires 3-byte VEX. Right here we don't know which
17003 operand will be encoded using VEX.B, so be conservative. */
17004 if (REX_INT_REGNO_P (recog_data.operand[i])
17005 || REX_SSE_REGNO_P (recog_data.operand[i]))
17006 reg_only = 3 + 1;
17007 }
17008 else if (MEM_P (recog_data.operand[i]))
17009 {
17010 /* REX.X or REX.B bits use 3 byte VEX prefix. */
17011 if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
17012 return 3 + 1;
17013
17014 has_mem = true;
17015 }
17016
17017 return has_mem ? 2 + 1 : reg_only;
17018 }
17019 \f
17020
17021 static bool
17022 ix86_class_likely_spilled_p (reg_class_t);
17023
17024 /* Return true if the lhs of INSN is a hard function-argument register, and
17025 set *IS_SPILLED to true if it is a likely-spilled hard register. */
17026 static bool
17027 insn_is_function_arg (rtx insn, bool* is_spilled)
17028 {
17029 rtx dst;
17030
17031 if (!NONDEBUG_INSN_P (insn))
17032 return false;
17033 /* Call instructions are not movable; ignore them. */
17034 if (CALL_P (insn))
17035 return false;
17036 insn = PATTERN (insn);
17037 if (GET_CODE (insn) == PARALLEL)
17038 insn = XVECEXP (insn, 0, 0);
17039 if (GET_CODE (insn) != SET)
17040 return false;
17041 dst = SET_DEST (insn);
17042 if (REG_P (dst) && HARD_REGISTER_P (dst)
17043 && ix86_function_arg_regno_p (REGNO (dst)))
17044 {
17045 /* Is it likely spilled HW register? */
17046 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
17047 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
17048 *is_spilled = true;
17049 return true;
17050 }
17051 return false;
17052 }
17053
17054 /* Add output dependencies for a chain of adjacent function arguments, but
17055 only if there is a move to a likely-spilled hard register. Return the first
17056 argument if at least one dependence was added, or NULL otherwise. */
17057 static rtx_insn *
17058 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
17059 {
17060 rtx_insn *insn;
17061 rtx_insn *last = call;
17062 rtx_insn *first_arg = NULL;
17063 bool is_spilled = false;
17064
17065 head = PREV_INSN (head);
17066
17067 /* Find the argument-passing instruction nearest to the call. */
17068 while (true)
17069 {
17070 last = PREV_INSN (last);
17071 if (last == head)
17072 return NULL;
17073 if (!NONDEBUG_INSN_P (last))
17074 continue;
17075 if (insn_is_function_arg (last, &is_spilled))
17076 break;
17077 return NULL;
17078 }
17079
17080 first_arg = last;
17081 while (true)
17082 {
17083 insn = PREV_INSN (last);
17084 if (!INSN_P (insn))
17085 break;
17086 if (insn == head)
17087 break;
17088 if (!NONDEBUG_INSN_P (insn))
17089 {
17090 last = insn;
17091 continue;
17092 }
17093 if (insn_is_function_arg (insn, &is_spilled))
17094 {
17095 /* Add an output dependence between two function arguments if the chain
17096 of output arguments contains likely-spilled hard registers. */
17097 if (is_spilled)
17098 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17099 first_arg = last = insn;
17100 }
17101 else
17102 break;
17103 }
17104 if (!is_spilled)
17105 return NULL;
17106 return first_arg;
17107 }
17108
17109 /* Add output or anti dependency from insn to first_arg to restrict its code
17110 motion. */
17111 static void
17112 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
17113 {
17114 rtx set;
17115 rtx tmp;
17116
17117 set = single_set (insn);
17118 if (!set)
17119 return;
17120 tmp = SET_DEST (set);
17121 if (REG_P (tmp))
17122 {
17123 /* Add output dependency to the first function argument. */
17124 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
17125 return;
17126 }
17127 /* Add anti dependency. */
17128 add_dependence (first_arg, insn, REG_DEP_ANTI);
17129 }
17130
17131 /* Avoid cross-block motion of a function argument by adding a dependency
17132 from the first non-jump instruction in BB. */
17133 static void
17134 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
17135 {
17136 rtx_insn *insn = BB_END (bb);
17137
17138 while (insn)
17139 {
17140 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
17141 {
17142 rtx set = single_set (insn);
17143 if (set)
17144 {
17145 avoid_func_arg_motion (arg, insn);
17146 return;
17147 }
17148 }
17149 if (insn == BB_HEAD (bb))
17150 return;
17151 insn = PREV_INSN (insn);
17152 }
17153 }
17154
17155 /* Hook for pre-reload schedule - avoid motion of function arguments
17156 passed in likely spilled HW registers. */
17157 static void
17158 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
17159 {
17160 rtx_insn *insn;
17161 rtx_insn *first_arg = NULL;
17162 if (reload_completed)
17163 return;
17164 while (head != tail && DEBUG_INSN_P (head))
17165 head = NEXT_INSN (head);
17166 for (insn = tail; insn != head; insn = PREV_INSN (insn))
17167 if (INSN_P (insn) && CALL_P (insn))
17168 {
17169 first_arg = add_parameter_dependencies (insn, head);
17170 if (first_arg)
17171 {
17172 /* Add a dependee for the first argument to predecessors, but only
17173 if the region contains more than one block. */
17174 basic_block bb = BLOCK_FOR_INSN (insn);
17175 int rgn = CONTAINING_RGN (bb->index);
17176 int nr_blks = RGN_NR_BLOCKS (rgn);
17177 /* Skip trivial regions and region head blocks that can have
17178 predecessors outside of region. */
17179 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
17180 {
17181 edge e;
17182 edge_iterator ei;
17183
17184 /* Regions are SCCs with the exception of selective
17185 scheduling with pipelining of outer blocks enabled.
17186 So also check that immediate predecessors of a non-head
17187 block are in the same region. */
17188 FOR_EACH_EDGE (e, ei, bb->preds)
17189 {
17190 /* Avoid creating loop-carried dependencies by using
17191 the topological ordering in the region. */
17192 if (rgn == CONTAINING_RGN (e->src->index)
17193 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
17194 add_dependee_for_func_arg (first_arg, e->src);
17195 }
17196 }
17197 insn = first_arg;
17198 if (insn == head)
17199 break;
17200 }
17201 }
17202 else if (first_arg)
17203 avoid_func_arg_motion (first_arg, insn);
17204 }
17205
17206 /* Hook for pre-reload schedule - set the priority of moves from likely-spilled
17207 HW registers to the maximum, to schedule them as soon as possible. These are
17208 moves from function argument registers at the top of the function entry
17209 and moves from function return value registers after a call. */
17210 static int
17211 ix86_adjust_priority (rtx_insn *insn, int priority)
17212 {
17213 rtx set;
17214
17215 if (reload_completed)
17216 return priority;
17217
17218 if (!NONDEBUG_INSN_P (insn))
17219 return priority;
17220
17221 set = single_set (insn);
17222 if (set)
17223 {
17224 rtx tmp = SET_SRC (set);
17225 if (REG_P (tmp)
17226 && HARD_REGISTER_P (tmp)
17227 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
17228 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
17229 return current_sched_info->sched_max_insns_priority;
17230 }
17231
17232 return priority;
17233 }
17234
17235 /* Prepare for scheduling pass. */
17236 static void
17237 ix86_sched_init_global (FILE *, int, int)
17238 {
17239 /* Install scheduling hooks for current CPU. Some of these hooks are used
17240 in time-critical parts of the scheduler, so we only set them up when
17241 they are actually used. */
17242 switch (ix86_tune)
17243 {
17244 case PROCESSOR_CORE2:
17245 case PROCESSOR_NEHALEM:
17246 case PROCESSOR_SANDYBRIDGE:
17247 case PROCESSOR_HASWELL:
17248 case PROCESSOR_TREMONT:
17249 case PROCESSOR_ALDERLAKE:
17250 case PROCESSOR_GENERIC:
17251 /* Do not perform multipass scheduling for pre-reload schedule
17252 to save compile time. */
17253 if (reload_completed)
17254 {
17255 ix86_core2i7_init_hooks ();
17256 break;
17257 }
17258 /* Fall through. */
17259 default:
17260 targetm.sched.dfa_post_advance_cycle = NULL;
17261 targetm.sched.first_cycle_multipass_init = NULL;
17262 targetm.sched.first_cycle_multipass_begin = NULL;
17263 targetm.sched.first_cycle_multipass_issue = NULL;
17264 targetm.sched.first_cycle_multipass_backtrack = NULL;
17265 targetm.sched.first_cycle_multipass_end = NULL;
17266 targetm.sched.first_cycle_multipass_fini = NULL;
17267 break;
17268 }
17269 }
17270
17271 \f
17272 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
17273
17274 static HOST_WIDE_INT
17275 ix86_static_rtx_alignment (machine_mode mode)
17276 {
17277 if (mode == DFmode)
17278 return 64;
17279 if (ALIGN_MODE_128 (mode))
17280 return MAX (128, GET_MODE_ALIGNMENT (mode));
17281 return GET_MODE_ALIGNMENT (mode);
17282 }
17283
17284 /* Implement TARGET_CONSTANT_ALIGNMENT. */
17285
17286 static HOST_WIDE_INT
17287 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
17288 {
17289 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17290 || TREE_CODE (exp) == INTEGER_CST)
17291 {
17292 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
17293 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
17294 return MAX (mode_align, align);
17295 }
17296 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17297 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17298 return BITS_PER_WORD;
17299
17300 return align;
17301 }
17302
17303 /* Implement TARGET_EMPTY_RECORD_P. */
17304
17305 static bool
17306 ix86_is_empty_record (const_tree type)
17307 {
17308 if (!TARGET_64BIT)
17309 return false;
17310 return default_is_empty_record (type);
17311 }
17312
17313 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
17314
17315 static void
17316 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
17317 {
17318 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
17319
17320 if (!cum->warn_empty)
17321 return;
17322
17323 if (!TYPE_EMPTY_P (type))
17324 return;
17325
17326 /* Don't warn if the function isn't visible outside of the TU. */
17327 if (cum->decl && !TREE_PUBLIC (cum->decl))
17328 return;
17329
17330 const_tree ctx = get_ultimate_context (cum->decl);
17331 if (ctx != NULL_TREE
17332 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
17333 return;
17334
17335 /* If the actual size of the type is zero, then there is no change
17336 in how objects of this size are passed. */
17337 if (int_size_in_bytes (type) == 0)
17338 return;
17339
17340 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
17341 "changes in %<-fabi-version=12%> (GCC 8)", type);
17342
17343 /* Only warn once. */
17344 cum->warn_empty = false;
17345 }
17346
17347 /* This hook returns the name of the multilib ABI. */
17348
17349 static const char *
17350 ix86_get_multilib_abi_name (void)
17351 {
17352 if (!(TARGET_64BIT_P (ix86_isa_flags)))
17353 return "i386";
17354 else if (TARGET_X32_P (ix86_isa_flags))
17355 return "x32";
17356 else
17357 return "x86_64";
17358 }
17359
17360 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
17361 the data type, and ALIGN is the alignment that the object would
17362 ordinarily have. */
17363
17364 static int
17365 iamcu_alignment (tree type, int align)
17366 {
17367 machine_mode mode;
17368
17369 if (align < 32 || TYPE_USER_ALIGN (type))
17370 return align;
17371
17372 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
17373 aligned to 4 bytes. */
17374 type = strip_array_types (type);
17375 if (TYPE_ATOMIC (type))
17376 return align;
17377
17378 mode = TYPE_MODE (type);
17379 switch (GET_MODE_CLASS (mode))
17380 {
17381 case MODE_INT:
17382 case MODE_COMPLEX_INT:
17383 case MODE_COMPLEX_FLOAT:
17384 case MODE_FLOAT:
17385 case MODE_DECIMAL_FLOAT:
17386 return 32;
17387 default:
17388 return align;
17389 }
17390 }
17391
17392 /* Compute the alignment for a static variable.
17393 TYPE is the data type, and ALIGN is the alignment that
17394 the object would ordinarily have. The value of this function is used
17395 instead of that alignment to align the object. */
17396
17397 int
17398 ix86_data_alignment (tree type, unsigned int align, bool opt)
17399 {
17400 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
17401 for symbols from other compilation units or symbols that don't need
17402 to bind locally. In order to preserve some ABI compatibility with
17403 those compilers, ensure we don't decrease alignment from what we
17404 used to assume. */
17405
17406 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
17407
17408 /* A data structure equal to or greater than the size of a cache line
17409 (64 bytes in the Pentium 4 and other recent Intel processors, including
17410 processors based on the Intel Core microarchitecture) should be aligned
17411 so that its base address is a multiple of the cache line size. */
17412
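/* prefetch_block is in bytes; scale by 8 since alignments here are
expressed in bits. */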
17413 unsigned int max_align
17414 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
17415
17416 if (max_align < BITS_PER_WORD)
17417 max_align = BITS_PER_WORD;
17418
17419 switch (ix86_align_data_type)
17420 {
17421 case ix86_align_data_type_abi: opt = false; break;
17422 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
17423 case ix86_align_data_type_cacheline: break;
17424 }
17425
17426 if (TARGET_IAMCU)
17427 align = iamcu_alignment (type, align);
17428
17429 if (opt
17430 && AGGREGATE_TYPE_P (type)
17431 && TYPE_SIZE (type)
17432 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
17433 {
17434 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
17435 && align < max_align_compat)
17436 align = max_align_compat;
17437 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
17438 && align < max_align)
17439 align = max_align;
17440 }
17441
17442 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
17443 to a 16-byte boundary. */
17444 if (TARGET_64BIT)
17445 {
17446 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
17447 && TYPE_SIZE (type)
17448 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17449 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
17450 && align < 128)
17451 return 128;
17452 }
17453
17454 if (!opt)
17455 return align;
17456
17457 if (TREE_CODE (type) == ARRAY_TYPE)
17458 {
17459 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17460 return 64;
17461 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17462 return 128;
17463 }
17464 else if (TREE_CODE (type) == COMPLEX_TYPE)
17465 {
17466
17467 if (TYPE_MODE (type) == DCmode && align < 64)
17468 return 64;
17469 if ((TYPE_MODE (type) == XCmode
17470 || TYPE_MODE (type) == TCmode) && align < 128)
17471 return 128;
17472 }
17473 else if (RECORD_OR_UNION_TYPE_P (type)
17474 && TYPE_FIELDS (type))
17475 {
17476 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17477 return 64;
17478 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17479 return 128;
17480 }
17481 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
17482 || TREE_CODE (type) == INTEGER_TYPE)
17483 {
17484 if (TYPE_MODE (type) == DFmode && align < 64)
17485 return 64;
17486 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17487 return 128;
17488 }
17489
17490 return align;
17491 }
17492
17493 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
17494 static void
17495 ix86_lower_local_decl_alignment (tree decl)
17496 {
17497 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
17498 DECL_ALIGN (decl), true);
17499 if (new_align < DECL_ALIGN (decl))
17500 SET_DECL_ALIGN (decl, new_align);
17501 }
17502
17503 /* Compute the alignment for a local variable or a stack slot. EXP is
17504 the data type or decl itself, MODE is the widest mode available and
17505 ALIGN is the alignment that the object would ordinarily have. The
17506 value of this macro is used instead of that alignment to align the
17507 object. */
17508
17509 unsigned int
17510 ix86_local_alignment (tree exp, machine_mode mode,
17511 unsigned int align, bool may_lower)
17512 {
17513 tree type, decl;
17514
17515 if (exp && DECL_P (exp))
17516 {
17517 type = TREE_TYPE (exp);
17518 decl = exp;
17519 }
17520 else
17521 {
17522 type = exp;
17523 decl = NULL;
17524 }
17525
17526 /* Don't do dynamic stack realignment for long long objects with
17527 -mpreferred-stack-boundary=2. */
17528 if (may_lower
17529 && !TARGET_64BIT
17530 && align == 64
17531 && ix86_preferred_stack_boundary < 64
17532 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
17533 && (!type || (!TYPE_USER_ALIGN (type)
17534 && !TYPE_ATOMIC (strip_array_types (type))))
17535 && (!decl || !DECL_USER_ALIGN (decl)))
17536 align = 32;
17537
17538 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17539 register in MODE. We will return the largest alignment of XF
17540 and DF. */
17541 if (!type)
17542 {
17543 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17544 align = GET_MODE_ALIGNMENT (DFmode);
17545 return align;
17546 }
17547
17548 /* Don't increase alignment for Intel MCU psABI. */
17549 if (TARGET_IAMCU)
17550 return align;
17551
17552 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
17553 to a 16-byte boundary. The exact wording is:
17554
17555 An array uses the same alignment as its elements, except that a local or
17556 global array variable of length at least 16 bytes or
17557 a C99 variable-length array variable always has alignment of at least 16 bytes.
17558
17559 This was added to allow the use of aligned SSE instructions on arrays. The
17560 rule is meant for static storage (where the compiler cannot do the analysis
17561 by itself). We follow it for automatic variables only when convenient.
17562 We fully control everything in the function being compiled, and functions
17563 from other units cannot rely on the alignment.
17564
17565 Exclude the va_list type. It is the common case of a local array where
17566 we cannot benefit from the alignment.
17567
17568 TODO: Probably one should optimize for size only when the variable does not escape. */
17569 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
17570 && TARGET_SSE)
17571 {
17572 if (AGGREGATE_TYPE_P (type)
17573 && (va_list_type_node == NULL_TREE
17574 || (TYPE_MAIN_VARIANT (type)
17575 != TYPE_MAIN_VARIANT (va_list_type_node)))
17576 && TYPE_SIZE (type)
17577 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17578 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
17579 && align < 128)
17580 return 128;
17581 }
17582 if (TREE_CODE (type) == ARRAY_TYPE)
17583 {
17584 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17585 return 64;
17586 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17587 return 128;
17588 }
17589 else if (TREE_CODE (type) == COMPLEX_TYPE)
17590 {
17591 if (TYPE_MODE (type) == DCmode && align < 64)
17592 return 64;
17593 if ((TYPE_MODE (type) == XCmode
17594 || TYPE_MODE (type) == TCmode) && align < 128)
17595 return 128;
17596 }
17597 else if (RECORD_OR_UNION_TYPE_P (type)
17598 && TYPE_FIELDS (type))
17599 {
17600 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17601 return 64;
17602 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17603 return 128;
17604 }
17605 else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
17606 || TREE_CODE (type) == INTEGER_TYPE)
17607 {
17608
17609 if (TYPE_MODE (type) == DFmode && align < 64)
17610 return 64;
17611 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17612 return 128;
17613 }
17614 return align;
17615 }
17616
17617 /* Compute the minimum required alignment for dynamic stack realignment
17618 purposes for a local variable, parameter or a stack slot. EXP is
17619 the data type or decl itself, MODE is its mode and ALIGN is the
17620 alignment that the object would ordinarily have. */
17621
17622 unsigned int
17623 ix86_minimum_alignment (tree exp, machine_mode mode,
17624 unsigned int align)
17625 {
17626 tree type, decl;
17627
17628 if (exp && DECL_P (exp))
17629 {
17630 type = TREE_TYPE (exp);
17631 decl = exp;
17632 }
17633 else
17634 {
17635 type = exp;
17636 decl = NULL;
17637 }
17638
17639 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
17640 return align;
17641
17642 /* Don't do dynamic stack realignment for long long objects with
17643 -mpreferred-stack-boundary=2. */
17644 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
17645 && (!type || (!TYPE_USER_ALIGN (type)
17646 && !TYPE_ATOMIC (strip_array_types (type))))
17647 && (!decl || !DECL_USER_ALIGN (decl)))
17648 {
17649 gcc_checking_assert (!TARGET_STV);
17650 return 32;
17651 }
17652
17653 return align;
17654 }
17655 \f
17656 /* Find a location for the static chain incoming to a nested function.
17657 This is a register, unless all free registers are used by arguments. */
17658
17659 static rtx
17660 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
17661 {
17662 unsigned regno;
17663
17664 if (TARGET_64BIT)
17665 {
17666 /* We always use R10 in 64-bit mode. */
17667 regno = R10_REG;
17668 }
17669 else
17670 {
17671 const_tree fntype, fndecl;
17672 unsigned int ccvt;
17673
17674 /* By default in 32-bit mode we use ECX to pass the static chain. */
17675 regno = CX_REG;
17676
17677 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
17678 {
17679 fntype = TREE_TYPE (fndecl_or_type);
17680 fndecl = fndecl_or_type;
17681 }
17682 else
17683 {
17684 fntype = fndecl_or_type;
17685 fndecl = NULL;
17686 }
17687
17688 ccvt = ix86_get_callcvt (fntype);
17689 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
17690 {
17691 /* Fastcall functions use ecx/edx for arguments, which leaves
17692 us with EAX for the static chain.
17693 Thiscall functions use ecx for arguments, which also
17694 leaves us with EAX for the static chain. */
17695 regno = AX_REG;
17696 }
17697 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
17698 {
17699 /* Thiscall functions use ecx for arguments, which leaves
17700 us with EAX and EDX for the static chain.
17701 We use EAX for ABI compatibility. */
17702 regno = AX_REG;
17703 }
17704 else if (ix86_function_regparm (fntype, fndecl) == 3)
17705 {
17706 /* For regparm 3, we have no free call-clobbered registers in
17707 which to store the static chain. In order to implement this,
17708 we have the trampoline push the static chain to the stack.
17709 However, we can't push a value below the return address when
17710 we call the nested function directly, so we have to use an
17711 alternate entry point. For this we use ESI, and have the
17712 alternate entry point push ESI, so that things appear the
17713 same once we're executing the nested function. */
17714 if (incoming_p)
17715 {
17716 if (fndecl == current_function_decl
17717 && !ix86_static_chain_on_stack)
17718 {
17719 gcc_assert (!reload_completed);
17720 ix86_static_chain_on_stack = true;
17721 }
17722 return gen_frame_mem (SImode,
17723 plus_constant (Pmode,
17724 arg_pointer_rtx, -8));
17725 }
17726 regno = SI_REG;
17727 }
17728 }
17729
17730 return gen_rtx_REG (Pmode, regno);
17731 }
17732
17733 /* Emit RTL insns to initialize the variable parts of a trampoline.
17734 FNDECL is the decl of the target address; M_TRAMP is a MEM for
17735 the trampoline, and CHAIN_VALUE is an RTX for the static chain
17736 to be passed to the target function. */
17737
17738 static void
17739 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
17740 {
17741 rtx mem, fnaddr;
17742 int opcode;
17743 int offset = 0;
17744 bool need_endbr = (flag_cf_protection & CF_BRANCH);
17745
17746 fnaddr = XEXP (DECL_RTL (fndecl), 0);
17747
17748 if (TARGET_64BIT)
17749 {
17750 int size;
17751
17752 if (need_endbr)
17753 {
17754 /* Insert ENDBR64. */
17755 mem = adjust_address (m_tramp, SImode, offset);
17756 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
17757 offset += 4;
17758 }
17759
17760 /* Load the function address into r11. Try to load the address using
17761 the shorter movl instead of movabs. We may want to support
17762 movq for kernel mode, but the kernel does not use trampolines at
17763 the moment. FNADDR is a 32-bit address and may not be in
17764 DImode when ptr_mode == SImode. Always use movl in this
17765 case. */
17766 if (ptr_mode == SImode
17767 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17768 {
17769 fnaddr = copy_addr_to_reg (fnaddr);
17770
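/* 0xbb41 stored little-endian is the byte pair 0x41 0xbb: a REX.B prefix
followed by the mov-imm32 opcode selecting %r11d. */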
17771 mem = adjust_address (m_tramp, HImode, offset);
17772 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
17773
17774 mem = adjust_address (m_tramp, SImode, offset + 2);
17775 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
17776 offset += 6;
17777 }
17778 else
17779 {
17780 mem = adjust_address (m_tramp, HImode, offset);
17781 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
17782
17783 mem = adjust_address (m_tramp, DImode, offset + 2);
17784 emit_move_insn (mem, fnaddr);
17785 offset += 10;
17786 }
17787
17788 /* Load the static chain into r10 using movabs. Use the shorter movl
17789 instead of movabs when ptr_mode == SImode. */
17790 if (ptr_mode == SImode)
17791 {
17792 opcode = 0xba41;
17793 size = 6;
17794 }
17795 else
17796 {
17797 opcode = 0xba49;
17798 size = 10;
17799 }
17800
17801 mem = adjust_address (m_tramp, HImode, offset);
17802 emit_move_insn (mem, gen_int_mode (opcode, HImode));
17803
17804 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
17805 emit_move_insn (mem, chain_value);
17806 offset += size;
17807
17808 /* Jump to r11; the last (unused) byte is a nop, only there to
17809 pad the write out to a single 32-bit store. */
17810 mem = adjust_address (m_tramp, SImode, offset);
17811 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
17812 offset += 4;
17813 }
17814 else
17815 {
17816 rtx disp, chain;
17817
17818 /* Depending on the static chain location, either load a register
17819 with a constant, or push the constant to the stack. All of the
17820 instructions are the same size. */
17821 chain = ix86_static_chain (fndecl, true);
17822 if (REG_P (chain))
17823 {
17824 switch (REGNO (chain))
17825 {
17826 case AX_REG:
17827 opcode = 0xb8; break;
17828 case CX_REG:
17829 opcode = 0xb9; break;
17830 default:
17831 gcc_unreachable ();
17832 }
17833 }
17834 else
17835 opcode = 0x68;
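/* 0xb8 and 0xb9 are mov-imm32 into %eax and %ecx respectively; 0x68 is
push imm32; the 0xe9 emitted below is jmp rel32. */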
17836
17837 if (need_endbr)
17838 {
17839 /* Insert ENDBR32. */
17840 mem = adjust_address (m_tramp, SImode, offset);
17841 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
17842 offset += 4;
17843 }
17844
17845 mem = adjust_address (m_tramp, QImode, offset);
17846 emit_move_insn (mem, gen_int_mode (opcode, QImode));
17847
17848 mem = adjust_address (m_tramp, SImode, offset + 1);
17849 emit_move_insn (mem, chain_value);
17850 offset += 5;
17851
17852 mem = adjust_address (m_tramp, QImode, offset);
17853 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
17854
17855 mem = adjust_address (m_tramp, SImode, offset + 1);
17856
17857 /* Compute offset from the end of the jmp to the target function.
17858 In the case in which the trampoline stores the static chain on
17859 the stack, we need to skip the first insn which pushes the
17860 (call-saved) register static chain; this push is 1 byte. */
17861 offset += 5;
17862 int skip = MEM_P (chain) ? 1 : 0;
17863 /* Skip ENDBR32 at the entry of the target function. */
17864 if (need_endbr
17865 && !cgraph_node::get (fndecl)->only_called_directly_p ())
17866 skip += 4;
17867 disp = expand_binop (SImode, sub_optab, fnaddr,
17868 plus_constant (Pmode, XEXP (m_tramp, 0),
17869 offset - skip),
17870 NULL_RTX, 1, OPTAB_DIRECT);
17871 emit_move_insn (mem, disp);
17872 }
17873
17874 gcc_assert (offset <= TRAMPOLINE_SIZE);
17875
17876 #ifdef HAVE_ENABLE_EXECUTE_STACK
17877 #ifdef CHECK_EXECUTE_STACK_ENABLED
17878 if (CHECK_EXECUTE_STACK_ENABLED)
17879 #endif
17880 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17881 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
17882 #endif
17883 }
17884
17885 static bool
17886 ix86_allocate_stack_slots_for_args (void)
17887 {
17888 /* Naked functions should not allocate stack slots for arguments. */
17889 return !ix86_function_naked (current_function_decl);
17890 }
17891
17892 static bool
17893 ix86_warn_func_return (tree decl)
17894 {
17895 /* Naked functions are implemented entirely in assembly, including the
17896 return sequence, so suppress warnings about this. */
17897 return !ix86_function_naked (decl);
17898 }
17899 \f
17900 /* Return the shift count of a vector-by-scalar shift builtin, given its
17901 second argument ARG1. */
17902 static tree
17903 ix86_vector_shift_count (tree arg1)
17904 {
17905 if (tree_fits_uhwi_p (arg1))
17906 return arg1;
17907 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
17908 {
17909 /* The count argument is unusual: it is passed in as various 128-bit
17910 (or 64-bit) vectors, and the low 64 bits of it are the count. */
17911 unsigned char buf[16];
17912 int len = native_encode_expr (arg1, buf, 16);
17913 if (len == 0)
17914 return NULL_TREE;
17915 tree t = native_interpret_expr (uint64_type_node, buf, len);
17916 if (t && tree_fits_uhwi_p (t))
17917 return t;
17918 }
17919 return NULL_TREE;
17920 }
17921
17922 /* Return true if ARG_MASK is all ones; ELEMS is the number of elements
17923 of the corresponding vector. */
17924 static bool
17925 ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
17926 {
17927 if (TREE_CODE (arg_mask) != INTEGER_CST)
17928 return false;
17929
17930 unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
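/* Only the low ELEMS bits of the mask are meaningful; force the bits
above them on before comparing against all-ones. */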
17931 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
17932 return false;
17933
17934 return true;
17935 }
17936
17937 static tree
17938 ix86_fold_builtin (tree fndecl, int n_args,
17939 tree *args, bool ignore ATTRIBUTE_UNUSED)
17940 {
17941 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17942 {
17943 enum ix86_builtins fn_code
17944 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17945 enum rtx_code rcode;
17946 bool is_vshift;
17947 unsigned HOST_WIDE_INT mask;
17948
17949 switch (fn_code)
17950 {
17951 case IX86_BUILTIN_CPU_IS:
17952 case IX86_BUILTIN_CPU_SUPPORTS:
17953 gcc_assert (n_args == 1);
17954 return fold_builtin_cpu (fndecl, args);
17955
17956 case IX86_BUILTIN_NANQ:
17957 case IX86_BUILTIN_NANSQ:
17958 {
17959 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17960 const char *str = c_getstr (*args);
17961 int quiet = fn_code == IX86_BUILTIN_NANQ;
17962 REAL_VALUE_TYPE real;
17963
17964 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17965 return build_real (type, real);
17966 return NULL_TREE;
17967 }
17968
17969 case IX86_BUILTIN_INFQ:
17970 case IX86_BUILTIN_HUGE_VALQ:
17971 {
17972 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17973 REAL_VALUE_TYPE inf;
17974 real_inf (&inf);
17975 return build_real (type, inf);
17976 }
17977
17978 case IX86_BUILTIN_TZCNT16:
17979 case IX86_BUILTIN_CTZS:
17980 case IX86_BUILTIN_TZCNT32:
17981 case IX86_BUILTIN_TZCNT64:
17982 gcc_assert (n_args == 1);
17983 if (TREE_CODE (args[0]) == INTEGER_CST)
17984 {
17985 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17986 tree arg = args[0];
17987 if (fn_code == IX86_BUILTIN_TZCNT16
17988 || fn_code == IX86_BUILTIN_CTZS)
17989 arg = fold_convert (short_unsigned_type_node, arg);
17990 if (integer_zerop (arg))
17991 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17992 else
17993 return fold_const_call (CFN_CTZ, type, arg);
17994 }
17995 break;
17996
17997 case IX86_BUILTIN_LZCNT16:
17998 case IX86_BUILTIN_CLZS:
17999 case IX86_BUILTIN_LZCNT32:
18000 case IX86_BUILTIN_LZCNT64:
18001 gcc_assert (n_args == 1);
18002 if (TREE_CODE (args[0]) == INTEGER_CST)
18003 {
18004 tree type = TREE_TYPE (TREE_TYPE (fndecl));
18005 tree arg = args[0];
18006 if (fn_code == IX86_BUILTIN_LZCNT16
18007 || fn_code == IX86_BUILTIN_CLZS)
18008 arg = fold_convert (short_unsigned_type_node, arg);
18009 if (integer_zerop (arg))
18010 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
18011 else
18012 return fold_const_call (CFN_CLZ, type, arg);
18013 }
18014 break;
18015
18016 case IX86_BUILTIN_BEXTR32:
18017 case IX86_BUILTIN_BEXTR64:
18018 case IX86_BUILTIN_BEXTRI32:
18019 case IX86_BUILTIN_BEXTRI64:
18020 gcc_assert (n_args == 2);
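/* Operand 1 packs the start bit in its low byte and the field length in
bits [15:8]. E.g. (illustrative) bextr (0x12345678, 0x0804) extracts
8 bits starting at bit 4, giving 0x67. */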
18021 if (tree_fits_uhwi_p (args[1]))
18022 {
18023 unsigned HOST_WIDE_INT res = 0;
18024 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
18025 unsigned int start = tree_to_uhwi (args[1]);
18026 unsigned int len = (start & 0xff00) >> 8;
18027 start &= 0xff;
18028 if (start >= prec || len == 0)
18029 res = 0;
18030 else if (!tree_fits_uhwi_p (args[0]))
18031 break;
18032 else
18033 res = tree_to_uhwi (args[0]) >> start;
18034 if (len > prec)
18035 len = prec;
18036 if (len < HOST_BITS_PER_WIDE_INT)
18037 res &= (HOST_WIDE_INT_1U << len) - 1;
18038 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18039 }
18040 break;
18041
18042 case IX86_BUILTIN_BZHI32:
18043 case IX86_BUILTIN_BZHI64:
18044 gcc_assert (n_args == 2);
18045 if (tree_fits_uhwi_p (args[1]))
18046 {
18047 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
18048 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
18049 return args[0];
18050 if (idx == 0)
18051 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
18052 if (!tree_fits_uhwi_p (args[0]))
18053 break;
18054 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
18055 res &= ~(HOST_WIDE_INT_M1U << idx);
18056 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18057 }
18058 break;
18059
18060 case IX86_BUILTIN_PDEP32:
18061 case IX86_BUILTIN_PDEP64:
18062 gcc_assert (n_args == 2);
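/* Parallel bit deposit: scatter the low bits of operand 0 into the
set-bit positions of operand 1. E.g. (illustrative)
pdep (0b101, 0b11010) = 0b10010. */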
18063 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18064 {
18065 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18066 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18067 unsigned HOST_WIDE_INT res = 0;
18068 unsigned HOST_WIDE_INT m, k = 1;
18069 for (m = 1; m; m <<= 1)
18070 if ((mask & m) != 0)
18071 {
18072 if ((src & k) != 0)
18073 res |= m;
18074 k <<= 1;
18075 }
18076 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18077 }
18078 break;
18079
18080 case IX86_BUILTIN_PEXT32:
18081 case IX86_BUILTIN_PEXT64:
18082 gcc_assert (n_args == 2);
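/* Parallel bit extract: gather the bits of operand 0 selected by
operand 1 into the low bits of the result. E.g. (illustrative)
pext (0b10010, 0b11010) = 0b101. */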
18083 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
18084 {
18085 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
18086 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
18087 unsigned HOST_WIDE_INT res = 0;
18088 unsigned HOST_WIDE_INT m, k = 1;
18089 for (m = 1; m; m <<= 1)
18090 if ((mask & m) != 0)
18091 {
18092 if ((src & m) != 0)
18093 res |= k;
18094 k <<= 1;
18095 }
18096 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
18097 }
18098 break;
18099
18100 case IX86_BUILTIN_MOVMSKPS:
18101 case IX86_BUILTIN_PMOVMSKB:
18102 case IX86_BUILTIN_MOVMSKPD:
18103 case IX86_BUILTIN_PMOVMSKB128:
18104 case IX86_BUILTIN_MOVMSKPD256:
18105 case IX86_BUILTIN_MOVMSKPS256:
18106 case IX86_BUILTIN_PMOVMSKB256:
18107 gcc_assert (n_args == 1);
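/* These builtins collect the sign bit of each vector element into an
integer bitmask; fold when the sign of every constant element is known. */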
18108 if (TREE_CODE (args[0]) == VECTOR_CST)
18109 {
18110 HOST_WIDE_INT res = 0;
18111 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
18112 {
18113 tree e = VECTOR_CST_ELT (args[0], i);
18114 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
18115 {
18116 if (wi::neg_p (wi::to_wide (e)))
18117 res |= HOST_WIDE_INT_1 << i;
18118 }
18119 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
18120 {
18121 if (TREE_REAL_CST (e).sign)
18122 res |= HOST_WIDE_INT_1 << i;
18123 }
18124 else
18125 return NULL_TREE;
18126 }
18127 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
18128 }
18129 break;
18130
18131 case IX86_BUILTIN_PSLLD:
18132 case IX86_BUILTIN_PSLLD128:
18133 case IX86_BUILTIN_PSLLD128_MASK:
18134 case IX86_BUILTIN_PSLLD256:
18135 case IX86_BUILTIN_PSLLD256_MASK:
18136 case IX86_BUILTIN_PSLLD512:
18137 case IX86_BUILTIN_PSLLDI:
18138 case IX86_BUILTIN_PSLLDI128:
18139 case IX86_BUILTIN_PSLLDI128_MASK:
18140 case IX86_BUILTIN_PSLLDI256:
18141 case IX86_BUILTIN_PSLLDI256_MASK:
18142 case IX86_BUILTIN_PSLLDI512:
18143 case IX86_BUILTIN_PSLLQ:
18144 case IX86_BUILTIN_PSLLQ128:
18145 case IX86_BUILTIN_PSLLQ128_MASK:
18146 case IX86_BUILTIN_PSLLQ256:
18147 case IX86_BUILTIN_PSLLQ256_MASK:
18148 case IX86_BUILTIN_PSLLQ512:
18149 case IX86_BUILTIN_PSLLQI:
18150 case IX86_BUILTIN_PSLLQI128:
18151 case IX86_BUILTIN_PSLLQI128_MASK:
18152 case IX86_BUILTIN_PSLLQI256:
18153 case IX86_BUILTIN_PSLLQI256_MASK:
18154 case IX86_BUILTIN_PSLLQI512:
18155 case IX86_BUILTIN_PSLLW:
18156 case IX86_BUILTIN_PSLLW128:
18157 case IX86_BUILTIN_PSLLW128_MASK:
18158 case IX86_BUILTIN_PSLLW256:
18159 case IX86_BUILTIN_PSLLW256_MASK:
18160 case IX86_BUILTIN_PSLLW512_MASK:
18161 case IX86_BUILTIN_PSLLWI:
18162 case IX86_BUILTIN_PSLLWI128:
18163 case IX86_BUILTIN_PSLLWI128_MASK:
18164 case IX86_BUILTIN_PSLLWI256:
18165 case IX86_BUILTIN_PSLLWI256_MASK:
18166 case IX86_BUILTIN_PSLLWI512_MASK:
18167 rcode = ASHIFT;
18168 is_vshift = false;
18169 goto do_shift;
18170 case IX86_BUILTIN_PSRAD:
18171 case IX86_BUILTIN_PSRAD128:
18172 case IX86_BUILTIN_PSRAD128_MASK:
18173 case IX86_BUILTIN_PSRAD256:
18174 case IX86_BUILTIN_PSRAD256_MASK:
18175 case IX86_BUILTIN_PSRAD512:
18176 case IX86_BUILTIN_PSRADI:
18177 case IX86_BUILTIN_PSRADI128:
18178 case IX86_BUILTIN_PSRADI128_MASK:
18179 case IX86_BUILTIN_PSRADI256:
18180 case IX86_BUILTIN_PSRADI256_MASK:
18181 case IX86_BUILTIN_PSRADI512:
18182 case IX86_BUILTIN_PSRAQ128_MASK:
18183 case IX86_BUILTIN_PSRAQ256_MASK:
18184 case IX86_BUILTIN_PSRAQ512:
18185 case IX86_BUILTIN_PSRAQI128_MASK:
18186 case IX86_BUILTIN_PSRAQI256_MASK:
18187 case IX86_BUILTIN_PSRAQI512:
18188 case IX86_BUILTIN_PSRAW:
18189 case IX86_BUILTIN_PSRAW128:
18190 case IX86_BUILTIN_PSRAW128_MASK:
18191 case IX86_BUILTIN_PSRAW256:
18192 case IX86_BUILTIN_PSRAW256_MASK:
18193 case IX86_BUILTIN_PSRAW512:
18194 case IX86_BUILTIN_PSRAWI:
18195 case IX86_BUILTIN_PSRAWI128:
18196 case IX86_BUILTIN_PSRAWI128_MASK:
18197 case IX86_BUILTIN_PSRAWI256:
18198 case IX86_BUILTIN_PSRAWI256_MASK:
18199 case IX86_BUILTIN_PSRAWI512:
18200 rcode = ASHIFTRT;
18201 is_vshift = false;
18202 goto do_shift;
18203 case IX86_BUILTIN_PSRLD:
18204 case IX86_BUILTIN_PSRLD128:
18205 case IX86_BUILTIN_PSRLD128_MASK:
18206 case IX86_BUILTIN_PSRLD256:
18207 case IX86_BUILTIN_PSRLD256_MASK:
18208 case IX86_BUILTIN_PSRLD512:
18209 case IX86_BUILTIN_PSRLDI:
18210 case IX86_BUILTIN_PSRLDI128:
18211 case IX86_BUILTIN_PSRLDI128_MASK:
18212 case IX86_BUILTIN_PSRLDI256:
18213 case IX86_BUILTIN_PSRLDI256_MASK:
18214 case IX86_BUILTIN_PSRLDI512:
18215 case IX86_BUILTIN_PSRLQ:
18216 case IX86_BUILTIN_PSRLQ128:
18217 case IX86_BUILTIN_PSRLQ128_MASK:
18218 case IX86_BUILTIN_PSRLQ256:
18219 case IX86_BUILTIN_PSRLQ256_MASK:
18220 case IX86_BUILTIN_PSRLQ512:
18221 case IX86_BUILTIN_PSRLQI:
18222 case IX86_BUILTIN_PSRLQI128:
18223 case IX86_BUILTIN_PSRLQI128_MASK:
18224 case IX86_BUILTIN_PSRLQI256:
18225 case IX86_BUILTIN_PSRLQI256_MASK:
18226 case IX86_BUILTIN_PSRLQI512:
18227 case IX86_BUILTIN_PSRLW:
18228 case IX86_BUILTIN_PSRLW128:
18229 case IX86_BUILTIN_PSRLW128_MASK:
18230 case IX86_BUILTIN_PSRLW256:
18231 case IX86_BUILTIN_PSRLW256_MASK:
18232 case IX86_BUILTIN_PSRLW512:
18233 case IX86_BUILTIN_PSRLWI:
18234 case IX86_BUILTIN_PSRLWI128:
18235 case IX86_BUILTIN_PSRLWI128_MASK:
18236 case IX86_BUILTIN_PSRLWI256:
18237 case IX86_BUILTIN_PSRLWI256_MASK:
18238 case IX86_BUILTIN_PSRLWI512:
18239 rcode = LSHIFTRT;
18240 is_vshift = false;
18241 goto do_shift;
18242 case IX86_BUILTIN_PSLLVV16HI:
18243 case IX86_BUILTIN_PSLLVV16SI:
18244 case IX86_BUILTIN_PSLLVV2DI:
18245 case IX86_BUILTIN_PSLLVV2DI_MASK:
18246 case IX86_BUILTIN_PSLLVV32HI:
18247 case IX86_BUILTIN_PSLLVV4DI:
18248 case IX86_BUILTIN_PSLLVV4DI_MASK:
18249 case IX86_BUILTIN_PSLLVV4SI:
18250 case IX86_BUILTIN_PSLLVV4SI_MASK:
18251 case IX86_BUILTIN_PSLLVV8DI:
18252 case IX86_BUILTIN_PSLLVV8HI:
18253 case IX86_BUILTIN_PSLLVV8SI:
18254 case IX86_BUILTIN_PSLLVV8SI_MASK:
18255 rcode = ASHIFT;
18256 is_vshift = true;
18257 goto do_shift;
18258 case IX86_BUILTIN_PSRAVQ128:
18259 case IX86_BUILTIN_PSRAVQ256:
18260 case IX86_BUILTIN_PSRAVV16HI:
18261 case IX86_BUILTIN_PSRAVV16SI:
18262 case IX86_BUILTIN_PSRAVV32HI:
18263 case IX86_BUILTIN_PSRAVV4SI:
18264 case IX86_BUILTIN_PSRAVV4SI_MASK:
18265 case IX86_BUILTIN_PSRAVV8DI:
18266 case IX86_BUILTIN_PSRAVV8HI:
18267 case IX86_BUILTIN_PSRAVV8SI:
18268 case IX86_BUILTIN_PSRAVV8SI_MASK:
18269 rcode = ASHIFTRT;
18270 is_vshift = true;
18271 goto do_shift;
18272 case IX86_BUILTIN_PSRLVV16HI:
18273 case IX86_BUILTIN_PSRLVV16SI:
18274 case IX86_BUILTIN_PSRLVV2DI:
18275 case IX86_BUILTIN_PSRLVV2DI_MASK:
18276 case IX86_BUILTIN_PSRLVV32HI:
18277 case IX86_BUILTIN_PSRLVV4DI:
18278 case IX86_BUILTIN_PSRLVV4DI_MASK:
18279 case IX86_BUILTIN_PSRLVV4SI:
18280 case IX86_BUILTIN_PSRLVV4SI_MASK:
18281 case IX86_BUILTIN_PSRLVV8DI:
18282 case IX86_BUILTIN_PSRLVV8HI:
18283 case IX86_BUILTIN_PSRLVV8SI:
18284 case IX86_BUILTIN_PSRLVV8SI_MASK:
18285 rcode = LSHIFTRT;
18286 is_vshift = true;
18287 goto do_shift;
18288
18289 do_shift:
18290 gcc_assert (n_args >= 2);
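/* Try to fold the shift to a constant. For masked variants the last
argument is the mask and the one before it is the merge (pass-through)
operand. */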
18291 if (TREE_CODE (args[0]) != VECTOR_CST)
18292 break;
18293 mask = HOST_WIDE_INT_M1U;
18294 if (n_args > 2)
18295 {
18296 /* This is a masked shift. */
18297 if (!tree_fits_uhwi_p (args[n_args - 1])
18298 || TREE_SIDE_EFFECTS (args[n_args - 2]))
18299 break;
18300 mask = tree_to_uhwi (args[n_args - 1]);
18301 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
18302 mask |= HOST_WIDE_INT_M1U << elems;
18303 if (mask != HOST_WIDE_INT_M1U
18304 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
18305 break;
18306 if (mask == (HOST_WIDE_INT_M1U << elems))
18307 return args[n_args - 2];
18308 }
18309 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
18310 break;
18311 if (tree tem = (is_vshift ? integer_one_node
18312 : ix86_vector_shift_count (args[1])))
18313 {
18314 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
18315 unsigned HOST_WIDE_INT prec
18316 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
18317 if (count == 0 && mask == HOST_WIDE_INT_M1U)
18318 return args[0];
18319 if (count >= prec)
18320 {
18321 if (rcode == ASHIFTRT)
18322 count = prec - 1;
18323 else if (mask == HOST_WIDE_INT_M1U)
18324 return build_zero_cst (TREE_TYPE (args[0]));
18325 }
18326 tree countt = NULL_TREE;
18327 if (!is_vshift)
18328 {
18329 if (count >= prec)
18330 countt = integer_zero_node;
18331 else
18332 countt = build_int_cst (integer_type_node, count);
18333 }
18334 tree_vector_builder builder;
18335 if (mask != HOST_WIDE_INT_M1U || is_vshift)
18336 builder.new_vector (TREE_TYPE (args[0]),
18337 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
18338 1);
18339 else
18340 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
18341 false);
18342 unsigned int cnt = builder.encoded_nelts ();
18343 for (unsigned int i = 0; i < cnt; ++i)
18344 {
18345 tree elt = VECTOR_CST_ELT (args[0], i);
18346 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
18347 return NULL_TREE;
18348 tree type = TREE_TYPE (elt);
18349 if (rcode == LSHIFTRT)
18350 elt = fold_convert (unsigned_type_for (type), elt);
18351 if (is_vshift)
18352 {
18353 countt = VECTOR_CST_ELT (args[1], i);
18354 if (TREE_CODE (countt) != INTEGER_CST
18355 || TREE_OVERFLOW (countt))
18356 return NULL_TREE;
18357 if (wi::neg_p (wi::to_wide (countt))
18358 || wi::to_widest (countt) >= prec)
18359 {
18360 if (rcode == ASHIFTRT)
18361 countt = build_int_cst (TREE_TYPE (countt),
18362 prec - 1);
18363 else
18364 {
18365 elt = build_zero_cst (TREE_TYPE (elt));
18366 countt = build_zero_cst (TREE_TYPE (countt));
18367 }
18368 }
18369 }
18370 else if (count >= prec)
18371 elt = build_zero_cst (TREE_TYPE (elt));
18372 elt = const_binop (rcode == ASHIFT
18373 ? LSHIFT_EXPR : RSHIFT_EXPR,
18374 TREE_TYPE (elt), elt, countt);
18375 if (!elt || TREE_CODE (elt) != INTEGER_CST)
18376 return NULL_TREE;
18377 if (rcode == LSHIFTRT)
18378 elt = fold_convert (type, elt);
18379 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
18380 {
18381 elt = VECTOR_CST_ELT (args[n_args - 2], i);
18382 if (TREE_CODE (elt) != INTEGER_CST
18383 || TREE_OVERFLOW (elt))
18384 return NULL_TREE;
18385 }
18386 builder.quick_push (elt);
18387 }
18388 return builder.build ();
18389 }
18390 break;
18391
18392 default:
18393 break;
18394 }
18395 }
18396
18397 #ifdef SUBTARGET_FOLD_BUILTIN
18398 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
18399 #endif
18400
18401 return NULL_TREE;
18402 }
18403
18404 /* Fold a MD builtin (use ix86_fold_builtin for folding into
18405 constant) in GIMPLE. */
18406
18407 bool
18408 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
18409 {
18410 gimple *stmt = gsi_stmt (*gsi);
18411 tree fndecl = gimple_call_fndecl (stmt);
18412 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
18413 int n_args = gimple_call_num_args (stmt);
18414 enum ix86_builtins fn_code
18415 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
18416 tree decl = NULL_TREE;
18417 tree arg0, arg1, arg2;
18418 enum rtx_code rcode;
18419 enum tree_code tcode;
18420 unsigned HOST_WIDE_INT count;
18421 bool is_vshift;
18422 unsigned HOST_WIDE_INT elems;
18423
18424 /* Don't fold when there is an ISA mismatch. */
18425 if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
18426 return false;
18427
18428 switch (fn_code)
18429 {
18430 case IX86_BUILTIN_TZCNT32:
18431 decl = builtin_decl_implicit (BUILT_IN_CTZ);
18432 goto fold_tzcnt_lzcnt;
18433
18434 case IX86_BUILTIN_TZCNT64:
18435 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
18436 goto fold_tzcnt_lzcnt;
18437
18438 case IX86_BUILTIN_LZCNT32:
18439 decl = builtin_decl_implicit (BUILT_IN_CLZ);
18440 goto fold_tzcnt_lzcnt;
18441
18442 case IX86_BUILTIN_LZCNT64:
18443 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
18444 goto fold_tzcnt_lzcnt;
18445
18446 fold_tzcnt_lzcnt:
18447 gcc_assert (n_args == 1);
18448 arg0 = gimple_call_arg (stmt, 0);
18449 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
18450 {
18451 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
18452 /* If arg0 is provably non-zero, optimize into the generic
18453 __builtin_c[tl]z{,ll} function, which the middle-end handles
18454 better. */
18455 if (!expr_not_equal_to (arg0, wi::zero (prec)))
18456 return false;
18457
18458 location_t loc = gimple_location (stmt);
18459 gimple *g = gimple_build_call (decl, 1, arg0);
18460 gimple_set_location (g, loc);
18461 tree lhs = make_ssa_name (integer_type_node);
18462 gimple_call_set_lhs (g, lhs);
18463 gsi_insert_before (gsi, g, GSI_SAME_STMT);
18464 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
18465 gimple_set_location (g, loc);
18466 gsi_replace (gsi, g, false);
18467 return true;
18468 }
18469 break;
18470
18471 case IX86_BUILTIN_BZHI32:
18472 case IX86_BUILTIN_BZHI64:
18473 gcc_assert (n_args == 2);
18474 arg1 = gimple_call_arg (stmt, 1);
18475 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
18476 {
18477 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
18478 arg0 = gimple_call_arg (stmt, 0);
18479 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
18480 break;
18481 location_t loc = gimple_location (stmt);
18482 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18483 gimple_set_location (g, loc);
18484 gsi_replace (gsi, g, false);
18485 return true;
18486 }
18487 break;
18488
18489 case IX86_BUILTIN_PDEP32:
18490 case IX86_BUILTIN_PDEP64:
18491 case IX86_BUILTIN_PEXT32:
18492 case IX86_BUILTIN_PEXT64:
18493 gcc_assert (n_args == 2);
18494 arg1 = gimple_call_arg (stmt, 1);
18495 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
18496 {
18497 location_t loc = gimple_location (stmt);
18498 arg0 = gimple_call_arg (stmt, 0);
18499 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18500 gimple_set_location (g, loc);
18501 gsi_replace (gsi, g, false);
18502 return true;
18503 }
18504 break;
18505
18506 case IX86_BUILTIN_PBLENDVB256:
18507 case IX86_BUILTIN_BLENDVPS256:
18508 case IX86_BUILTIN_BLENDVPD256:
18509 /* pcmpeqb/d/q is under AVX2; without AVX2 it is vec-lowered
18510 to scalar operations and not combined back. */
18511 if (!TARGET_AVX2)
18512 break;
18513
18514 /* FALLTHRU. */
18515 case IX86_BUILTIN_BLENDVPD:
18516 /* blendvpd is under SSE4.1 but pcmpgtq is under SSE4.2;
18517 without SSE4.2 it is vec-lowered to scalar operations and
18518 not combined back. */
18519 if (!TARGET_SSE4_2)
18520 break;
18521 /* FALLTHRU. */
18522 case IX86_BUILTIN_PBLENDVB128:
18523 case IX86_BUILTIN_BLENDVPS:
18524 gcc_assert (n_args == 3);
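/* Lower the blend to a VEC_COND_EXPR on the sign of the selector:
lanes where arg2 < 0 take arg1, the rest take arg0. */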
18525 arg0 = gimple_call_arg (stmt, 0);
18526 arg1 = gimple_call_arg (stmt, 1);
18527 arg2 = gimple_call_arg (stmt, 2);
18528 if (gimple_call_lhs (stmt))
18529 {
18530 location_t loc = gimple_location (stmt);
18531 tree type = TREE_TYPE (arg2);
18532 gimple_seq stmts = NULL;
18533 if (VECTOR_FLOAT_TYPE_P (type))
18534 {
18535 tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
18536 ? intSI_type_node : intDI_type_node;
18537 type = get_same_sized_vectype (itype, type);
18538 arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
18539 }
18540 tree zero_vec = build_zero_cst (type);
18541 tree cmp_type = truth_type_for (type);
18542 tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
18543 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
18544 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18545 VEC_COND_EXPR, cmp,
18546 arg1, arg0);
18547 gimple_set_location (g, loc);
18548 gsi_replace (gsi, g, false);
18549 }
18550 else
18551 gsi_replace (gsi, gimple_build_nop (), false);
18552 return true;
18553
18554
18555 case IX86_BUILTIN_PCMPEQB128:
18556 case IX86_BUILTIN_PCMPEQW128:
18557 case IX86_BUILTIN_PCMPEQD128:
18558 case IX86_BUILTIN_PCMPEQQ:
18559 case IX86_BUILTIN_PCMPEQB256:
18560 case IX86_BUILTIN_PCMPEQW256:
18561 case IX86_BUILTIN_PCMPEQD256:
18562 case IX86_BUILTIN_PCMPEQQ256:
18563 tcode = EQ_EXPR;
18564 goto do_cmp;
18565
18566 case IX86_BUILTIN_PCMPGTB128:
18567 case IX86_BUILTIN_PCMPGTW128:
18568 case IX86_BUILTIN_PCMPGTD128:
18569 case IX86_BUILTIN_PCMPGTQ:
18570 case IX86_BUILTIN_PCMPGTB256:
18571 case IX86_BUILTIN_PCMPGTW256:
18572 case IX86_BUILTIN_PCMPGTD256:
18573 case IX86_BUILTIN_PCMPGTQ256:
18574 tcode = GT_EXPR;
18575
18576 do_cmp:
18577 gcc_assert (n_args == 2);
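/* Expand the integer compare builtin to a VEC_COND_EXPR that yields -1
in lanes where the comparison holds and 0 elsewhere. */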
18578 arg0 = gimple_call_arg (stmt, 0);
18579 arg1 = gimple_call_arg (stmt, 1);
18580 if (gimple_call_lhs (stmt))
18581 {
18582 location_t loc = gimple_location (stmt);
18583 tree type = TREE_TYPE (arg0);
18584 tree zero_vec = build_zero_cst (type);
18585 tree minus_one_vec = build_minus_one_cst (type);
18586 tree cmp_type = truth_type_for (type);
18587 gimple_seq stmts = NULL;
18588 tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
18589 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
18590 gimple* g = gimple_build_assign (gimple_call_lhs (stmt),
18591 VEC_COND_EXPR, cmp,
18592 minus_one_vec, zero_vec);
18593 gimple_set_location (g, loc);
18594 gsi_replace (gsi, g, false);
18595 }
18596 else
18597 gsi_replace (gsi, gimple_build_nop (), false);
18598 return true;
18599
18600 case IX86_BUILTIN_PSLLD:
18601 case IX86_BUILTIN_PSLLD128:
18602 case IX86_BUILTIN_PSLLD128_MASK:
18603 case IX86_BUILTIN_PSLLD256:
18604 case IX86_BUILTIN_PSLLD256_MASK:
18605 case IX86_BUILTIN_PSLLD512:
18606 case IX86_BUILTIN_PSLLDI:
18607 case IX86_BUILTIN_PSLLDI128:
18608 case IX86_BUILTIN_PSLLDI128_MASK:
18609 case IX86_BUILTIN_PSLLDI256:
18610 case IX86_BUILTIN_PSLLDI256_MASK:
18611 case IX86_BUILTIN_PSLLDI512:
18612 case IX86_BUILTIN_PSLLQ:
18613 case IX86_BUILTIN_PSLLQ128:
18614 case IX86_BUILTIN_PSLLQ128_MASK:
18615 case IX86_BUILTIN_PSLLQ256:
18616 case IX86_BUILTIN_PSLLQ256_MASK:
18617 case IX86_BUILTIN_PSLLQ512:
18618 case IX86_BUILTIN_PSLLQI:
18619 case IX86_BUILTIN_PSLLQI128:
18620 case IX86_BUILTIN_PSLLQI128_MASK:
18621 case IX86_BUILTIN_PSLLQI256:
18622 case IX86_BUILTIN_PSLLQI256_MASK:
18623 case IX86_BUILTIN_PSLLQI512:
18624 case IX86_BUILTIN_PSLLW:
18625 case IX86_BUILTIN_PSLLW128:
18626 case IX86_BUILTIN_PSLLW128_MASK:
18627 case IX86_BUILTIN_PSLLW256:
18628 case IX86_BUILTIN_PSLLW256_MASK:
18629 case IX86_BUILTIN_PSLLW512_MASK:
18630 case IX86_BUILTIN_PSLLWI:
18631 case IX86_BUILTIN_PSLLWI128:
18632 case IX86_BUILTIN_PSLLWI128_MASK:
18633 case IX86_BUILTIN_PSLLWI256:
18634 case IX86_BUILTIN_PSLLWI256_MASK:
18635 case IX86_BUILTIN_PSLLWI512_MASK:
18636 rcode = ASHIFT;
18637 is_vshift = false;
18638 goto do_shift;
18639 case IX86_BUILTIN_PSRAD:
18640 case IX86_BUILTIN_PSRAD128:
18641 case IX86_BUILTIN_PSRAD128_MASK:
18642 case IX86_BUILTIN_PSRAD256:
18643 case IX86_BUILTIN_PSRAD256_MASK:
18644 case IX86_BUILTIN_PSRAD512:
18645 case IX86_BUILTIN_PSRADI:
18646 case IX86_BUILTIN_PSRADI128:
18647 case IX86_BUILTIN_PSRADI128_MASK:
18648 case IX86_BUILTIN_PSRADI256:
18649 case IX86_BUILTIN_PSRADI256_MASK:
18650 case IX86_BUILTIN_PSRADI512:
18651 case IX86_BUILTIN_PSRAQ128_MASK:
18652 case IX86_BUILTIN_PSRAQ256_MASK:
18653 case IX86_BUILTIN_PSRAQ512:
18654 case IX86_BUILTIN_PSRAQI128_MASK:
18655 case IX86_BUILTIN_PSRAQI256_MASK:
18656 case IX86_BUILTIN_PSRAQI512:
18657 case IX86_BUILTIN_PSRAW:
18658 case IX86_BUILTIN_PSRAW128:
18659 case IX86_BUILTIN_PSRAW128_MASK:
18660 case IX86_BUILTIN_PSRAW256:
18661 case IX86_BUILTIN_PSRAW256_MASK:
18662 case IX86_BUILTIN_PSRAW512:
18663 case IX86_BUILTIN_PSRAWI:
18664 case IX86_BUILTIN_PSRAWI128:
18665 case IX86_BUILTIN_PSRAWI128_MASK:
18666 case IX86_BUILTIN_PSRAWI256:
18667 case IX86_BUILTIN_PSRAWI256_MASK:
18668 case IX86_BUILTIN_PSRAWI512:
18669 rcode = ASHIFTRT;
18670 is_vshift = false;
18671 goto do_shift;
18672 case IX86_BUILTIN_PSRLD:
18673 case IX86_BUILTIN_PSRLD128:
18674 case IX86_BUILTIN_PSRLD128_MASK:
18675 case IX86_BUILTIN_PSRLD256:
18676 case IX86_BUILTIN_PSRLD256_MASK:
18677 case IX86_BUILTIN_PSRLD512:
18678 case IX86_BUILTIN_PSRLDI:
18679 case IX86_BUILTIN_PSRLDI128:
18680 case IX86_BUILTIN_PSRLDI128_MASK:
18681 case IX86_BUILTIN_PSRLDI256:
18682 case IX86_BUILTIN_PSRLDI256_MASK:
18683 case IX86_BUILTIN_PSRLDI512:
18684 case IX86_BUILTIN_PSRLQ:
18685 case IX86_BUILTIN_PSRLQ128:
18686 case IX86_BUILTIN_PSRLQ128_MASK:
18687 case IX86_BUILTIN_PSRLQ256:
18688 case IX86_BUILTIN_PSRLQ256_MASK:
18689 case IX86_BUILTIN_PSRLQ512:
18690 case IX86_BUILTIN_PSRLQI:
18691 case IX86_BUILTIN_PSRLQI128:
18692 case IX86_BUILTIN_PSRLQI128_MASK:
18693 case IX86_BUILTIN_PSRLQI256:
18694 case IX86_BUILTIN_PSRLQI256_MASK:
18695 case IX86_BUILTIN_PSRLQI512:
18696 case IX86_BUILTIN_PSRLW:
18697 case IX86_BUILTIN_PSRLW128:
18698 case IX86_BUILTIN_PSRLW128_MASK:
18699 case IX86_BUILTIN_PSRLW256:
18700 case IX86_BUILTIN_PSRLW256_MASK:
18701 case IX86_BUILTIN_PSRLW512:
18702 case IX86_BUILTIN_PSRLWI:
18703 case IX86_BUILTIN_PSRLWI128:
18704 case IX86_BUILTIN_PSRLWI128_MASK:
18705 case IX86_BUILTIN_PSRLWI256:
18706 case IX86_BUILTIN_PSRLWI256_MASK:
18707 case IX86_BUILTIN_PSRLWI512:
18708 rcode = LSHIFTRT;
18709 is_vshift = false;
18710 goto do_shift;
18711 case IX86_BUILTIN_PSLLVV16HI:
18712 case IX86_BUILTIN_PSLLVV16SI:
18713 case IX86_BUILTIN_PSLLVV2DI:
18714 case IX86_BUILTIN_PSLLVV2DI_MASK:
18715 case IX86_BUILTIN_PSLLVV32HI:
18716 case IX86_BUILTIN_PSLLVV4DI:
18717 case IX86_BUILTIN_PSLLVV4DI_MASK:
18718 case IX86_BUILTIN_PSLLVV4SI:
18719 case IX86_BUILTIN_PSLLVV4SI_MASK:
18720 case IX86_BUILTIN_PSLLVV8DI:
18721 case IX86_BUILTIN_PSLLVV8HI:
18722 case IX86_BUILTIN_PSLLVV8SI:
18723 case IX86_BUILTIN_PSLLVV8SI_MASK:
18724 rcode = ASHIFT;
18725 is_vshift = true;
18726 goto do_shift;
18727 case IX86_BUILTIN_PSRAVQ128:
18728 case IX86_BUILTIN_PSRAVQ256:
18729 case IX86_BUILTIN_PSRAVV16HI:
18730 case IX86_BUILTIN_PSRAVV16SI:
18731 case IX86_BUILTIN_PSRAVV32HI:
18732 case IX86_BUILTIN_PSRAVV4SI:
18733 case IX86_BUILTIN_PSRAVV4SI_MASK:
18734 case IX86_BUILTIN_PSRAVV8DI:
18735 case IX86_BUILTIN_PSRAVV8HI:
18736 case IX86_BUILTIN_PSRAVV8SI:
18737 case IX86_BUILTIN_PSRAVV8SI_MASK:
18738 rcode = ASHIFTRT;
18739 is_vshift = true;
18740 goto do_shift;
18741 case IX86_BUILTIN_PSRLVV16HI:
18742 case IX86_BUILTIN_PSRLVV16SI:
18743 case IX86_BUILTIN_PSRLVV2DI:
18744 case IX86_BUILTIN_PSRLVV2DI_MASK:
18745 case IX86_BUILTIN_PSRLVV32HI:
18746 case IX86_BUILTIN_PSRLVV4DI:
18747 case IX86_BUILTIN_PSRLVV4DI_MASK:
18748 case IX86_BUILTIN_PSRLVV4SI:
18749 case IX86_BUILTIN_PSRLVV4SI_MASK:
18750 case IX86_BUILTIN_PSRLVV8DI:
18751 case IX86_BUILTIN_PSRLVV8HI:
18752 case IX86_BUILTIN_PSRLVV8SI:
18753 case IX86_BUILTIN_PSRLVV8SI_MASK:
18754 rcode = LSHIFTRT;
18755 is_vshift = true;
18756 goto do_shift;
18757
18758 do_shift:
18759 gcc_assert (n_args >= 2);
18760 if (!gimple_call_lhs (stmt))
18761 break;
18762 arg0 = gimple_call_arg (stmt, 0);
18763 arg1 = gimple_call_arg (stmt, 1);
18764 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
18765 /* For masked shift, only optimize if the mask is all ones. */
18766 if (n_args > 2
18767 && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
18768 break;
18769 if (is_vshift)
18770 {
18771 if (TREE_CODE (arg1) != VECTOR_CST)
18772 break;
18773 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
18774 if (integer_zerop (arg1))
18775 count = 0;
18776 else if (rcode == ASHIFTRT)
18777 break;
18778 else
18779 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
18780 {
18781 tree elt = VECTOR_CST_ELT (arg1, i);
18782 if (!wi::neg_p (wi::to_wide (elt))
18783 && wi::to_widest (elt) < count)
18784 return false;
18785 }
18786 }
18787 else
18788 {
18789 arg1 = ix86_vector_shift_count (arg1);
18790 if (!arg1)
18791 break;
18792 count = tree_to_uhwi (arg1);
18793 }
18794 if (count == 0)
18795 {
18796 /* Just return the first argument for shift by 0. */
18797 location_t loc = gimple_location (stmt);
18798 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18799 gimple_set_location (g, loc);
18800 gsi_replace (gsi, g, false);
18801 return true;
18802 }
18803 if (rcode != ASHIFTRT
18804 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
18805 {
18806 /* For shift counts equal or greater than precision, except for
18807 arithmetic right shift the result is zero. */
18808 location_t loc = gimple_location (stmt);
18809 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18810 build_zero_cst (TREE_TYPE (arg0)));
18811 gimple_set_location (g, loc);
18812 gsi_replace (gsi, g, false);
18813 return true;
18814 }
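/* For illustration (editor's example, not from the original sources):
   with the rules above, __builtin_ia32_pslldi128 (x, 0) folds to x and
   __builtin_ia32_psrldi128 (x, 32) folds to the zero vector, while
   __builtin_ia32_psradi128 (x, 32) is left for the expander since an
   arithmetic right shift by >= precision does not yield zero.  */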
18815 break;
18816
18817 case IX86_BUILTIN_SHUFPD512:
18818 case IX86_BUILTIN_SHUFPS512:
18819 case IX86_BUILTIN_SHUFPD:
18820 case IX86_BUILTIN_SHUFPD256:
18821 case IX86_BUILTIN_SHUFPS:
18822 case IX86_BUILTIN_SHUFPS256:
18823 arg0 = gimple_call_arg (stmt, 0);
18824 elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
18825 /* For a masked shuffle, only optimize if the mask is all ones. */
18826 if (n_args > 3
18827 && !ix86_masked_all_ones (elems,
18828 gimple_call_arg (stmt, n_args - 1)))
18829 break;
18830 arg2 = gimple_call_arg (stmt, 2);
18831 if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
18832 {
18833 unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
18834 /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
18835 if (shuffle_mask > 255)
18836 return false;
18837
18838 machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
18839 location_t loc = gimple_location (stmt);
18840 tree itype = (imode == E_DFmode
18841 ? long_long_integer_type_node : integer_type_node);
18842 tree vtype = build_vector_type (itype, elems);
18843 tree_vector_builder elts (vtype, elems, 1);
18844
18845
18846 /* Transform the integer shuffle_mask into the vector perm_mask used
18847 by vec_perm_expr; refer to shufp[sd]256/512 in sse.md. */
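/* Worked example (illustrative only): for V4DFmode with shuffle_mask
   0x6, the loop below builds perm_mask { 0, 5, 3, 6 }, i.e. destination
   elements { arg0[0], arg1[1], arg0[3], arg1[2] }, matching the
   vshufpd ymm semantics.  */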
18848 for (unsigned i = 0; i != elems; i++)
18849 {
18850 unsigned sel_idx;
18851 /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
18852 provide 2 select controls for each element of the
18853 destination. */
18854 if (imode == E_DFmode)
18855 sel_idx = (i & 1) * elems + (i & ~1)
18856 + ((shuffle_mask >> i) & 1);
18857 else
18858 {
18859 /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
18860 controls for each element of the destination. */
18861 unsigned j = i % 4;
18862 sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
18863 + ((shuffle_mask >> 2 * j) & 3);
18864 }
18865 elts.quick_push (build_int_cst (itype, sel_idx));
18866 }
18867
18868 tree perm_mask = elts.build ();
18869 arg1 = gimple_call_arg (stmt, 1);
18870 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18871 VEC_PERM_EXPR,
18872 arg0, arg1, perm_mask);
18873 gimple_set_location (g, loc);
18874 gsi_replace (gsi, g, false);
18875 return true;
18876 }
18877 // Do not error yet, the constant could be propagated later?
18878 break;
18879
18880 default:
18881 break;
18882 }
18883
18884 return false;
18885 }
18886
18887 /* Handler for an SVML-style interface to
18888 a library with vectorized intrinsics. */
18889
18890 tree
18891 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
18892 {
18893 char name[20];
18894 tree fntype, new_fndecl, args;
18895 unsigned arity;
18896 const char *bname;
18897 machine_mode el_mode, in_mode;
18898 int n, in_n;
18899
18900 /* The SVML is suitable for unsafe math only. */
18901 if (!flag_unsafe_math_optimizations)
18902 return NULL_TREE;
18903
18904 el_mode = TYPE_MODE (TREE_TYPE (type_out));
18905 n = TYPE_VECTOR_SUBPARTS (type_out);
18906 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18907 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18908 if (el_mode != in_mode
18909 || n != in_n)
18910 return NULL_TREE;
18911
18912 switch (fn)
18913 {
18914 CASE_CFN_EXP:
18915 CASE_CFN_LOG:
18916 CASE_CFN_LOG10:
18917 CASE_CFN_POW:
18918 CASE_CFN_TANH:
18919 CASE_CFN_TAN:
18920 CASE_CFN_ATAN:
18921 CASE_CFN_ATAN2:
18922 CASE_CFN_ATANH:
18923 CASE_CFN_CBRT:
18924 CASE_CFN_SINH:
18925 CASE_CFN_SIN:
18926 CASE_CFN_ASINH:
18927 CASE_CFN_ASIN:
18928 CASE_CFN_COSH:
18929 CASE_CFN_COS:
18930 CASE_CFN_ACOSH:
18931 CASE_CFN_ACOS:
18932 if ((el_mode != DFmode || n != 2)
18933 && (el_mode != SFmode || n != 4))
18934 return NULL_TREE;
18935 break;
18936
18937 default:
18938 return NULL_TREE;
18939 }
18940
18941 tree fndecl = mathfn_built_in (el_mode == DFmode
18942 ? double_type_node : float_type_node, fn);
18943 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18944
18945 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
18946 strcpy (name, "vmlsLn4");
18947 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
18948 strcpy (name, "vmldLn2");
18949 else if (n == 4)
18950 {
18951 sprintf (name, "vmls%s", bname+10);
18952 name[strlen (name)-1] = '4';
18953 }
18954 else
18955 sprintf (name, "vmld%s2", bname+10);
18956
18957 /* Convert to uppercase. */
18958 name[4] &= ~0x20;
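/* For example (illustrative), CFN_SIN yields "vmldSin2" for V2DFmode
   and "vmlsSin4" for V4SFmode; log is special-cased above to
   "vmldLn2"/"vmlsLn4".  */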
18959
18960 arity = 0;
18961 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18962 arity++;
18963
18964 if (arity == 1)
18965 fntype = build_function_type_list (type_out, type_in, NULL);
18966 else
18967 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18968
18969 /* Build a function declaration for the vectorized function. */
18970 new_fndecl = build_decl (BUILTINS_LOCATION,
18971 FUNCTION_DECL, get_identifier (name), fntype);
18972 TREE_PUBLIC (new_fndecl) = 1;
18973 DECL_EXTERNAL (new_fndecl) = 1;
18974 DECL_IS_NOVOPS (new_fndecl) = 1;
18975 TREE_READONLY (new_fndecl) = 1;
18976
18977 return new_fndecl;
18978 }
18979
18980 /* Handler for an ACML-style interface to
18981 a library with vectorized intrinsics. */
18982
18983 tree
18984 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
18985 {
18986 char name[20] = "__vr.._";
18987 tree fntype, new_fndecl, args;
18988 unsigned arity;
18989 const char *bname;
18990 machine_mode el_mode, in_mode;
18991 int n, in_n;
18992
18993 /* The ACML is 64-bit only and suitable for unsafe math only, as
18994 it does not correctly support parts of IEEE (such as denormals)
18995 with the required precision. */
18996 if (!TARGET_64BIT
18997 || !flag_unsafe_math_optimizations)
18998 return NULL_TREE;
18999
19000 el_mode = TYPE_MODE (TREE_TYPE (type_out));
19001 n = TYPE_VECTOR_SUBPARTS (type_out);
19002 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19003 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19004 if (el_mode != in_mode
19005 || n != in_n)
19006 return NULL_TREE;
19007
19008 switch (fn)
19009 {
19010 CASE_CFN_SIN:
19011 CASE_CFN_COS:
19012 CASE_CFN_EXP:
19013 CASE_CFN_LOG:
19014 CASE_CFN_LOG2:
19015 CASE_CFN_LOG10:
19016 if (el_mode == DFmode && n == 2)
19017 {
19018 name[4] = 'd';
19019 name[5] = '2';
19020 }
19021 else if (el_mode == SFmode && n == 4)
19022 {
19023 name[4] = 's';
19024 name[5] = '4';
19025 }
19026 else
19027 return NULL_TREE;
19028 break;
19029
19030 default:
19031 return NULL_TREE;
19032 }
19033
19034 tree fndecl = mathfn_built_in (el_mode == DFmode
19035 ? double_type_node : float_type_node, fn);
19036 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
19037 sprintf (name + 7, "%s", bname+10);
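/* For example (illustrative), CFN_SIN becomes "__vrd2_sin" for
   V2DFmode and "__vrs4_sinf" for V4SFmode.  */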
19038
19039 arity = 0;
19040 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
19041 arity++;
19042
19043 if (arity == 1)
19044 fntype = build_function_type_list (type_out, type_in, NULL);
19045 else
19046 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
19047
19048 /* Build a function declaration for the vectorized function. */
19049 new_fndecl = build_decl (BUILTINS_LOCATION,
19050 FUNCTION_DECL, get_identifier (name), fntype);
19051 TREE_PUBLIC (new_fndecl) = 1;
19052 DECL_EXTERNAL (new_fndecl) = 1;
19053 DECL_IS_NOVOPS (new_fndecl) = 1;
19054 TREE_READONLY (new_fndecl) = 1;
19055
19056 return new_fndecl;
19057 }
19058
19059 /* Returns a decl of a function that implements scatter store with
19060 register type VECTYPE and index type INDEX_TYPE and SCALE.
19061 Return NULL_TREE if it is not available. */
19062
19063 static tree
19064 ix86_vectorize_builtin_scatter (const_tree vectype,
19065 const_tree index_type, int scale)
19066 {
19067 bool si;
19068 enum ix86_builtins code;
19069
19070 if (!TARGET_AVX512F)
19071 return NULL_TREE;
19072
19073 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
19074 ? !TARGET_USE_SCATTER_2PARTS
19075 : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
19076 ? !TARGET_USE_SCATTER_4PARTS
19077 : !TARGET_USE_SCATTER))
19078 return NULL_TREE;
19079
19080 if ((TREE_CODE (index_type) != INTEGER_TYPE
19081 && !POINTER_TYPE_P (index_type))
19082 || (TYPE_MODE (index_type) != SImode
19083 && TYPE_MODE (index_type) != DImode))
19084 return NULL_TREE;
19085
19086 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
19087 return NULL_TREE;
19088
19089 /* v*scatter* insn sign extends index to pointer mode. */
19090 if (TYPE_PRECISION (index_type) < POINTER_SIZE
19091 && TYPE_UNSIGNED (index_type))
19092 return NULL_TREE;
19093
19094 /* Scale can be 1, 2, 4 or 8. */
19095 if (scale <= 0
19096 || scale > 8
19097 || (scale & (scale - 1)) != 0)
19098 return NULL_TREE;
19099
19100 si = TYPE_MODE (index_type) == SImode;
19101 switch (TYPE_MODE (vectype))
19102 {
19103 case E_V8DFmode:
19104 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
19105 break;
19106 case E_V8DImode:
19107 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
19108 break;
19109 case E_V16SFmode:
19110 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
19111 break;
19112 case E_V16SImode:
19113 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
19114 break;
19115 case E_V4DFmode:
19116 if (TARGET_AVX512VL)
19117 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
19118 else
19119 return NULL_TREE;
19120 break;
19121 case E_V4DImode:
19122 if (TARGET_AVX512VL)
19123 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
19124 else
19125 return NULL_TREE;
19126 break;
19127 case E_V8SFmode:
19128 if (TARGET_AVX512VL)
19129 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
19130 else
19131 return NULL_TREE;
19132 break;
19133 case E_V8SImode:
19134 if (TARGET_AVX512VL)
19135 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
19136 else
19137 return NULL_TREE;
19138 break;
19139 case E_V2DFmode:
19140 if (TARGET_AVX512VL)
19141 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
19142 else
19143 return NULL_TREE;
19144 break;
19145 case E_V2DImode:
19146 if (TARGET_AVX512VL)
19147 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
19148 else
19149 return NULL_TREE;
19150 break;
19151 case E_V4SFmode:
19152 if (TARGET_AVX512VL)
19153 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
19154 else
19155 return NULL_TREE;
19156 break;
19157 case E_V4SImode:
19158 if (TARGET_AVX512VL)
19159 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
19160 else
19161 return NULL_TREE;
19162 break;
19163 default:
19164 return NULL_TREE;
19165 }
19166
19167 return get_ix86_builtin (code);
19168 }
19169
19170 /* Return true if it is safe to use the rsqrt optabs to optimize
19171 1.0/sqrt. */
19172
19173 static bool
19174 use_rsqrt_p (machine_mode mode)
19175 {
19176 return ((mode == HFmode
19177 || (TARGET_SSE && TARGET_SSE_MATH))
19178 && flag_finite_math_only
19179 && !flag_trapping_math
19180 && flag_unsafe_math_optimizations);
19181 }
19182 \f
19183 /* Helper for avx_vpermilps256_operand et al. This is also used by
19184 the expansion functions to turn the parallel back into a mask.
19185 The return value is 0 for no match and the imm8+1 for a match. */
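/* For example (illustrative), the V4SFmode parallel (3 2 1 0) encodes
   the element-reversal imm8 0x1b, so the function returns 0x1c.  */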
19186
19187 int
19188 avx_vpermilp_parallel (rtx par, machine_mode mode)
19189 {
19190 unsigned i, nelt = GET_MODE_NUNITS (mode);
19191 unsigned mask = 0;
19192 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
19193
19194 if (XVECLEN (par, 0) != (int) nelt)
19195 return 0;
19196
19197 /* Validate that all of the elements are constants, and not totally
19198 out of range. Copy the data into an integral array to make the
19199 subsequent checks easier. */
19200 for (i = 0; i < nelt; ++i)
19201 {
19202 rtx er = XVECEXP (par, 0, i);
19203 unsigned HOST_WIDE_INT ei;
19204
19205 if (!CONST_INT_P (er))
19206 return 0;
19207 ei = INTVAL (er);
19208 if (ei >= nelt)
19209 return 0;
19210 ipar[i] = ei;
19211 }
19212
19213 switch (mode)
19214 {
19215 case E_V8DFmode:
19216 /* In the 512-bit DFmode case, we can only move elements within
19217 a 128-bit lane. First fill the second part of the mask,
19218 then fallthru. */
19219 for (i = 4; i < 6; ++i)
19220 {
19221 if (ipar[i] < 4 || ipar[i] >= 6)
19222 return 0;
19223 mask |= (ipar[i] - 4) << i;
19224 }
19225 for (i = 6; i < 8; ++i)
19226 {
19227 if (ipar[i] < 6)
19228 return 0;
19229 mask |= (ipar[i] - 6) << i;
19230 }
19231 /* FALLTHRU */
19232
19233 case E_V4DFmode:
19234 /* In the 256-bit DFmode case, we can only move elements within
19235 a 128-bit lane. */
19236 for (i = 0; i < 2; ++i)
19237 {
19238 if (ipar[i] >= 2)
19239 return 0;
19240 mask |= ipar[i] << i;
19241 }
19242 for (i = 2; i < 4; ++i)
19243 {
19244 if (ipar[i] < 2)
19245 return 0;
19246 mask |= (ipar[i] - 2) << i;
19247 }
19248 break;
19249
19250 case E_V16SFmode:
19251 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
19252 must mirror the permutation in the lower 256 bits. */
19253 for (i = 0; i < 8; ++i)
19254 if (ipar[i] + 8 != ipar[i + 8])
19255 return 0;
19256 /* FALLTHRU */
19257
19258 case E_V8SFmode:
19259 /* In the 256-bit SFmode case, we have full freedom of
19260 movement within the low 128-bit lane, but the high 128-bit
19261 lane must mirror the exact same pattern. */
19262 for (i = 0; i < 4; ++i)
19263 if (ipar[i] + 4 != ipar[i + 4])
19264 return 0;
19265 nelt = 4;
19266 /* FALLTHRU */
19267
19268 case E_V2DFmode:
19269 case E_V4SFmode:
19270 /* In the 128-bit case, we have full freedom in the placement of
19271 the elements from the source operand. */
19272 for (i = 0; i < nelt; ++i)
19273 mask |= ipar[i] << (i * (nelt / 2));
19274 break;
19275
19276 default:
19277 gcc_unreachable ();
19278 }
19279
19280 /* Make sure success has a non-zero value by adding one. */
19281 return mask + 1;
19282 }
19283
19284 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
19285 the expansion functions to turn the parallel back into a mask.
19286 The return value is 0 for no match and the imm8+1 for a match. */
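/* For example (illustrative), the V4DFmode parallel (2 3 4 5) selects
   the high half of operand 0 and the low half of operand 1, encoding
   imm8 0x21, so the function returns 0x22.  */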
19287
19288 int
19289 avx_vperm2f128_parallel (rtx par, machine_mode mode)
19290 {
19291 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
19292 unsigned mask = 0;
19293 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
19294
19295 if (XVECLEN (par, 0) != (int) nelt)
19296 return 0;
19297
19298 /* Validate that all of the elements are constants, and not totally
19299 out of range. Copy the data into an integral array to make the
19300 subsequent checks easier. */
19301 for (i = 0; i < nelt; ++i)
19302 {
19303 rtx er = XVECEXP (par, 0, i);
19304 unsigned HOST_WIDE_INT ei;
19305
19306 if (!CONST_INT_P (er))
19307 return 0;
19308 ei = INTVAL (er);
19309 if (ei >= 2 * nelt)
19310 return 0;
19311 ipar[i] = ei;
19312 }
19313
19314 /* Validate that the halves of the permute are halves. */
19315 for (i = 0; i < nelt2 - 1; ++i)
19316 if (ipar[i] + 1 != ipar[i + 1])
19317 return 0;
19318 for (i = nelt2; i < nelt - 1; ++i)
19319 if (ipar[i] + 1 != ipar[i + 1])
19320 return 0;
19321
19322 /* Reconstruct the mask. */
19323 for (i = 0; i < 2; ++i)
19324 {
19325 unsigned e = ipar[i * nelt2];
19326 if (e % nelt2)
19327 return 0;
19328 e /= nelt2;
19329 mask |= e << (i * 4);
19330 }
19331
19332 /* Make sure success has a non-zero value by adding one. */
19333 return mask + 1;
19334 }
19335 \f
19336 /* Return a register priority for hard reg REGNO. */
19337 static int
19338 ix86_register_priority (int hard_regno)
19339 {
19340 /* ebp and r13 as the base always want a displacement, and r12 as the
19341 base always wants an index. So discourage their use in an
19342 address. */
19343 if (hard_regno == R12_REG || hard_regno == R13_REG)
19344 return 0;
19345 if (hard_regno == BP_REG)
19346 return 1;
19347 /* New x86-64 int registers result in bigger code size. Discourage them. */
19348 if (REX_INT_REGNO_P (hard_regno))
19349 return 2;
19350 /* New x86-64 SSE registers result in bigger code size. Discourage them. */
19351 if (REX_SSE_REGNO_P (hard_regno))
19352 return 2;
19353 if (EXT_REX_SSE_REGNO_P (hard_regno))
19354 return 1;
19355 /* Usage of AX register results in smaller code. Prefer it. */
19356 if (hard_regno == AX_REG)
19357 return 4;
19358 return 3;
19359 }
19360
19361 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
19362
19363 Put float CONST_DOUBLE in the constant pool instead of fp regs.
19364 QImode must go into class Q_REGS.
19365 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19366 movdf to do mem-to-mem moves through integer regs. */
19367
19368 static reg_class_t
19369 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
19370 {
19371 machine_mode mode = GET_MODE (x);
19372
19373 /* We're only allowed to return a subclass of CLASS. Many of the
19374 following checks fail for NO_REGS, so eliminate that early. */
19375 if (regclass == NO_REGS)
19376 return NO_REGS;
19377
19378 /* All classes can load zeros. */
19379 if (x == CONST0_RTX (mode))
19380 return regclass;
19381
19382 /* Force constants into memory if we are loading a (nonzero) constant into
19383 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
19384 instructions to load from a constant. */
19385 if (CONSTANT_P (x)
19386 && (MAYBE_MMX_CLASS_P (regclass)
19387 || MAYBE_SSE_CLASS_P (regclass)
19388 || MAYBE_MASK_CLASS_P (regclass)))
19389 return NO_REGS;
19390
19391 /* Floating-point constants need more complex checks. */
19392 if (CONST_DOUBLE_P (x))
19393 {
19394 /* General regs can load everything. */
19395 if (INTEGER_CLASS_P (regclass))
19396 return regclass;
19397
19398 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19399 zero above. We only want to wind up preferring 80387 registers if
19400 we plan on doing computation with them. */
19401 if (IS_STACK_MODE (mode)
19402 && standard_80387_constant_p (x) > 0)
19403 {
19404 /* Limit class to FP regs. */
19405 if (FLOAT_CLASS_P (regclass))
19406 return FLOAT_REGS;
19407 }
19408
19409 return NO_REGS;
19410 }
19411
19412 /* Prefer SSE if we can use them for math. Also allow integer regs
19413 when moves between register units are cheap. */
19414 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19415 {
19416 if (TARGET_INTER_UNIT_MOVES_FROM_VEC
19417 && TARGET_INTER_UNIT_MOVES_TO_VEC
19418 && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
19419 return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
19420 else
19421 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
19422 }
19423
19424 /* Generally when we see PLUS here, it's the function invariant
19425 (plus soft-fp const_int), which can only be computed into general
19426 regs. */
19427 if (GET_CODE (x) == PLUS)
19428 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
19429
19430 /* QImode constants are easy to load, but non-constant QImode data
19431 must go into Q_REGS or ALL_MASK_REGS. */
19432 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
19433 {
19434 if (Q_CLASS_P (regclass))
19435 return regclass;
19436 else if (reg_class_subset_p (Q_REGS, regclass))
19437 return Q_REGS;
19438 else if (MASK_CLASS_P (regclass))
19439 return regclass;
19440 else
19441 return NO_REGS;
19442 }
19443
19444 return regclass;
19445 }
19446
19447 /* Discourage putting floating-point values in SSE registers unless
19448 SSE math is being used, and likewise for the 387 registers. */
19449 static reg_class_t
19450 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
19451 {
19452 /* Restrict the output reload class to the register bank that we are doing
19453 math on. If we would like not to return a subset of CLASS, reject this
19454 alternative: if reload cannot do this, it will still use its choice. */
19455 machine_mode mode = GET_MODE (x);
19456 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19457 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
19458
19459 if (IS_STACK_MODE (mode))
19460 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
19461
19462 return regclass;
19463 }
19464
19465 static reg_class_t
19466 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
19467 machine_mode mode, secondary_reload_info *sri)
19468 {
19469 /* Double-word spills from general registers to non-offsettable memory
19470 references (zero-extended addresses) require special handling. */
19471 if (TARGET_64BIT
19472 && MEM_P (x)
19473 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
19474 && INTEGER_CLASS_P (rclass)
19475 && !offsettable_memref_p (x))
19476 {
19477 sri->icode = (in_p
19478 ? CODE_FOR_reload_noff_load
19479 : CODE_FOR_reload_noff_store);
19480 /* Add the cost of moving address to a temporary. */
19481 sri->extra_cost = 1;
19482
19483 return NO_REGS;
19484 }
19485
19486 /* QImode spills from non-QI registers require
19487 intermediate register on 32bit targets. */
19488 if (mode == QImode
19489 && ((!TARGET_64BIT && !in_p
19490 && INTEGER_CLASS_P (rclass)
19491 && MAYBE_NON_Q_CLASS_P (rclass))
19492 || (!TARGET_AVX512DQ
19493 && MAYBE_MASK_CLASS_P (rclass))))
19494 {
19495 int regno = true_regnum (x);
19496
19497 /* Return Q_REGS if the operand is in memory. */
19498 if (regno == -1)
19499 return Q_REGS;
19500
19501 return NO_REGS;
19502 }
19503
19504 /* Require movement to gpr, and then store to memory. */
19505 if ((mode == HFmode || mode == HImode || mode == V2QImode
19506 || mode == BFmode)
19507 && !TARGET_SSE4_1
19508 && SSE_CLASS_P (rclass)
19509 && !in_p && MEM_P (x))
19510 {
19511 sri->extra_cost = 1;
19512 return GENERAL_REGS;
19513 }
19514
19515 /* This condition handles corner case where an expression involving
19516 pointers gets vectorized. We're trying to use the address of a
19517 stack slot as a vector initializer.
19518
19519 (set (reg:V2DI 74 [ vect_cst_.2 ])
19520 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
19521
19522 Eventually frame gets turned into sp+offset like this:
19523
19524 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19525 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
19526 (const_int 392 [0x188]))))
19527
19528 That later gets turned into:
19529
19530 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19531 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
19532 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
19533
19534 We'll have the following reload recorded:
19535
19536 Reload 0: reload_in (DI) =
19537 (plus:DI (reg/f:DI 7 sp)
19538 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
19539 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19540 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
19541 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
19542 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
19543 reload_reg_rtx: (reg:V2DI 22 xmm1)
19544
19545 Which isn't going to work since SSE instructions can't handle scalar
19546 additions. Returning GENERAL_REGS forces the addition into integer
19547 register and reload can handle subsequent reloads without problems. */
19548
19549 if (in_p && GET_CODE (x) == PLUS
19550 && SSE_CLASS_P (rclass)
19551 && SCALAR_INT_MODE_P (mode))
19552 return GENERAL_REGS;
19553
19554 return NO_REGS;
19555 }
19556
19557 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
19558
19559 static bool
19560 ix86_class_likely_spilled_p (reg_class_t rclass)
19561 {
19562 switch (rclass)
19563 {
19564 case AREG:
19565 case DREG:
19566 case CREG:
19567 case BREG:
19568 case AD_REGS:
19569 case SIREG:
19570 case DIREG:
19571 case SSE_FIRST_REG:
19572 case FP_TOP_REG:
19573 case FP_SECOND_REG:
19574 return true;
19575
19576 default:
19577 break;
19578 }
19579
19580 return false;
19581 }
19582
19583 /* Return true if a set of DST by the expression SRC should be allowed.
19584 This prevents complex sets of likely_spilled hard regs before reload. */
19585
19586 bool
19587 ix86_hardreg_mov_ok (rtx dst, rtx src)
19588 {
19589 /* Avoid complex sets of likely_spilled hard registers before reload. */
19590 if (REG_P (dst) && HARD_REGISTER_P (dst)
19591 && !REG_P (src) && !MEM_P (src)
19592 && !(VECTOR_MODE_P (GET_MODE (dst))
19593 ? standard_sse_constant_p (src, GET_MODE (dst))
19594 : x86_64_immediate_operand (src, GET_MODE (dst)))
19595 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
19596 && !reload_completed)
19597 return false;
19598 return true;
19599 }
19600
19601 /* If we are copying between registers from different register sets
19602 (e.g. FP and integer), we may need a memory location.
19603
19604 The function can't work reliably when one of the CLASSES is a class
19605 containing registers from multiple sets. We avoid this by never combining
19606 different sets in a single alternative in the machine description.
19607 Ensure that this constraint holds to avoid unexpected surprises.
19608
19609 When STRICT is false, we are being called from REGISTER_MOVE_COST,
19610 so do not enforce these sanity checks.
19611
19612 To optimize register_move_cost performance, define inline variant. */
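/* For example (illustrative), a DImode move between GENERAL_REGS and
   SSE_REGS on a 32-bit target needs secondary memory because the
   8-byte mode exceeds UNITS_PER_WORD, while the same move is direct on
   a 64-bit target when inter-unit moves are enabled.  */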
19613
19614 static inline bool
19615 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
19616 reg_class_t class2, int strict)
19617 {
19618 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
19619 return false;
19620
19621 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
19622 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
19623 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
19624 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
19625 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
19626 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
19627 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
19628 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
19629 {
19630 gcc_assert (!strict || lra_in_progress);
19631 return true;
19632 }
19633
19634 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
19635 return true;
19636
19637 /* ??? This is a lie. We do have moves between mmx/general and between
19638 mmx/sse2. But by saying we need secondary memory we discourage the
19639 register allocator from using the mmx registers unless needed. */
19640 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19641 return true;
19642
19643 /* Between mask and general, we have moves no larger than word size. */
19644 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
19645 {
19646 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
19647 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19648 return true;
19649 }
19650
19651 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19652 {
19653 /* SSE1 doesn't have any direct moves from other classes. */
19654 if (!TARGET_SSE2)
19655 return true;
19656
19657 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
19658 return true;
19659
19660 int msize = GET_MODE_SIZE (mode);
19661
19662 /* Between SSE and general, we have moves no larger than word size. */
19663 if (msize > UNITS_PER_WORD)
19664 return true;
19665
19666 /* In addition to SImode moves, HImode moves are supported for SSE2 and above.
19667 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16. */
19668 int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
19669
19670 if (msize < minsize)
19671 return true;
19672
19673 /* If the target says that inter-unit moves are more expensive
19674 than moving through memory, then don't generate them. */
19675 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
19676 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
19677 return true;
19678 }
19679
19680 return false;
19681 }
19682
19683 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
19684
19685 static bool
19686 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
19687 reg_class_t class2)
19688 {
19689 return inline_secondary_memory_needed (mode, class1, class2, true);
19690 }
19691
19692 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
19693
19694 get_secondary_mem widens integral modes to BITS_PER_WORD.
19695 There is no need to emit full 64 bit move on 64 bit targets
19696 for integral modes that can be moved using 32 bit move. */
19697
19698 static machine_mode
19699 ix86_secondary_memory_needed_mode (machine_mode mode)
19700 {
19701 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
19702 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
19703 return mode;
19704 }
19705
19706 /* Implement the TARGET_CLASS_MAX_NREGS hook.
19707
19708 On the 80386, this is the size of MODE in words,
19709 except in the FP regs, where a single reg is always enough. */
19710
19711 static unsigned char
19712 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
19713 {
19714 if (MAYBE_INTEGER_CLASS_P (rclass))
19715 {
19716 if (mode == XFmode)
19717 return (TARGET_64BIT ? 2 : 3);
19718 else if (mode == XCmode)
19719 return (TARGET_64BIT ? 4 : 6);
19720 else
19721 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
19722 }
19723 else
19724 {
19725 if (COMPLEX_MODE_P (mode))
19726 return 2;
19727 else
19728 return 1;
19729 }
19730 }
19731
19732 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19733
19734 static bool
19735 ix86_can_change_mode_class (machine_mode from, machine_mode to,
19736 reg_class_t regclass)
19737 {
19738 if (from == to)
19739 return true;
19740
19741 /* x87 registers can't do subreg at all, as all values are reformatted
19742 to extended precision. */
19743 if (MAYBE_FLOAT_CLASS_P (regclass))
19744 return false;
19745
19746 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
19747 {
19748 /* Vector registers do not support QI or HImode loads. If we don't
19749 disallow a change to these modes, reload will assume it's ok to
19750 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19751 the vec_dupv4hi pattern.
19752 NB: SSE2 can load 16bit data to sse register via pinsrw. */
19753 int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
19754 if (GET_MODE_SIZE (from) < mov_size
19755 || GET_MODE_SIZE (to) < mov_size)
19756 return false;
19757 }
19758
19759 return true;
19760 }
19761
19762 /* Return index of MODE in the sse load/store tables. */
19763
19764 static inline int
19765 sse_store_index (machine_mode mode)
19766 {
19767 /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
19768 costs to processor_costs, which would require changes to all entries
19769 in the processor cost table. */
19770 if (mode == E_HFmode)
19771 mode = E_SFmode;
19772
19773 switch (GET_MODE_SIZE (mode))
19774 {
19775 case 4:
19776 return 0;
19777 case 8:
19778 return 1;
19779 case 16:
19780 return 2;
19781 case 32:
19782 return 3;
19783 case 64:
19784 return 4;
19785 default:
19786 return -1;
19787 }
19788 }
19789
19790 /* Return the cost of moving data of mode M between a
19791 register and memory. A value of 2 is the default; this cost is
19792 relative to those in `REGISTER_MOVE_COST'.
19793
19794 This function is used extensively by register_move_cost that is used to
19795 build tables at startup. Make it inline in this case.
19796 When IN is 2, return maximum of in and out move cost.
19797
19798 If moving between registers and memory is more expensive than
19799 between two registers, you should define this macro to express the
19800 relative cost.
19801
19802 Also model the increased cost of moving QImode registers in non
19803 Q_REGS classes.
19804 */
19805 static inline int
19806 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
19807 {
19808 int cost;
19809
19810 if (FLOAT_CLASS_P (regclass))
19811 {
19812 int index;
19813 switch (mode)
19814 {
19815 case E_SFmode:
19816 index = 0;
19817 break;
19818 case E_DFmode:
19819 index = 1;
19820 break;
19821 case E_XFmode:
19822 index = 2;
19823 break;
19824 default:
19825 return 100;
19826 }
19827 if (in == 2)
19828 return MAX (ix86_cost->hard_register.fp_load [index],
19829 ix86_cost->hard_register.fp_store [index]);
19830 return in ? ix86_cost->hard_register.fp_load [index]
19831 : ix86_cost->hard_register.fp_store [index];
19832 }
19833 if (SSE_CLASS_P (regclass))
19834 {
19835 int index = sse_store_index (mode);
19836 if (index == -1)
19837 return 100;
19838 if (in == 2)
19839 return MAX (ix86_cost->hard_register.sse_load [index],
19840 ix86_cost->hard_register.sse_store [index]);
19841 return in ? ix86_cost->hard_register.sse_load [index]
19842 : ix86_cost->hard_register.sse_store [index];
19843 }
19844 if (MASK_CLASS_P (regclass))
19845 {
19846 int index;
19847 switch (GET_MODE_SIZE (mode))
19848 {
19849 case 1:
19850 index = 0;
19851 break;
19852 case 2:
19853 index = 1;
19854 break;
19855 /* DImode loads and stores assumed to cost the same as SImode. */
19856 case 4:
19857 case 8:
19858 index = 2;
19859 break;
19860 default:
19861 return 100;
19862 }
19863
19864 if (in == 2)
19865 return MAX (ix86_cost->hard_register.mask_load[index],
19866 ix86_cost->hard_register.mask_store[index]);
19867 return in ? ix86_cost->hard_register.mask_load[index]
19868 : ix86_cost->hard_register.mask_store[index];
19869 }
19870 if (MMX_CLASS_P (regclass))
19871 {
19872 int index;
19873 switch (GET_MODE_SIZE (mode))
19874 {
19875 case 4:
19876 index = 0;
19877 break;
19878 case 8:
19879 index = 1;
19880 break;
19881 default:
19882 return 100;
19883 }
19884 if (in == 2)
19885 return MAX (ix86_cost->hard_register.mmx_load [index],
19886 ix86_cost->hard_register.mmx_store [index]);
19887 return in ? ix86_cost->hard_register.mmx_load [index]
19888 : ix86_cost->hard_register.mmx_store [index];
19889 }
19890 switch (GET_MODE_SIZE (mode))
19891 {
19892 case 1:
19893 if (Q_CLASS_P (regclass) || TARGET_64BIT)
19894 {
19895 if (!in)
19896 return ix86_cost->hard_register.int_store[0];
19897 if (TARGET_PARTIAL_REG_DEPENDENCY
19898 && optimize_function_for_speed_p (cfun))
19899 cost = ix86_cost->hard_register.movzbl_load;
19900 else
19901 cost = ix86_cost->hard_register.int_load[0];
19902 if (in == 2)
19903 return MAX (cost, ix86_cost->hard_register.int_store[0]);
19904 return cost;
19905 }
19906 else
19907 {
19908 if (in == 2)
19909 return MAX (ix86_cost->hard_register.movzbl_load,
19910 ix86_cost->hard_register.int_store[0] + 4);
19911 if (in)
19912 return ix86_cost->hard_register.movzbl_load;
19913 else
19914 return ix86_cost->hard_register.int_store[0] + 4;
19915 }
19916 break;
19917 case 2:
19918 {
19919 int cost;
19920 if (in == 2)
19921 cost = MAX (ix86_cost->hard_register.int_load[1],
19922 ix86_cost->hard_register.int_store[1]);
19923 else
19924 cost = in ? ix86_cost->hard_register.int_load[1]
19925 : ix86_cost->hard_register.int_store[1];
19926
19927 if (mode == E_HFmode)
19928 {
19929 /* Prefer SSE over GPR for HFmode. */
19930 int sse_cost;
19931 int index = sse_store_index (mode);
19932 if (in == 2)
19933 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
19934 ix86_cost->hard_register.sse_store[index]);
19935 else
19936 sse_cost = (in
19937 ? ix86_cost->hard_register.sse_load [index]
19938 : ix86_cost->hard_register.sse_store [index]);
19939 if (sse_cost >= cost)
19940 cost = sse_cost + 1;
19941 }
19942 return cost;
19943 }
19944 default:
19945 if (in == 2)
19946 cost = MAX (ix86_cost->hard_register.int_load[2],
19947 ix86_cost->hard_register.int_store[2]);
19948 else if (in)
19949 cost = ix86_cost->hard_register.int_load[2];
19950 else
19951 cost = ix86_cost->hard_register.int_store[2];
19952 /* Multiply with the number of GPR moves needed. */
19953 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
19954 }
19955 }
19956
19957 static int
19958 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
19959 {
19960 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
19961 }
19962
19963
19964 /* Return the cost of moving data from a register in class CLASS1 to
19965 one in class CLASS2.
19966
19967 It is not required that the cost always equal 2 when FROM is the same as TO;
19968 on some machines it is expensive to move between registers if they are not
19969 general registers. */
19970
19971 static int
19972 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
19973 reg_class_t class2_i)
19974 {
19975 enum reg_class class1 = (enum reg_class) class1_i;
19976 enum reg_class class2 = (enum reg_class) class2_i;
19977
19978 /* In case we require secondary memory, compute cost of the store followed
19979 by load. In order to avoid bad register allocation choices, we need
19980 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19981
19982 if (inline_secondary_memory_needed (mode, class1, class2, false))
19983 {
19984 int cost = 1;
19985
19986 cost += inline_memory_move_cost (mode, class1, 2);
19987 cost += inline_memory_move_cost (mode, class2, 2);
19988
19989 /* When copying from a general purpose register we may emit multiple
19990 stores followed by a single load, causing a memory size mismatch stall.
19991 Count this as an arbitrarily high cost of 20. */
19992 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
19993 && TARGET_MEMORY_MISMATCH_STALL
19994 && targetm.class_max_nregs (class1, mode)
19995 > targetm.class_max_nregs (class2, mode))
19996 cost += 20;
19997
19998 /* In the case of FP/MMX moves, the registers actually overlap, and we
19999 have to switch modes in order to treat them differently. */
20000 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20001 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20002 cost += 20;
20003
20004 return cost;
20005 }
20006
20007 /* Moves between MMX and non-MMX units require secondary memory. */
20008 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20009 gcc_unreachable ();
20010
20011 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20012 return (SSE_CLASS_P (class1)
20013 ? ix86_cost->hard_register.sse_to_integer
20014 : ix86_cost->hard_register.integer_to_sse);
20015
20016 /* Moves between mask register and GPR. */
20017 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
20018 {
20019 return (MASK_CLASS_P (class1)
20020 ? ix86_cost->hard_register.mask_to_integer
20021 : ix86_cost->hard_register.integer_to_mask);
20022 }
20023 /* Moving between mask registers. */
20024 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
20025 return ix86_cost->hard_register.mask_move;
20026
20027 if (MAYBE_FLOAT_CLASS_P (class1))
20028 return ix86_cost->hard_register.fp_move;
20029 if (MAYBE_SSE_CLASS_P (class1))
20030 {
20031 if (GET_MODE_BITSIZE (mode) <= 128)
20032 return ix86_cost->hard_register.xmm_move;
20033 if (GET_MODE_BITSIZE (mode) <= 256)
20034 return ix86_cost->hard_register.ymm_move;
20035 return ix86_cost->hard_register.zmm_move;
20036 }
20037 if (MAYBE_MMX_CLASS_P (class1))
20038 return ix86_cost->hard_register.mmx_move;
20039 return 2;
20040 }
20041
20042 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
20043 words of a value of mode MODE but can be less for certain modes in
20044 special long registers.
20045
20046 Actually there are no two word move instructions for consecutive
20047 registers. And only registers 0-3 may have mov byte instructions
20048 applied to them. */
20049
20050 static unsigned int
20051 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
20052 {
20053 if (GENERAL_REGNO_P (regno))
20054 {
20055 if (mode == XFmode)
20056 return TARGET_64BIT ? 2 : 3;
20057 if (mode == XCmode)
20058 return TARGET_64BIT ? 4 : 6;
20059 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
20060 }
20061 if (COMPLEX_MODE_P (mode))
20062 return 2;
20063 /* Register pair for mask registers. */
20064 if (mode == P2QImode || mode == P2HImode)
20065 return 2;
20066 if (mode == V64SFmode || mode == V64SImode)
20067 return 4;
20068 return 1;
20069 }
20070
20071 /* Implement REGMODE_NATURAL_SIZE(MODE). */
20072 unsigned int
20073 ix86_regmode_natural_size (machine_mode mode)
20074 {
20075 if (mode == P2HImode || mode == P2QImode)
20076 return GET_MODE_SIZE (mode) / 2;
20077 return UNITS_PER_WORD;
20078 }
20079
20080 /* Implement TARGET_HARD_REGNO_MODE_OK. */
20081
20082 static bool
20083 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
20084 {
20085 /* Flags and only flags can only hold CCmode values. */
20086 if (CC_REGNO_P (regno))
20087 return GET_MODE_CLASS (mode) == MODE_CC;
20088 if (GET_MODE_CLASS (mode) == MODE_CC
20089 || GET_MODE_CLASS (mode) == MODE_RANDOM)
20090 return false;
20091 if (STACK_REGNO_P (regno))
20092 return VALID_FP_MODE_P (mode);
20093 if (MASK_REGNO_P (regno))
20094 {
20095 /* Register pair only starts at even register number. */
20096 if ((mode == P2QImode || mode == P2HImode))
20097 return MASK_PAIR_REGNO_P(regno);
20098
20099 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
20100 || (TARGET_AVX512BW
20101 && VALID_MASK_AVX512BW_MODE (mode)));
20102 }
20103
20104 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20105 return false;
20106
20107 if (SSE_REGNO_P (regno))
20108 {
20109 /* We implement the move patterns for all vector modes into and
20110 out of SSE registers, even when no operation instructions
20111 are available. */
20112
20113 /* For AVX-512 we allow, regardless of regno:
20114 - XI mode
20115 - any of 512-bit wide vector mode
20116 - any scalar mode. */
20117 if (TARGET_AVX512F
20118 && (VALID_AVX512F_REG_OR_XI_MODE (mode)
20119 || VALID_AVX512F_SCALAR_MODE (mode)))
20120 return true;
20121
20122 /* For AVX-5124FMAPS or AVX-5124VNNIW
20123 allow V64SF and V64SI modes for special regnos. */
20124 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
20125 && (mode == V64SFmode || mode == V64SImode)
20126 && MOD4_SSE_REGNO_P (regno))
20127 return true;
20128
20129 /* TODO check for QI/HI scalars. */
20130 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
20131 if (TARGET_AVX512VL
20132 && (VALID_AVX256_REG_OR_OI_MODE (mode)
20133 || VALID_AVX512VL_128_REG_MODE (mode)))
20134 return true;
20135
20136 /* xmm16-xmm31 are only available for AVX-512. */
20137 if (EXT_REX_SSE_REGNO_P (regno))
20138 return false;
20139
20140 /* Use pinsrw/pextrw to move 16-bit data between sse and integer registers. */
20141 if (TARGET_SSE2 && mode == HImode)
20142 return true;
20143
20144 /* OImode and AVX modes are available only when AVX is enabled. */
20145 return ((TARGET_AVX
20146 && VALID_AVX256_REG_OR_OI_MODE (mode))
20147 || VALID_SSE_REG_MODE (mode)
20148 || VALID_SSE2_REG_MODE (mode)
20149 || VALID_MMX_REG_MODE (mode)
20150 || VALID_MMX_REG_MODE_3DNOW (mode));
20151 }
20152 if (MMX_REGNO_P (regno))
20153 {
20154 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20155 so if the register is available at all, then we can move data of
20156 the given mode into or out of it. */
20157 return (VALID_MMX_REG_MODE (mode)
20158 || VALID_MMX_REG_MODE_3DNOW (mode));
20159 }
20160
20161 if (mode == QImode)
20162 {
20163 /* Take care for QImode values - they can be in non-QI regs,
20164 but then they do cause partial register stalls. */
20165 if (ANY_QI_REGNO_P (regno))
20166 return true;
20167 if (!TARGET_PARTIAL_REG_STALL)
20168 return true;
20169 /* LRA checks if the hard register is OK for the given mode.
20170 QImode values can live in non-QI regs, so we allow all
20171 registers here. */
20172 if (lra_in_progress)
20173 return true;
20174 return !can_create_pseudo_p ();
20175 }
20176 /* We handle both integer and floats in the general purpose registers. */
20177 else if (VALID_INT_MODE_P (mode)
20178 || VALID_FP_MODE_P (mode))
20179 return true;
20180 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20181 on to use that value in smaller contexts, this can easily force a
20182 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20183 supporting DImode, allow it. */
20184 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20185 return true;
20186
20187 return false;
20188 }
20189
20190 /* Implement TARGET_INSN_CALLEE_ABI. */
20191
20192 const predefined_function_abi &
20193 ix86_insn_callee_abi (const rtx_insn *insn)
20194 {
20195 unsigned int abi_id = 0;
20196 rtx pat = PATTERN (insn);
20197 if (vzeroupper_pattern (pat, VOIDmode))
20198 abi_id = ABI_VZEROUPPER;
20199
20200 return function_abis[abi_id];
20201 }
20202
20203 /* Initialize function_abis with corresponding abi_id,
20204 currently only handle vzeroupper. */
20205 void
20206 ix86_initialize_callee_abi (unsigned int abi_id)
20207 {
20208 gcc_assert (abi_id == ABI_VZEROUPPER);
20209 predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
20210 if (!vzeroupper_abi.initialized_p ())
20211 {
20212 HARD_REG_SET full_reg_clobbers;
20213 CLEAR_HARD_REG_SET (full_reg_clobbers);
20214 vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
20215 }
20216 }
20217
20218 void
20219 ix86_expand_avx_vzeroupper (void)
20220 {
20221 /* Initialize vzeroupper_abi here. */
20222 ix86_initialize_callee_abi (ABI_VZEROUPPER);
20223 rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
20224 /* Return false for non-local goto in can_nonlocal_goto. */
20225 make_reg_eh_region_note (insn, 0, INT_MIN);
20226 /* Flag used for call_insn indicates it's a fake call. */
20227 RTX_FLAG (insn, used) = 1;
20228 }
20229
20230
20231 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
20232 saves SSE registers across calls is Win64 (thus no need to check the
20233 current ABI here), and with AVX enabled Win64 only guarantees that
20234 the low 16 bytes are saved. */
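/* For example (illustrative), a 32-byte V8SFmode value that is live in
   xmm6 across a call is treated as partially clobbered, since at most
   the low 16 bytes of the register are preserved by any ABI.  */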
20235
20236 static bool
20237 ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
20238 machine_mode mode)
20239 {
20240 /* Special ABI for vzeroupper which only clobbers the upper part of sse regs. */
20241 if (abi_id == ABI_VZEROUPPER)
20242 return (GET_MODE_SIZE (mode) > 16
20243 && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
20244 || LEGACY_SSE_REGNO_P (regno)));
20245
20246 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
20247 }
20248
20249 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20250 tieable integer mode. */
20251
20252 static bool
20253 ix86_tieable_integer_mode_p (machine_mode mode)
20254 {
20255 switch (mode)
20256 {
20257 case E_HImode:
20258 case E_SImode:
20259 return true;
20260
20261 case E_QImode:
20262 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20263
20264 case E_DImode:
20265 return TARGET_64BIT;
20266
20267 default:
20268 return false;
20269 }
20270 }
20271
20272 /* Implement TARGET_MODES_TIEABLE_P.
20273
20274 Return true if MODE1 is accessible in a register that can hold MODE2
20275 without copying. That is, all register classes that can hold MODE2
20276 can also hold MODE1. */
20277
20278 static bool
20279 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
20280 {
20281 if (mode1 == mode2)
20282 return true;
20283
20284 if (ix86_tieable_integer_mode_p (mode1)
20285 && ix86_tieable_integer_mode_p (mode2))
20286 return true;
20287
20288 /* MODE2 being XFmode implies fp stack or general regs, which means we
20289 can tie any smaller floating point modes to it. Note that we do not
20290 tie this with TFmode. */
20291 if (mode2 == XFmode)
20292 return mode1 == SFmode || mode1 == DFmode;
20293
20294 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20295 that we can tie it with SFmode. */
20296 if (mode2 == DFmode)
20297 return mode1 == SFmode;
20298
20299 /* If MODE2 is only appropriate for an SSE register, then tie with
20300 any other mode acceptable to SSE registers. */
20301 if (GET_MODE_SIZE (mode2) == 64
20302 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20303 return (GET_MODE_SIZE (mode1) == 64
20304 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20305 if (GET_MODE_SIZE (mode2) == 32
20306 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20307 return (GET_MODE_SIZE (mode1) == 32
20308 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20309 if (GET_MODE_SIZE (mode2) == 16
20310 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20311 return (GET_MODE_SIZE (mode1) == 16
20312 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20313
20314 /* If MODE2 is appropriate for an MMX register, then tie
20315 with any other mode acceptable to MMX registers. */
20316 if (GET_MODE_SIZE (mode2) == 8
20317 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20318 return (GET_MODE_SIZE (mode1) == 8
20319 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20320
20321 /* SCmode and DImode can be tied. */
20322 if ((mode1 == E_SCmode && mode2 == E_DImode)
20323 || (mode1 == E_DImode && mode2 == E_SCmode))
20324 return TARGET_64BIT;
20325
20326 /* [SD]Cmode and V2[SD]Fmode modes can be tied. */
20327 if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
20328 || (mode1 == E_V2SFmode && mode2 == E_SCmode)
20329 || (mode1 == E_DCmode && mode2 == E_V2DFmode)
20330 || (mode1 == E_V2DFmode && mode2 == E_DCmode))
20331 return true;
20332
20333 return false;
20334 }
20335
20336 /* Return the cost of moving between two registers of mode MODE. */
20337
20338 static int
20339 ix86_set_reg_reg_cost (machine_mode mode)
20340 {
20341 unsigned int units = UNITS_PER_WORD;
20342
20343 switch (GET_MODE_CLASS (mode))
20344 {
20345 default:
20346 break;
20347
20348 case MODE_CC:
20349 units = GET_MODE_SIZE (CCmode);
20350 break;
20351
20352 case MODE_FLOAT:
20353 if ((TARGET_SSE && mode == TFmode)
20354 || (TARGET_80387 && mode == XFmode)
20355 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
20356 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
20357 units = GET_MODE_SIZE (mode);
20358 break;
20359
20360 case MODE_COMPLEX_FLOAT:
20361 if ((TARGET_SSE && mode == TCmode)
20362 || (TARGET_80387 && mode == XCmode)
20363 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
20364 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
20365 units = GET_MODE_SIZE (mode);
20366 break;
20367
20368 case MODE_VECTOR_INT:
20369 case MODE_VECTOR_FLOAT:
20370 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
20371 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
20372 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20373 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20374 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
20375 && VALID_MMX_REG_MODE (mode)))
20376 units = GET_MODE_SIZE (mode);
20377 }
20378
20379 /* Return the cost of moving between two registers of mode MODE,
20380 assuming that the move will be in pieces of at most UNITS bytes. */
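/* For example (illustrative), a DImode register move on a 32-bit
   target has UNITS of 4, giving a cost of COSTS_N_INSNS (2).  */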
20381 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
20382 }
20383
20384 /* Return cost of vector operation in MODE given that scalar version has
20385 COST. */
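/* For example (illustrative), on a target with TARGET_AVX256_SPLIT_REGS
   a 256-bit vector operation is charged twice the supplied COST,
   modeling the split into two 128-bit halves.  */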
20386
20387 static int
20388 ix86_vec_cost (machine_mode mode, int cost)
20389 {
20390 if (!VECTOR_MODE_P (mode))
20391 return cost;
20392
20393 if (GET_MODE_BITSIZE (mode) == 128
20394 && TARGET_SSE_SPLIT_REGS)
20395 return cost * GET_MODE_BITSIZE (mode) / 64;
20396 else if (GET_MODE_BITSIZE (mode) > 128
20397 && TARGET_AVX256_SPLIT_REGS)
20398 return cost * GET_MODE_BITSIZE (mode) / 128;
20399 else if (GET_MODE_BITSIZE (mode) > 256
20400 && TARGET_AVX512_SPLIT_REGS)
20401 return cost * GET_MODE_BITSIZE (mode) / 256;
20402 return cost;
20403 }
20404
20405 /* Return cost of vec_widen_<s>mult_hi/lo_<mode>;
20406 vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2. */
20407 static int
20408 ix86_widen_mult_cost (const struct processor_costs *cost,
20409 enum machine_mode mode, bool uns_p)
20410 {
20411 gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
20412 int extra_cost = 0;
20413 int basic_cost = 0;
20414 switch (mode)
20415 {
20416 case V8HImode:
20417 case V16HImode:
20418 if (!uns_p || mode == V16HImode)
20419 extra_cost = cost->sse_op * 2;
20420 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
20421 break;
20422 case V4SImode:
20423 case V8SImode:
20424 /* pmulhw/pmullw can be used. */
20425 basic_cost = cost->mulss * 2 + cost->sse_op * 2;
20426 break;
20427 case V2DImode:
20428 /* pmuludq under sse2, pmuldq under sse4.1; for sign_extend without
20429 sse4.1, an extra 4 mul, 4 add, 4 cmp and 2 shift are required. */
20430 if (!TARGET_SSE4_1 && !uns_p)
20431 extra_cost = (cost->mulss + cost->addss + cost->sse_op) * 4
20432 + cost->sse_op * 2;
20433 /* Fallthru. */
20434 case V4DImode:
20435 basic_cost = cost->mulss * 2 + cost->sse_op * 4;
20436 break;
20437 default:
20438 /* Not implemented. */
20439 return 100;
20440 }
20441 return ix86_vec_cost (mode, basic_cost + extra_cost);
20442 }
20443
20444 /* Return cost of multiplication in MODE. */
20445
20446 static int
20447 ix86_multiplication_cost (const struct processor_costs *cost,
20448 enum machine_mode mode)
20449 {
20450 machine_mode inner_mode = mode;
20451 if (VECTOR_MODE_P (mode))
20452 inner_mode = GET_MODE_INNER (mode);
20453
20454 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20455 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
20456 else if (X87_FLOAT_MODE_P (mode))
20457 return cost->fmul;
20458 else if (FLOAT_MODE_P (mode))
20459 return ix86_vec_cost (mode,
20460 inner_mode == DFmode ? cost->mulsd : cost->mulss);
20461 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20462 switch (mode)
20463 {
20464 case V4QImode:
20465 case V8QImode:
20466 /* Partial V*QImode is emulated with 4-6 insns. */
20467 if (TARGET_AVX512BW && TARGET_AVX512VL)
20468 return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
20469 else if (TARGET_AVX2)
20470 return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 5);
20471 else if (TARGET_XOP)
20472 return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3)
20473 + cost->sse_load[2]);
20474 else
20475 return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 4)
20476 + cost->sse_load[2]);
20477
20478 case V16QImode:
20479 /* V*QImode is emulated with 4-11 insns. */
20480 if (TARGET_AVX512BW && TARGET_AVX512VL)
20481 return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
20482 else if (TARGET_AVX2)
20483 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 8);
20484 else if (TARGET_XOP)
20485 return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5)
20486 + cost->sse_load[2]);
20487 else
20488 return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
20489 + cost->sse_load[2]);
20490
20491 case V32QImode:
20492 if (TARGET_AVX512BW)
20493 return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
20494 else
20495 return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
20496 + cost->sse_load[3] * 2);
20497
20498 case V64QImode:
20499 return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9)
20500 + cost->sse_load[3] * 2
20501 + cost->sse_load[4] * 2);
20502
20503 case V4SImode:
20504 /* pmulld is used in this case. No emulation is needed. */
20505 if (TARGET_SSE4_1)
20506 goto do_native;
20507 /* V4SImode is emulated with 7 insns. */
20508 else
20509 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
20510
20511 case V2DImode:
20512 case V4DImode:
20513 /* vpmullq is used in this case. No emulation is needed. */
20514 if (TARGET_AVX512DQ && TARGET_AVX512VL)
20515 goto do_native;
20516 /* V*DImode is emulated with 6-8 insns. */
20517 else if (TARGET_XOP && mode == V2DImode)
20518 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
20519 /* FALLTHRU */
20520 case V8DImode:
20521 /* vpmullq is used in this case. No emulation is needed. */
20522 if (TARGET_AVX512DQ && mode == V8DImode)
20523 goto do_native;
20524 else
20525 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
20526
20527 default:
20528 do_native:
20529 return ix86_vec_cost (mode, cost->mulss);
20530 }
20531 else
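/* Scalar integer multiply: a fixed setup cost plus a per-bit cost; the
   factor of 7 matches the arbitrary nbits default used for a non-constant
   multiplier in ix86_rtx_costs below.  */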
20532 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
20533 }
20534
20535 /* Return cost of division in MODE. */
20536
20537 static int
20538 ix86_division_cost (const struct processor_costs *cost,
20539 enum machine_mode mode)
20540 {
20541 machine_mode inner_mode = mode;
20542 if (VECTOR_MODE_P (mode))
20543 inner_mode = GET_MODE_INNER (mode);
20544
20545 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
20546 return inner_mode == DFmode ? cost->divsd : cost->divss;
20547 else if (X87_FLOAT_MODE_P (mode))
20548 return cost->fdiv;
20549 else if (FLOAT_MODE_P (mode))
20550 return ix86_vec_cost (mode,
20551 inner_mode == DFmode ? cost->divsd : cost->divss);
20552 else
20553 return cost->divide[MODE_INDEX (mode)];
20554 }
20555
20556 /* Return cost of shift in MODE.
20557 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
20558 AND_IN_OP1 specifies whether op1 is the result of an AND, and
20559 SHIFT_AND_TRUNCATE whether op1 is a SUBREG of such an AND.
20560
20561 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
20562
20563 static int
20564 ix86_shift_rotate_cost (const struct processor_costs *cost,
20565 enum rtx_code code,
20566 enum machine_mode mode, bool constant_op1,
20567 HOST_WIDE_INT op1_val,
20568 bool and_in_op1,
20569 bool shift_and_truncate,
20570 bool *skip_op0, bool *skip_op1)
20571 {
20572 if (skip_op0)
20573 *skip_op0 = *skip_op1 = false;
20574
20575 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20576 {
20577 int count;
20578 /* Cost of reading the memory. */
20579 int extra;
20580
20581 switch (mode)
20582 {
20583 case V4QImode:
20584 case V8QImode:
20585 if (TARGET_AVX2)
20586 /* Use vpbroadcast. */
20587 extra = cost->sse_op;
20588 else
20589 extra = cost->sse_load[2];
20590
20591 if (constant_op1)
20592 {
20593 if (code == ASHIFTRT)
20594 {
20595 count = 4;
20596 extra *= 2;
20597 }
20598 else
20599 count = 2;
20600 }
20601 else if (TARGET_AVX512BW && TARGET_AVX512VL)
20602 {
20603 count = 3;
20604 return ix86_vec_cost (mode, cost->sse_op * count);
20605 }
20606 else if (TARGET_SSE4_1)
20607 count = 4;
20608 else if (code == ASHIFTRT)
20609 count = 5;
20610 else
20611 count = 4;
20612 return ix86_vec_cost (mode, cost->sse_op * count) + extra;
20613
20614 case V16QImode:
20615 if (TARGET_XOP)
20616 {
20617 /* For XOP we use vpshab, which requires a broadcast of the
20618 value to the variable shift insn. For constants this
20619 means a V16Q const in mem; even when we can perform the
20620 shift with one insn, set the cost to prefer paddb. */
20621 if (constant_op1)
20622 {
20623 extra = cost->sse_load[2];
20624 return ix86_vec_cost (mode, cost->sse_op) + extra;
20625 }
20626 else
20627 {
20628 count = (code == ASHIFT) ? 2 : 3;
20629 return ix86_vec_cost (mode, cost->sse_op * count);
20630 }
20631 }
20632 /* FALLTHRU */
20633 case V32QImode:
20634 if (TARGET_AVX2)
20635 /* Use vpbroadcast. */
20636 extra = cost->sse_op;
20637 else
20638 extra = (mode == V16QImode) ? cost->sse_load[2] : cost->sse_load[3];
20639
20640 if (constant_op1)
20641 {
20642 if (code == ASHIFTRT)
20643 {
20644 count = 4;
20645 extra *= 2;
20646 }
20647 else
20648 count = 2;
20649 }
20650 else if (TARGET_SSE4_1)
20651 count = 8;
20652 else if (code == ASHIFTRT)
20653 count = 9;
20654 else
20655 count = 8;
20656 return ix86_vec_cost (mode, cost->sse_op * count) + extra;
20657
20658 case V2DImode:
20659 case V4DImode:
20660 /* V*DImode arithmetic right shift is emulated. */
20661 if (code == ASHIFTRT && !TARGET_AVX512VL)
20662 {
20663 if (constant_op1)
20664 {
20665 if (op1_val == 63)
20666 count = TARGET_SSE4_2 ? 1 : 2;
20667 else if (TARGET_XOP)
20668 count = 2;
20669 else
20670 count = 4;
20671 }
20672 else if (TARGET_XOP)
20673 count = 3;
20674 else if (TARGET_SSE4_2)
20675 count = 4;
20676 else
20677 count = 5;
20678
20679 return ix86_vec_cost (mode, cost->sse_op * count);
20680 }
20681 /* FALLTHRU */
20682 default:
20683 return ix86_vec_cost (mode, cost->sse_op);
20684 }
20685 }
20686
20687 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20688 {
20689 if (constant_op1)
20690 {
20691 if (op1_val > 32)
20692 return cost->shift_const + COSTS_N_INSNS (2);
20693 else
20694 return cost->shift_const * 2;
20695 }
20696 else
20697 {
20698 if (and_in_op1)
20699 return cost->shift_var * 2;
20700 else
20701 return cost->shift_var * 6 + COSTS_N_INSNS (2);
20702 }
20703 }
20704 else
20705 {
20706 if (constant_op1)
20707 return cost->shift_const;
20708 else if (shift_and_truncate)
20709 {
20710 if (skip_op0)
20711 *skip_op0 = *skip_op1 = true;
20712 /* Return the cost after shift-and truncation. */
20713 return cost->shift_var;
20714 }
20715 else
20716 return cost->shift_var;
20717 }
20718 }
20719
20720 /* Compute a (partial) cost for rtx X. Return true if the complete
20721 cost has been computed, and false if subexpressions should be
20722 scanned. In either case, *TOTAL contains the cost result. */
20723
20724 static bool
20725 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
20726 int *total, bool speed)
20727 {
20728 rtx mask;
20729 enum rtx_code code = GET_CODE (x);
20730 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20731 const struct processor_costs *cost
20732 = speed ? ix86_tune_cost : &ix86_size_cost;
20733 int src_cost;
20734
20735 switch (code)
20736 {
20737 case SET:
20738 if (register_operand (SET_DEST (x), VOIDmode)
20739 && register_operand (SET_SRC (x), VOIDmode))
20740 {
20741 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
20742 return true;
20743 }
20744
20745 if (register_operand (SET_SRC (x), VOIDmode))
20746 /* Avoid potentially incorrect high cost from rtx_costs
20747 for non-tieable SUBREGs. */
20748 src_cost = 0;
20749 else
20750 {
20751 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
20752
20753 if (CONSTANT_P (SET_SRC (x)))
20754 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
20755 a small value, possibly zero for cheap constants. */
20756 src_cost += COSTS_N_INSNS (1);
20757 }
20758
20759 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
20760 return true;
20761
20762 case CONST_INT:
20763 case CONST:
20764 case LABEL_REF:
20765 case SYMBOL_REF:
20766 if (x86_64_immediate_operand (x, VOIDmode))
20767 *total = 0;
20768 else
20769 *total = 1;
20770 return true;
20771
20772 case CONST_DOUBLE:
20773 if (IS_STACK_MODE (mode))
20774 switch (standard_80387_constant_p (x))
20775 {
20776 case -1:
20777 case 0:
20778 break;
20779 case 1: /* 0.0 */
20780 *total = 1;
20781 return true;
20782 default: /* Other constants */
20783 *total = 2;
20784 return true;
20785 }
20786 /* FALLTHRU */
20787
20788 case CONST_VECTOR:
20789 switch (standard_sse_constant_p (x, mode))
20790 {
20791 case 0:
20792 break;
20793 case 1: /* 0: xor eliminates false dependency */
20794 *total = 0;
20795 return true;
20796 default: /* -1: cmp contains false dependency */
20797 *total = 1;
20798 return true;
20799 }
20800 /* FALLTHRU */
20801
20802 case CONST_WIDE_INT:
20803 /* Fall back to (MEM (SYMBOL_REF)), since that's where
20804 it'll probably end up. Add a penalty for size. */
20805 *total = (COSTS_N_INSNS (1)
20806 + (!TARGET_64BIT && flag_pic)
20807 + (GET_MODE_SIZE (mode) <= 4
20808 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
20809 return true;
20810
20811 case ZERO_EXTEND:
20812 /* Zero extension is often completely free on x86_64, so make
20813 it as cheap as possible. */
20814 if (TARGET_64BIT && mode == DImode
20815 && GET_MODE (XEXP (x, 0)) == SImode)
20816 *total = 1;
20817 else if (TARGET_ZERO_EXTEND_WITH_AND)
20818 *total = cost->add;
20819 else
20820 *total = cost->movzx;
20821 return false;
20822
20823 case SIGN_EXTEND:
20824 *total = cost->movsx;
20825 return false;
20826
20827 case ASHIFT:
20828 if (SCALAR_INT_MODE_P (mode)
20829 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
20830 && CONST_INT_P (XEXP (x, 1)))
20831 {
20832 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20833 if (value == 1)
20834 {
20835 *total = cost->add;
20836 return false;
20837 }
20838 if ((value == 2 || value == 3)
20839 && cost->lea <= cost->shift_const)
20840 {
20841 *total = cost->lea;
20842 return false;
20843 }
20844 }
20845 /* FALLTHRU */
20846
20847 case ROTATE:
20848 case ASHIFTRT:
20849 case LSHIFTRT:
20850 case ROTATERT:
20851 bool skip_op0, skip_op1;
20852 *total = ix86_shift_rotate_cost (cost, code, mode,
20853 CONSTANT_P (XEXP (x, 1)),
20854 CONST_INT_P (XEXP (x, 1))
20855 ? INTVAL (XEXP (x, 1)) : -1,
20856 GET_CODE (XEXP (x, 1)) == AND,
20857 SUBREG_P (XEXP (x, 1))
20858 && GET_CODE (XEXP (XEXP (x, 1),
20859 0)) == AND,
20860 &skip_op0, &skip_op1);
20861 if (skip_op0 || skip_op1)
20862 {
20863 if (!skip_op0)
20864 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
20865 if (!skip_op1)
20866 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
20867 return true;
20868 }
20869 return false;
20870
20871 case FMA:
20872 {
20873 rtx sub;
20874
20875 gcc_assert (FLOAT_MODE_P (mode));
20876 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
20877
20878 *total = ix86_vec_cost (mode,
20879 GET_MODE_INNER (mode) == SFmode
20880 ? cost->fmass : cost->fmasd);
20881 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
20882
20883 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
20884 sub = XEXP (x, 0);
20885 if (GET_CODE (sub) == NEG)
20886 sub = XEXP (sub, 0);
20887 *total += rtx_cost (sub, mode, FMA, 0, speed);
20888
20889 sub = XEXP (x, 2);
20890 if (GET_CODE (sub) == NEG)
20891 sub = XEXP (sub, 0);
20892 *total += rtx_cost (sub, mode, FMA, 2, speed);
20893 return true;
20894 }
20895
20896 case MULT:
20897 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
20898 {
20899 rtx op0 = XEXP (x, 0);
20900 rtx op1 = XEXP (x, 1);
20901 int nbits;
20902 if (CONST_INT_P (XEXP (x, 1)))
20903 {
20904 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
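/* Population count of the constant multiplier: value &= value - 1 clears
   the lowest set bit on each iteration.  */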
20905 for (nbits = 0; value != 0; value &= value - 1)
20906 nbits++;
20907 }
20908 else
20909 /* This is arbitrary. */
20910 nbits = 7;
20911
20912 /* Compute costs correctly for widening multiplication. */
20913 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20914 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20915 == GET_MODE_SIZE (mode))
20916 {
20917 int is_mulwiden = 0;
20918 machine_mode inner_mode = GET_MODE (op0);
20919
20920 if (GET_CODE (op0) == GET_CODE (op1))
20921 is_mulwiden = 1, op1 = XEXP (op1, 0);
20922 else if (CONST_INT_P (op1))
20923 {
20924 if (GET_CODE (op0) == SIGN_EXTEND)
20925 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20926 == INTVAL (op1);
20927 else
20928 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20929 }
20930
20931 if (is_mulwiden)
20932 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20933 }
20934
20935 int mult_init;
20936 // Double word multiplication requires 3 mults and 2 adds.
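// (a_hi << W | a_lo) * (b_hi << W | b_lo) mod 2^(2*W)
//   = a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << W),
// i.e. three single-word multiplies and two additions.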
20937 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20938 {
20939 mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
20940 + 2 * cost->add;
20941 nbits *= 3;
20942 }
20943 else mult_init = cost->mult_init[MODE_INDEX (mode)];
20944
20945 *total = (mult_init
20946 + nbits * cost->mult_bit
20947 + rtx_cost (op0, mode, outer_code, opno, speed)
20948 + rtx_cost (op1, mode, outer_code, opno, speed));
20949
20950 return true;
20951 }
20952 *total = ix86_multiplication_cost (cost, mode);
20953 return false;
20954
20955 case DIV:
20956 case UDIV:
20957 case MOD:
20958 case UMOD:
20959 *total = ix86_division_cost (cost, mode);
20960 return false;
20961
20962 case PLUS:
20963 if (GET_MODE_CLASS (mode) == MODE_INT
20964 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
20965 {
20966 if (GET_CODE (XEXP (x, 0)) == PLUS
20967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20968 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20969 && CONSTANT_P (XEXP (x, 1)))
20970 {
20971 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20972 if (val == 2 || val == 4 || val == 8)
20973 {
20974 *total = cost->lea;
20975 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
20976 outer_code, opno, speed);
20977 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
20978 outer_code, opno, speed);
20979 *total += rtx_cost (XEXP (x, 1), mode,
20980 outer_code, opno, speed);
20981 return true;
20982 }
20983 }
20984 else if (GET_CODE (XEXP (x, 0)) == MULT
20985 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20986 {
20987 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20988 if (val == 2 || val == 4 || val == 8)
20989 {
20990 *total = cost->lea;
20991 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
20992 outer_code, opno, speed);
20993 *total += rtx_cost (XEXP (x, 1), mode,
20994 outer_code, opno, speed);
20995 return true;
20996 }
20997 }
20998 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20999 {
21000 rtx op = XEXP (XEXP (x, 0), 0);
21001
21002 /* Add with carry, ignore the cost of adding a carry flag. */
21003 if (ix86_carry_flag_operator (op, mode)
21004 || ix86_carry_flag_unset_operator (op, mode))
21005 *total = cost->add;
21006 else
21007 {
21008 *total = cost->lea;
21009 *total += rtx_cost (op, mode,
21010 outer_code, opno, speed);
21011 }
21012
21013 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
21014 outer_code, opno, speed);
21015 *total += rtx_cost (XEXP (x, 1), mode,
21016 outer_code, opno, speed);
21017 return true;
21018 }
21019 }
21020 /* FALLTHRU */
21021
21022 case MINUS:
21023 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
21024 if (GET_MODE_CLASS (mode) == MODE_INT
21025 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
21026 && GET_CODE (XEXP (x, 0)) == MINUS
21027 && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
21028 || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
21029 {
21030 *total = cost->add;
21031 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21032 outer_code, opno, speed);
21033 *total += rtx_cost (XEXP (x, 1), mode,
21034 outer_code, opno, speed);
21035 return true;
21036 }
21037
21038 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21039 *total = cost->addss;
21040 else if (X87_FLOAT_MODE_P (mode))
21041 *total = cost->fadd;
21042 else if (FLOAT_MODE_P (mode))
21043 *total = ix86_vec_cost (mode, cost->addss);
21044 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21045 *total = ix86_vec_cost (mode, cost->sse_op);
21046 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21047 *total = cost->add * 2;
21048 else
21049 *total = cost->add;
21050 return false;
21051
21052 case IOR:
21053 case XOR:
21054 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21055 *total = ix86_vec_cost (mode, cost->sse_op);
21056 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21057 *total = cost->add * 2;
21058 else
21059 *total = cost->add;
21060 return false;
21061
21062 case AND:
21063 if (address_no_seg_operand (x, mode))
21064 {
21065 *total = cost->lea;
21066 return true;
21067 }
21068 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21069 {
21070 /* pandn is a single instruction. */
21071 if (GET_CODE (XEXP (x, 0)) == NOT)
21072 {
21073 *total = ix86_vec_cost (mode, cost->sse_op)
21074 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21075 outer_code, opno, speed)
21076 + rtx_cost (XEXP (x, 1), mode,
21077 outer_code, opno, speed);
21078 return true;
21079 }
21080 else if (GET_CODE (XEXP (x, 1)) == NOT)
21081 {
21082 *total = ix86_vec_cost (mode, cost->sse_op)
21083 + rtx_cost (XEXP (x, 0), mode,
21084 outer_code, opno, speed)
21085 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21086 outer_code, opno, speed);
21087 return true;
21088 }
21089 *total = ix86_vec_cost (mode, cost->sse_op);
21090 }
21091 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21092 {
21093 if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21094 {
21095 *total = cost->add * 2
21096 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21097 outer_code, opno, speed)
21098 + rtx_cost (XEXP (x, 1), mode,
21099 outer_code, opno, speed);
21100 return true;
21101 }
21102 else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
21103 {
21104 *total = cost->add * 2
21105 + rtx_cost (XEXP (x, 0), mode,
21106 outer_code, opno, speed)
21107 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21108 outer_code, opno, speed);
21109 return true;
21110 }
21111 *total = cost->add * 2;
21112 }
21113 else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
21114 {
21115 *total = cost->add
21116 + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
21117 outer_code, opno, speed)
21118 + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
21119 return true;
21120 }
21121 else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
21122 {
21123 *total = cost->add
21124 + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
21125 + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
21126 outer_code, opno, speed);
21127 return true;
21128 }
21129 else
21130 *total = cost->add;
21131 return false;
21132
21133 case NOT:
21134 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21135 // vnot is pxor -1.
21136 *total = ix86_vec_cost (mode, cost->sse_op) + 1;
21137 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21138 *total = cost->add * 2;
21139 else
21140 *total = cost->add;
21141 return false;
21142
21143 case NEG:
21144 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21145 *total = cost->sse_op;
21146 else if (X87_FLOAT_MODE_P (mode))
21147 *total = cost->fchs;
21148 else if (FLOAT_MODE_P (mode))
21149 *total = ix86_vec_cost (mode, cost->sse_op);
21150 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
21151 *total = ix86_vec_cost (mode, cost->sse_op);
21152 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
21153 *total = cost->add * 3;
21154 else
21155 *total = cost->add;
21156 return false;
21157
21158 case COMPARE:
21159 rtx op0, op1;
21160 op0 = XEXP (x, 0);
21161 op1 = XEXP (x, 1);
21162 if (GET_CODE (op0) == ZERO_EXTRACT
21163 && XEXP (op0, 1) == const1_rtx
21164 && CONST_INT_P (XEXP (op0, 2))
21165 && op1 == const0_rtx)
21166 {
21167 /* This kind of construct is implemented using test[bwl].
21168 Treat it as if we had an AND. */
21169 mode = GET_MODE (XEXP (op0, 0));
21170 *total = (cost->add
21171 + rtx_cost (XEXP (op0, 0), mode, outer_code,
21172 opno, speed)
21173 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
21174 return true;
21175 }
21176
21177 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
21178 {
21179 /* This is an overflow detection, count it as a normal compare. */
21180 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
21181 return true;
21182 }
21183
21184 rtx geu;
21185 /* Match x
21186 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
21187 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
21188 if (mode == CCCmode
21189 && GET_CODE (op0) == NEG
21190 && GET_CODE (geu = XEXP (op0, 0)) == GEU
21191 && REG_P (XEXP (geu, 0))
21192 && (GET_MODE (XEXP (geu, 0)) == CCCmode
21193 || GET_MODE (XEXP (geu, 0)) == CCmode)
21194 && REGNO (XEXP (geu, 0)) == FLAGS_REG
21195 && XEXP (geu, 1) == const0_rtx
21196 && GET_CODE (op1) == LTU
21197 && REG_P (XEXP (op1, 0))
21198 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
21199 && REGNO (XEXP (op1, 0)) == FLAGS_REG
21200 && XEXP (op1, 1) == const0_rtx)
21201 {
21202 /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */
21203 *total = 0;
21204 return true;
21205 }
21206
21207 if (SCALAR_INT_MODE_P (GET_MODE (op0))
21208 && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
21209 {
21210 if (op1 == const0_rtx)
21211 *total = cost->add
21212 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
21213 else
21214 *total = 3*cost->add
21215 + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
21216 + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
21217 return true;
21218 }
21219
21220 /* The embedded comparison operand is completely free. */
21221 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
21222 *total = 0;
21223
21224 return false;
21225
21226 case FLOAT_EXTEND:
21227 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21228 *total = 0;
21229 else
21230 *total = ix86_vec_cost (mode, cost->addss);
21231 return false;
21232
21233 case FLOAT_TRUNCATE:
21234 if (!SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21235 *total = cost->fadd;
21236 else
21237 *total = ix86_vec_cost (mode, cost->addss);
21238 return false;
21239
21240 case ABS:
21241 /* SSE requires memory load for the constant operand. It may make
21242 sense to account for this. Of course the constant operand may or
21243 may not be reused. */
21244 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21245 *total = cost->sse_op;
21246 else if (X87_FLOAT_MODE_P (mode))
21247 *total = cost->fabs;
21248 else if (FLOAT_MODE_P (mode))
21249 *total = ix86_vec_cost (mode, cost->sse_op);
21250 return false;
21251
21252 case SQRT:
21253 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
21254 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
21255 else if (X87_FLOAT_MODE_P (mode))
21256 *total = cost->fsqrt;
21257 else if (FLOAT_MODE_P (mode))
21258 *total = ix86_vec_cost (mode,
21259 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
21260 return false;
21261
21262 case UNSPEC:
21263 if (XINT (x, 1) == UNSPEC_TP)
21264 *total = 0;
21265 else if (XINT (x, 1) == UNSPEC_VTERNLOG)
21266 {
21267 *total = cost->sse_op;
21268 return true;
21269 }
21270 else if (XINT (x, 1) == UNSPEC_PTEST)
21271 {
21272 *total = cost->sse_op;
21273 if (XVECLEN (x, 0) == 2
21274 && GET_CODE (XVECEXP (x, 0, 0)) == AND)
21275 {
21276 rtx andop = XVECEXP (x, 0, 0);
21277 *total += rtx_cost (XEXP (andop, 0), GET_MODE (andop),
21278 AND, opno, speed)
21279 + rtx_cost (XEXP (andop, 1), GET_MODE (andop),
21280 AND, opno, speed);
21281 return true;
21282 }
21283 }
21284 return false;
21285
21286 case VEC_SELECT:
21287 case VEC_CONCAT:
21288 case VEC_DUPLICATE:
21289 /* ??? Assume all of these vector manipulation patterns are
21290 recognizable. In which case they all pretty much have the
21291 same cost. */
21292 *total = cost->sse_op;
21293 return true;
21294 case VEC_MERGE:
21295 mask = XEXP (x, 2);
21296 /* This is a masked instruction; assume the same cost
21297 as the non-masked variant. */
21298 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
21299 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
21300 else
21301 *total = cost->sse_op;
21302 return true;
21303
21304 case MEM:
21305 /* An insn that accesses memory is slightly more expensive
21306 than one that does not. */
21307 if (speed)
21308 *total += 1;
21309 return false;
21310
21311 case ZERO_EXTRACT:
21312 if (XEXP (x, 1) == const1_rtx
21313 && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
21314 && GET_MODE (XEXP (x, 2)) == SImode
21315 && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
21316 {
21317 /* Ignore cost of zero extension and masking of last argument. */
21318 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21319 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
21320 *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
21321 return true;
21322 }
21323 return false;
21324
21325 case IF_THEN_ELSE:
21326 if (TARGET_XOP
21327 && VECTOR_MODE_P (mode)
21328 && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
21329 {
21330 /* vpcmov. */
21331 *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
21332 if (!REG_P (XEXP (x, 0)))
21333 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21334 if (!REG_P (XEXP (x, 1)))
21335 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
21336 if (!REG_P (XEXP (x, 2)))
21337 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
21338 return true;
21339 }
21340 else if (TARGET_CMOVE
21341 && SCALAR_INT_MODE_P (mode)
21342 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
21343 {
21344 /* cmov. */
21345 *total = COSTS_N_INSNS (1);
21346 if (!REG_P (XEXP (x, 0)))
21347 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
21348 if (!REG_P (XEXP (x, 1)))
21349 *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
21350 if (!REG_P (XEXP (x, 2)))
21351 *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
21352 return true;
21353 }
21354 return false;
21355
21356 default:
21357 return false;
21358 }
21359 }
21360
21361 #if TARGET_MACHO
21362
21363 static int current_machopic_label_num;
21364
21365 /* Given a symbol name and its associated stub, write out the
21366 definition of the stub. */
21367
21368 void
21369 machopic_output_stub (FILE *file, const char *symb, const char *stub)
21370 {
21371 unsigned int length;
21372 char *binder_name, *symbol_name, lazy_ptr_name[32];
21373 int label = ++current_machopic_label_num;
21374
21375 /* For 64-bit we shouldn't get here. */
21376 gcc_assert (!TARGET_64BIT);
21377
21378 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21379 symb = targetm.strip_name_encoding (symb);
21380
21381 length = strlen (stub);
21382 binder_name = XALLOCAVEC (char, length + 32);
21383 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
21384
21385 length = strlen (symb);
21386 symbol_name = XALLOCAVEC (char, length + 32);
21387 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21388
21389 sprintf (lazy_ptr_name, "L%d$lz", label);
21390
21391 if (MACHOPIC_ATT_STUB)
21392 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
21393 else if (MACHOPIC_PURE)
21394 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
21395 else
21396 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
21397
21398 fprintf (file, "%s:\n", stub);
21399 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21400
21401 if (MACHOPIC_ATT_STUB)
21402 {
21403 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
21404 }
21405 else if (MACHOPIC_PURE)
21406 {
21407 /* PIC stub. */
21408 /* 25-byte PIC stub using "CALL get_pc_thunk". */
21409 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21410 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
21411 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
21412 label, lazy_ptr_name, label);
21413 fprintf (file, "\tjmp\t*%%ecx\n");
21414 }
21415 else
21416 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
21417
21418 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
21419 it needs no stub-binding-helper. */
21420 if (MACHOPIC_ATT_STUB)
21421 return;
21422
21423 fprintf (file, "%s:\n", binder_name);
21424
21425 if (MACHOPIC_PURE)
21426 {
21427 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
21428 fprintf (file, "\tpushl\t%%ecx\n");
21429 }
21430 else
21431 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
21432
21433 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
21434
21435 /* N.B. Keep the correspondence of these
21436 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
21437 old-pic/new-pic/non-pic stubs; altering this will break
21438 compatibility with existing dylibs. */
21439 if (MACHOPIC_PURE)
21440 {
21441 /* 25-byte PIC stub using "CALL get_pc_thunk". */
21442 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
21443 }
21444 else
21445 /* 16-byte -mdynamic-no-pic stub. */
21446 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
21447
21448 fprintf (file, "%s:\n", lazy_ptr_name);
21449 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21450 fprintf (file, ASM_LONG "%s\n", binder_name);
21451 }
21452 #endif /* TARGET_MACHO */
21453
21454 /* Order the registers for register allocator. */
21455
21456 void
21457 x86_order_regs_for_local_alloc (void)
21458 {
21459 int pos = 0;
21460 int i;
21461
21462 /* First allocate the local general purpose registers. */
21463 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21464 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
21465 reg_alloc_order [pos++] = i;
21466
21467 /* Global general purpose registers. */
21468 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21469 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
21470 reg_alloc_order [pos++] = i;
21471
21472 /* x87 registers come first in case we are doing FP math
21473 using them. */
21474 if (!TARGET_SSE_MATH)
21475 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21476 reg_alloc_order [pos++] = i;
21477
21478 /* SSE registers. */
21479 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
21480 reg_alloc_order [pos++] = i;
21481 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
21482 reg_alloc_order [pos++] = i;
21483
21484 /* Extended REX SSE registers. */
21485 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
21486 reg_alloc_order [pos++] = i;
21487
21488 /* Mask register. */
21489 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
21490 reg_alloc_order [pos++] = i;
21491
21492 /* x87 registers. */
21493 if (TARGET_SSE_MATH)
21494 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21495 reg_alloc_order [pos++] = i;
21496
21497 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
21498 reg_alloc_order [pos++] = i;
21499
21500 /* Initialize the rest of the array, as we do not allocate some registers
21501 at all. */
21502 while (pos < FIRST_PSEUDO_REGISTER)
21503 reg_alloc_order [pos++] = 0;
21504 }
21505
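/* Return true if RECORD_TYPE should be laid out following the MS bitfield
   layout rules, honoring the "gcc_struct" and "ms_struct" attributes.  */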
21506 static bool
21507 ix86_ms_bitfield_layout_p (const_tree record_type)
21508 {
21509 return ((TARGET_MS_BITFIELD_LAYOUT
21510 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21511 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
21512 }
21513
21514 /* Returns an expression indicating where the this parameter is
21515 located on entry to the FUNCTION. */
21516
21517 static rtx
21518 x86_this_parameter (tree function)
21519 {
21520 tree type = TREE_TYPE (function);
21521 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
21522 int nregs;
21523
21524 if (TARGET_64BIT)
21525 {
21526 const int *parm_regs;
21527
21528 if (ix86_function_type_abi (type) == MS_ABI)
21529 parm_regs = x86_64_ms_abi_int_parameter_registers;
21530 else
21531 parm_regs = x86_64_int_parameter_registers;
21532 return gen_rtx_REG (Pmode, parm_regs[aggr]);
21533 }
21534
21535 nregs = ix86_function_regparm (type, function);
21536
21537 if (nregs > 0 && !stdarg_p (type))
21538 {
21539 int regno;
21540 unsigned int ccvt = ix86_get_callcvt (type);
21541
21542 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
21543 regno = aggr ? DX_REG : CX_REG;
21544 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
21545 {
21546 regno = CX_REG;
21547 if (aggr)
21548 return gen_rtx_MEM (SImode,
21549 plus_constant (Pmode, stack_pointer_rtx, 4));
21550 }
21551 else
21552 {
21553 regno = AX_REG;
21554 if (aggr)
21555 {
21556 regno = DX_REG;
21557 if (nregs == 1)
21558 return gen_rtx_MEM (SImode,
21559 plus_constant (Pmode,
21560 stack_pointer_rtx, 4));
21561 }
21562 }
21563 return gen_rtx_REG (SImode, regno);
21564 }
21565
21566 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
21567 aggr ? 8 : 4));
21568 }
21569
21570 /* Determine whether x86_output_mi_thunk can succeed. */
21571
21572 static bool
21573 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
21574 const_tree function)
21575 {
21576 /* 64-bit can handle anything. */
21577 if (TARGET_64BIT)
21578 return true;
21579
21580 /* For 32-bit, everything's fine if we have one free register. */
21581 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21582 return true;
21583
21584 /* Need a free register for vcall_offset. */
21585 if (vcall_offset)
21586 return false;
21587
21588 /* Need a free register for GOT references. */
21589 if (flag_pic && !targetm.binds_local_p (function))
21590 return false;
21591
21592 /* Otherwise ok. */
21593 return true;
21594 }
21595
21596 /* Output the assembler code for a thunk function. THUNK_DECL is the
21597 declaration for the thunk function itself, FUNCTION is the decl for
21598 the target function. DELTA is an immediate constant offset to be
21599 added to THIS. If VCALL_OFFSET is nonzero, the word at
21600 *(*this + vcall_offset) should be added to THIS. */
21601
21602 static void
21603 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
21604 HOST_WIDE_INT vcall_offset, tree function)
21605 {
21606 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
21607 rtx this_param = x86_this_parameter (function);
21608 rtx this_reg, tmp, fnaddr;
21609 unsigned int tmp_regno;
21610 rtx_insn *insn;
21611 int saved_flag_force_indirect_call = flag_force_indirect_call;
21612
21613 if (TARGET_64BIT)
21614 tmp_regno = R10_REG;
21615 else
21616 {
21617 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
21618 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
21619 tmp_regno = AX_REG;
21620 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
21621 tmp_regno = DX_REG;
21622 else
21623 tmp_regno = CX_REG;
21624
21625 if (flag_pic)
21626 flag_force_indirect_call = 0;
21627 }
21628
21629 emit_note (NOTE_INSN_PROLOGUE_END);
21630
21631 /* If CET is enabled, insert an ENDBR instruction. */
21632 if ((flag_cf_protection & CF_BRANCH))
21633 emit_insn (gen_nop_endbr ());
21634
21635 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21636 pull it in now and let DELTA benefit. */
21637 if (REG_P (this_param))
21638 this_reg = this_param;
21639 else if (vcall_offset)
21640 {
21641 /* Put the this parameter into %eax. */
21642 this_reg = gen_rtx_REG (Pmode, AX_REG);
21643 emit_move_insn (this_reg, this_param);
21644 }
21645 else
21646 this_reg = NULL_RTX;
21647
21648 /* Adjust the this parameter by a fixed constant. */
21649 if (delta)
21650 {
21651 rtx delta_rtx = GEN_INT (delta);
21652 rtx delta_dst = this_reg ? this_reg : this_param;
21653
21654 if (TARGET_64BIT)
21655 {
21656 if (!x86_64_general_operand (delta_rtx, Pmode))
21657 {
21658 tmp = gen_rtx_REG (Pmode, tmp_regno);
21659 emit_move_insn (tmp, delta_rtx);
21660 delta_rtx = tmp;
21661 }
21662 }
21663
21664 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
21665 }
21666
21667 /* Adjust the this parameter by a value stored in the vtable. */
21668 if (vcall_offset)
21669 {
21670 rtx vcall_addr, vcall_mem, this_mem;
21671
21672 tmp = gen_rtx_REG (Pmode, tmp_regno);
21673
21674 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
21675 if (Pmode != ptr_mode)
21676 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
21677 emit_move_insn (tmp, this_mem);
21678
21679 /* Adjust the this parameter. */
21680 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
21681 if (TARGET_64BIT
21682 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
21683 {
21684 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
21685 emit_move_insn (tmp2, GEN_INT (vcall_offset));
21686 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
21687 }
21688
21689 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
21690 if (Pmode != ptr_mode)
21691 emit_insn (gen_addsi_1_zext (this_reg,
21692 gen_rtx_REG (ptr_mode,
21693 REGNO (this_reg)),
21694 vcall_mem));
21695 else
21696 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
21697 }
21698
21699 /* If necessary, drop THIS back to its stack slot. */
21700 if (this_reg && this_reg != this_param)
21701 emit_move_insn (this_param, this_reg);
21702
21703 fnaddr = XEXP (DECL_RTL (function), 0);
21704 if (TARGET_64BIT)
21705 {
21706 if (!flag_pic || targetm.binds_local_p (function)
21707 || TARGET_PECOFF)
21708 ;
21709 else
21710 {
21711 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
21712 tmp = gen_rtx_CONST (Pmode, tmp);
21713 fnaddr = gen_const_mem (Pmode, tmp);
21714 }
21715 }
21716 else
21717 {
21718 if (!flag_pic || targetm.binds_local_p (function))
21719 ;
21720 #if TARGET_MACHO
21721 else if (TARGET_MACHO)
21722 {
21723 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
21724 fnaddr = XEXP (fnaddr, 0);
21725 }
21726 #endif /* TARGET_MACHO */
21727 else
21728 {
21729 tmp = gen_rtx_REG (Pmode, CX_REG);
21730 output_set_got (tmp, NULL_RTX);
21731
21732 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
21733 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
21734 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
21735 fnaddr = gen_const_mem (Pmode, fnaddr);
21736 }
21737 }
21738
21739 /* Our sibling call patterns do not allow memories, because we have no
21740 predicate that can distinguish between frame and non-frame memory.
21741 For our purposes here, we can get away with (ab)using a jump pattern,
21742 because we're going to do no optimization. */
21743 if (MEM_P (fnaddr))
21744 {
21745 if (sibcall_insn_operand (fnaddr, word_mode))
21746 {
21747 fnaddr = XEXP (DECL_RTL (function), 0);
21748 tmp = gen_rtx_MEM (QImode, fnaddr);
21749 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
21750 tmp = emit_call_insn (tmp);
21751 SIBLING_CALL_P (tmp) = 1;
21752 }
21753 else
21754 emit_jump_insn (gen_indirect_jump (fnaddr));
21755 }
21756 else
21757 {
21758 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
21759 {
21760 // CM_LARGE_PIC always uses pseudo PIC register which is
21761 // uninitialized. Since FUNCTION is local and calling it
21762 // doesn't go through PLT, we use scratch register %r11 as
21763 // PIC register and initialize it here.
21764 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
21765 ix86_init_large_pic_reg (tmp_regno);
21766 fnaddr = legitimize_pic_address (fnaddr,
21767 gen_rtx_REG (Pmode, tmp_regno));
21768 }
21769
21770 if (!sibcall_insn_operand (fnaddr, word_mode))
21771 {
21772 tmp = gen_rtx_REG (word_mode, tmp_regno);
21773 if (GET_MODE (fnaddr) != word_mode)
21774 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
21775 emit_move_insn (tmp, fnaddr);
21776 fnaddr = tmp;
21777 }
21778
21779 tmp = gen_rtx_MEM (QImode, fnaddr);
21780 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
21781 tmp = emit_call_insn (tmp);
21782 SIBLING_CALL_P (tmp) = 1;
21783 }
21784 emit_barrier ();
21785
21786 /* Emit just enough of rest_of_compilation to get the insns emitted. */
21787 insn = get_insns ();
21788 shorten_branches (insn);
21789 assemble_start_function (thunk_fndecl, fnname);
21790 final_start_function (insn, file, 1);
21791 final (insn, file, 1);
21792 final_end_function ();
21793 assemble_end_function (thunk_fndecl, fnname);
21794
21795 flag_force_indirect_call = saved_flag_force_indirect_call;
21796 }
21797
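/* Emit the directives that belong at the start of the assembly file.  */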
21798 static void
21799 x86_file_start (void)
21800 {
21801 default_file_start ();
21802 if (TARGET_16BIT)
21803 fputs ("\t.code16gcc\n", asm_out_file);
21804 #if TARGET_MACHO
21805 darwin_file_start ();
21806 #endif
21807 if (X86_FILE_START_VERSION_DIRECTIVE)
21808 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21809 if (X86_FILE_START_FLTUSED)
21810 fputs ("\t.global\t__fltused\n", asm_out_file);
21811 if (ix86_asm_dialect == ASM_INTEL)
21812 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
21813 }
21814
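/* Return the alignment in bits to be used for a field of TYPE, given that
   COMPUTED is the alignment determined so far.  */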
21815 int
21816 x86_field_alignment (tree type, int computed)
21817 {
21818 machine_mode mode;
21819
21820 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21821 return computed;
21822 if (TARGET_IAMCU)
21823 return iamcu_alignment (type, computed);
21824 type = strip_array_types (type);
21825 mode = TYPE_MODE (type);
21826 if (mode == DFmode || mode == DCmode
21827 || GET_MODE_CLASS (mode) == MODE_INT
21828 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21829 {
21830 if (TYPE_ATOMIC (type) && computed > 32)
21831 {
21832 static bool warned;
21833
21834 if (!warned && warn_psabi)
21835 {
21836 const char *url
21837 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
21838
21839 warned = true;
21840 inform (input_location, "the alignment of %<_Atomic %T%> "
21841 "fields changed in %{GCC 11.1%}",
21842 TYPE_MAIN_VARIANT (type), url);
21843 }
21844 }
21845 else
21846 return MIN (32, computed);
21847 }
21848 return computed;
21849 }
21850
21851 /* Print call to TARGET to FILE. */
21852
21853 static void
21854 x86_print_call_or_nop (FILE *file, const char *target)
21855 {
21856 if (flag_nop_mcount || !strcmp (target, "nop"))
21857 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
21858 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
21859 else
21860 fprintf (file, "1:\tcall\t%s\n", target);
21861 }
21862
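/* If the current function has an "fentry_name" attribute, store its string
   value in *NAME and return true.  */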
21863 static bool
21864 current_fentry_name (const char **name)
21865 {
21866 tree attr = lookup_attribute ("fentry_name",
21867 DECL_ATTRIBUTES (current_function_decl));
21868 if (!attr)
21869 return false;
21870 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
21871 return true;
21872 }
21873
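/* If the current function has an "fentry_section" attribute, store its
   string value in *NAME and return true.  */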
21874 static bool
21875 current_fentry_section (const char **name)
21876 {
21877 tree attr = lookup_attribute ("fentry_section",
21878 DECL_ATTRIBUTES (current_function_decl));
21879 if (!attr)
21880 return false;
21881 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
21882 return true;
21883 }
21884
21885 /* Output assembler code to FILE to increment profiler label # LABELNO
21886 for profiling a function entry. */
21887 void
21888 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21889 {
21890 if (cfun->machine->insn_queued_at_entrance)
21891 {
21892 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
21893 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
21894 unsigned int patch_area_size
21895 = crtl->patch_area_size - crtl->patch_area_entry;
21896 if (patch_area_size)
21897 ix86_output_patchable_area (patch_area_size,
21898 crtl->patch_area_entry == 0);
21899 }
21900
21901 const char *mcount_name = MCOUNT_NAME;
21902
21903 if (current_fentry_name (&mcount_name))
21904 ;
21905 else if (fentry_name)
21906 mcount_name = fentry_name;
21907 else if (flag_fentry)
21908 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
21909
21910 if (TARGET_64BIT)
21911 {
21912 #ifndef NO_PROFILE_COUNTERS
21913 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
21914 #endif
21915
21916 if (!TARGET_PECOFF)
21917 {
21918 switch (ix86_cmodel)
21919 {
21920 case CM_LARGE:
21921 /* NB: R10 is caller-saved. Although it can be used as a
21922 static chain register, it is preserved when calling
21923 mcount for nested functions. */
21924 fprintf (file, "1:\tmovabsq\t$%s, %%r10\n\tcall\t*%%r10\n",
21925 mcount_name);
21926 break;
21927 case CM_LARGE_PIC:
21928 #ifdef NO_PROFILE_COUNTERS
21929 fprintf (file, "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
21930 fprintf (file, "\tleaq\t1b(%%rip), %%r10\n");
21931 fprintf (file, "\taddq\t%%r11, %%r10\n");
21932 fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
21933 fprintf (file, "\taddq\t%%r11, %%r10\n");
21934 fprintf (file, "\tcall\t*%%r10\n");
21935 #else
21936 sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
21937 #endif
21938 break;
21939 case CM_SMALL_PIC:
21940 case CM_MEDIUM_PIC:
21941 if (!ix86_direct_extern_access)
21942 {
21943 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
21944 break;
21945 }
21946 /* fall through */
21947 default:
21948 x86_print_call_or_nop (file, mcount_name);
21949 break;
21950 }
21951 }
21952 else
21953 x86_print_call_or_nop (file, mcount_name);
21954 }
21955 else if (flag_pic)
21956 {
21957 #ifndef NO_PROFILE_COUNTERS
21958 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
21959 LPREFIX, labelno);
21960 #endif
21961 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
21962 }
21963 else
21964 {
21965 #ifndef NO_PROFILE_COUNTERS
21966 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
21967 LPREFIX, labelno);
21968 #endif
21969 x86_print_call_or_nop (file, mcount_name);
21970 }
21971
21972 if (flag_record_mcount
21973 || lookup_attribute ("fentry_section",
21974 DECL_ATTRIBUTES (current_function_decl)))
21975 {
21976 const char *sname = "__mcount_loc";
21977
21978 if (current_fentry_section (&sname))
21979 ;
21980 else if (fentry_section)
21981 sname = fentry_section;
21982
21983 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
21984 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
21985 fprintf (file, "\t.previous\n");
21986 }
21987 }
21988
21989 /* We don't have exact information about the insn sizes, but we may assume
21990 quite safely that we are informed about all 1 byte insns and memory
21991 address sizes. This is enough to eliminate unnecessary padding in
21992 99% of cases. */
21993
21994 int
21995 ix86_min_insn_size (rtx_insn *insn)
21996 {
21997 int l = 0, len;
21998
21999 if (!INSN_P (insn) || !active_insn_p (insn))
22000 return 0;
22001
22002 /* Discard alignments we've emitted, and jump instructions. */
22003 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
22004 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
22005 return 0;
22006
22007 /* Important case - calls are always 5 bytes.
22008 It is common to have many calls in a row. */
22009 if (CALL_P (insn)
22010 && symbolic_reference_mentioned_p (PATTERN (insn))
22011 && !SIBLING_CALL_P (insn))
22012 return 5;
22013 len = get_attr_length (insn);
22014 if (len <= 1)
22015 return 1;
22016
22017 /* For normal instructions we rely on get_attr_length being exact,
22018 with a few exceptions. */
22019 if (!JUMP_P (insn))
22020 {
22021 enum attr_type type = get_attr_type (insn);
22022
22023 switch (type)
22024 {
22025 case TYPE_MULTI:
22026 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
22027 || asm_noperands (PATTERN (insn)) >= 0)
22028 return 0;
22029 break;
22030 case TYPE_OTHER:
22031 case TYPE_FCMP:
22032 break;
22033 default:
22034 /* Otherwise trust get_attr_length. */
22035 return len;
22036 }
22037
22038 l = get_attr_length_address (insn);
22039 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
22040 l = 4;
22041 }
22042 if (l)
22043 return 1+l;
22044 else
22045 return 2;
22046 }
22047
22048 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
22049
22050 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
22051 window. */
22052
22053 static void
22054 ix86_avoid_jump_mispredicts (void)
22055 {
22056 rtx_insn *insn, *start = get_insns ();
22057 int nbytes = 0, njumps = 0;
22058 bool isjump = false;
22059
22060 /* Look for all minimal intervals of instructions containing 4 jumps.
22061 The intervals are bounded by START and INSN. NBYTES is the total
22062 size of instructions in the interval including INSN and not including
22063 START. When NBYTES is smaller than 16 bytes, it is possible
22064 that the ends of START and INSN land in the same 16-byte page.
22065
22066 The smallest offset in the page at which INSN can start is the case where
22067 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
22068 We add a p2align to a 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
22069
22070 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
22071 have to, control transfer to its label(s) can be performed through other
22072 means, and we also estimate the minimum length of all asm stmts as 0. */
22073 for (insn = start; insn; insn = NEXT_INSN (insn))
22074 {
22075 int min_size;
22076
22077 if (LABEL_P (insn))
22078 {
22079 align_flags alignment = label_to_alignment (insn);
22080 int align = alignment.levels[0].log;
22081 int max_skip = alignment.levels[0].maxskip;
22082
22083 if (max_skip > 15)
22084 max_skip = 15;
22085 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
22086 already in the current 16 byte page, because otherwise
22087 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
22088 bytes to reach 16 byte boundary. */
22089 if (align <= 0
22090 || (align <= 3 && max_skip != (1 << align) - 1))
22091 max_skip = 0;
22092 if (dump_file)
22093 fprintf (dump_file, "Label %i with max_skip %i\n",
22094 INSN_UID (insn), max_skip);
22095 if (max_skip)
22096 {
22097 while (nbytes + max_skip >= 16)
22098 {
22099 start = NEXT_INSN (start);
22100 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
22101 || CALL_P (start))
22102 njumps--, isjump = true;
22103 else
22104 isjump = false;
22105 nbytes -= ix86_min_insn_size (start);
22106 }
22107 }
22108 continue;
22109 }
22110
22111 min_size = ix86_min_insn_size (insn);
22112 nbytes += min_size;
22113 if (dump_file)
22114 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
22115 INSN_UID (insn), min_size);
22116 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
22117 || CALL_P (insn))
22118 njumps++;
22119 else
22120 continue;
22121
22122 while (njumps > 3)
22123 {
22124 start = NEXT_INSN (start);
22125 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
22126 || CALL_P (start))
22127 njumps--, isjump = true;
22128 else
22129 isjump = false;
22130 nbytes -= ix86_min_insn_size (start);
22131 }
22132 gcc_assert (njumps >= 0);
22133 if (dump_file)
22134 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
22135 INSN_UID (start), INSN_UID (insn), nbytes);
22136
22137 if (njumps == 3 && isjump && nbytes < 16)
22138 {
22139 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
22140
22141 if (dump_file)
22142 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
22143 INSN_UID (insn), padsize);
22144 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
22145 }
22146 }
22147 }
22148 #endif
22149
22150 /* AMD Athlon works faster
22151 when RET is not the destination of a conditional jump or directly preceded
22152 by another jump instruction. We avoid the penalty by inserting a NOP just
22153 before the RET instruction in such cases. */
22154 static void
22155 ix86_pad_returns (void)
22156 {
22157 edge e;
22158 edge_iterator ei;
22159
22160 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22161 {
22162 basic_block bb = e->src;
22163 rtx_insn *ret = BB_END (bb);
22164 rtx_insn *prev;
22165 bool replace = false;
22166
22167 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
22168 || optimize_bb_for_size_p (bb))
22169 continue;
22170 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
22171 if (active_insn_p (prev) || LABEL_P (prev))
22172 break;
22173 if (prev && LABEL_P (prev))
22174 {
22175 edge e;
22176 edge_iterator ei;
22177
22178 FOR_EACH_EDGE (e, ei, bb->preds)
22179 if (EDGE_FREQUENCY (e) && e->src->index >= 0
22180 && !(e->flags & EDGE_FALLTHRU))
22181 {
22182 replace = true;
22183 break;
22184 }
22185 }
22186 if (!replace)
22187 {
22188 prev = prev_active_insn (ret);
22189 if (prev
22190 && ((JUMP_P (prev) && any_condjump_p (prev))
22191 || CALL_P (prev)))
22192 replace = true;
22193 /* Empty functions get a branch mispredict even when
22194 the jump destination is not visible to us. */
22195 if (!prev && !optimize_function_for_size_p (cfun))
22196 replace = true;
22197 }
22198 if (replace)
22199 {
22200 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
22201 delete_insn (ret);
22202 }
22203 }
22204 }
22205
22206 /* Count the minimum number of instructions in BB. Return 4 if the
22207 number of instructions >= 4. */
22208
22209 static int
22210 ix86_count_insn_bb (basic_block bb)
22211 {
22212 rtx_insn *insn;
22213 int insn_count = 0;
22214
22215 /* Count number of instructions in this block. Return 4 if the number
22216 of instructions >= 4. */
22217 FOR_BB_INSNS (bb, insn)
22218 {
22219 /* This only happens in exit blocks. */
22220 if (JUMP_P (insn)
22221 && ANY_RETURN_P (PATTERN (insn)))
22222 break;
22223
22224 if (NONDEBUG_INSN_P (insn)
22225 && GET_CODE (PATTERN (insn)) != USE
22226 && GET_CODE (PATTERN (insn)) != CLOBBER)
22227 {
22228 insn_count++;
22229 if (insn_count >= 4)
22230 return insn_count;
22231 }
22232 }
22233
22234 return insn_count;
22235 }
22236
22237
22238 /* Count the minimum number of instructions in code path in BB.
22239 Return 4 if the number of instructions >= 4. */
22240
22241 static int
22242 ix86_count_insn (basic_block bb)
22243 {
22244 edge e;
22245 edge_iterator ei;
22246 int min_prev_count;
22247
22248 /* Only bother counting instructions along paths with no
22249 more than 2 basic blocks between entry and exit. Given
22250 that BB has an edge to exit, determine if a predecessor
22251 of BB has an edge from entry. If so, compute the number
22252 of instructions in the predecessor block. If there
22253 happen to be multiple such blocks, compute the minimum. */
22254 min_prev_count = 4;
22255 FOR_EACH_EDGE (e, ei, bb->preds)
22256 {
22257 edge prev_e;
22258 edge_iterator prev_ei;
22259
22260 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
22261 {
22262 min_prev_count = 0;
22263 break;
22264 }
22265 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
22266 {
22267 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
22268 {
22269 int count = ix86_count_insn_bb (e->src);
22270 if (count < min_prev_count)
22271 min_prev_count = count;
22272 break;
22273 }
22274 }
22275 }
22276
22277 if (min_prev_count < 4)
22278 min_prev_count += ix86_count_insn_bb (bb);
22279
22280 return min_prev_count;
22281 }
22282
22283 /* Pad short function to 4 instructions. */
22284
22285 static void
22286 ix86_pad_short_function (void)
22287 {
22288 edge e;
22289 edge_iterator ei;
22290
22291 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22292 {
22293 rtx_insn *ret = BB_END (e->src);
22294 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
22295 {
22296 int insn_count = ix86_count_insn (e->src);
22297
22298 /* Pad short function. */
22299 if (insn_count < 4)
22300 {
22301 rtx_insn *insn = ret;
22302
22303 /* Find epilogue. */
22304 while (insn
22305 && (!NOTE_P (insn)
22306 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
22307 insn = PREV_INSN (insn);
22308
22309 if (!insn)
22310 insn = ret;
22311
22312 /* Two NOPs count as one instruction. */
22313 insn_count = 2 * (4 - insn_count);
22314 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
22315 }
22316 }
22317 }
22318 }
22319
22320 /* Fix up a Windows system unwinder issue. If an EH region falls through into
22321 the epilogue, the Windows system unwinder will apply epilogue logic and
22322 produce incorrect offsets. This can be avoided by adding a nop between
22323 the last insn that can throw and the first insn of the epilogue. */
22324
22325 static void
22326 ix86_seh_fixup_eh_fallthru (void)
22327 {
22328 edge e;
22329 edge_iterator ei;
22330
22331 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
22332 {
22333 rtx_insn *insn, *next;
22334
22335 /* Find the beginning of the epilogue. */
22336 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
22337 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
22338 break;
22339 if (insn == NULL)
22340 continue;
22341
22342 /* We only care about preceding insns that can throw. */
22343 insn = prev_active_insn (insn);
22344 if (insn == NULL || !can_throw_internal (insn))
22345 continue;
22346
22347 /* Do not separate calls from their debug information. */
22348 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
22349 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
22350 insn = next;
22351 else
22352 break;
22353
22354 emit_insn_after (gen_nops (const1_rtx), insn);
22355 }
22356 }
22357 /* Split a vector load from a parm_decl into elemental loads to avoid
22358 STLF (store-to-load forwarding) stalls. */
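/* Rough illustration of the transformation (the actual patterns are the
   sse2_loadlpd/sse2_loadhpd expanders used below): a stalled parameter load

	movapd	16(%rsp), %xmm0

   is replaced by two scalar loads, approximately

	movsd	16(%rsp), %xmm0
	movhpd	24(%rsp), %xmm0

   so each narrow load can forward from the matching narrow argument store.  */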
22359 static void
22360 ix86_split_stlf_stall_load ()
22361 {
22362 rtx_insn* insn, *start = get_insns ();
22363 unsigned window = 0;
22364
22365 for (insn = start; insn; insn = NEXT_INSN (insn))
22366 {
22367 if (!NONDEBUG_INSN_P (insn))
22368 continue;
22369 window++;
22370 /* Inserting 64 vaddps %xmm18, %xmm19, %xmm20 instructions (independent
22371 of each other, just to emulate pipeline occupancy) before the stalled
22372 load makes the STLF-stall case as fast as the no-stall case on CLX.
22373 Since the CFG is freed before machine_reorg, just do a rough
22374 calculation of the window according to the insn layout. */
22375 if (window > (unsigned) x86_stlf_window_ninsns)
22376 return;
22377
22378 if (any_uncondjump_p (insn)
22379 || ANY_RETURN_P (PATTERN (insn))
22380 || CALL_P (insn))
22381 return;
22382
22383 rtx set = single_set (insn);
22384 if (!set)
22385 continue;
22386 rtx src = SET_SRC (set);
22387 if (!MEM_P (src)
22388 /* Only handle V2DFmode load since it doesn't need any scratch
22389 register. */
22390 || GET_MODE (src) != E_V2DFmode
22391 || !MEM_EXPR (src)
22392 || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
22393 continue;
22394
22395 rtx zero = CONST0_RTX (V2DFmode);
22396 rtx dest = SET_DEST (set);
22397 rtx m = adjust_address (src, DFmode, 0);
22398 rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
22399 emit_insn_before (loadlpd, insn);
22400 m = adjust_address (src, DFmode, 8);
22401 rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
22402 if (dump_file && (dump_flags & TDF_DETAILS))
22403 {
22404 fputs ("Due to potential STLF stall, split instruction:\n",
22405 dump_file);
22406 print_rtl_single (dump_file, insn);
22407 fputs ("To:\n", dump_file);
22408 print_rtl_single (dump_file, loadlpd);
22409 print_rtl_single (dump_file, loadhpd);
22410 }
22411 PATTERN (insn) = loadhpd;
22412 INSN_CODE (insn) = -1;
22413 gcc_assert (recog_memoized (insn) != -1);
22414 }
22415 }
22416
22417 /* Implement machine-specific optimizations. We implement padding of returns
22418 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
22419 static void
22420 ix86_reorg (void)
22421 {
22422 /* We are freeing block_for_insn in the toplev to keep compatibility
22423 with old MDEP_REORGS that are not CFG based. Recompute it now. */
22424 compute_bb_for_insn ();
22425
22426 if (TARGET_SEH && current_function_has_exception_handlers ())
22427 ix86_seh_fixup_eh_fallthru ();
22428
22429 if (optimize && optimize_function_for_speed_p (cfun))
22430 {
22431 if (TARGET_SSE2)
22432 ix86_split_stlf_stall_load ();
22433 if (TARGET_PAD_SHORT_FUNCTION)
22434 ix86_pad_short_function ();
22435 else if (TARGET_PAD_RETURNS)
22436 ix86_pad_returns ();
22437 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
22438 if (TARGET_FOUR_JUMP_LIMIT)
22439 ix86_avoid_jump_mispredicts ();
22440 #endif
22441 }
22442 }
22443
22444 /* Return nonzero when a QImode register that must be represented via a REX
22445 prefix is used. */
22446 bool
22447 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
22448 {
22449 int i;
22450 extract_insn_cached (insn);
22451 for (i = 0; i < recog_data.n_operands; i++)
22452 if (GENERAL_REG_P (recog_data.operand[i])
22453 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
22454 return true;
22455 return false;
22456 }
22457
22458 /* Return true when INSN mentions a register that must be encoded using a
22459 REX prefix. */
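/* E.g. an insn referencing %r10 or %xmm12 mentions an extended register
   and needs a REX prefix, while one using only %eax..%edi and
   %xmm0-%xmm7 does not.  */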
22460 bool
22461 x86_extended_reg_mentioned_p (rtx insn)
22462 {
22463 subrtx_iterator::array_type array;
22464 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
22465 {
22466 const_rtx x = *iter;
22467 if (REG_P (x)
22468 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
22469 return true;
22470 }
22471 return false;
22472 }
22473
22474 /* If profitable, negate (without causing overflow) the integer constant
22475 of mode MODE at location LOC. Return true in that case. */
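/* For example, (const_int -4) in an addition is negated to 4 so the
   caller can emit "subl $4, %eax" instead of "addl $-4, %eax", and 128
   is negated to -128 because -128 still fits in a sign-extended imm8
   while +128 does not; -128 itself is left untouched.  */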
22476 bool
22477 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
22478 {
22479 HOST_WIDE_INT val;
22480
22481 if (!CONST_INT_P (*loc))
22482 return false;
22483
22484 switch (mode)
22485 {
22486 case E_DImode:
22487 /* DImode x86_64 constants must fit in 32 bits. */
22488 gcc_assert (x86_64_immediate_operand (*loc, mode));
22489
22490 mode = SImode;
22491 break;
22492
22493 case E_SImode:
22494 case E_HImode:
22495 case E_QImode:
22496 break;
22497
22498 default:
22499 gcc_unreachable ();
22500 }
22501
22502 /* Avoid overflows. */
22503 if (mode_signbit_p (mode, *loc))
22504 return false;
22505
22506 val = INTVAL (*loc);
22507
22508 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
22509 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
22510 if ((val < 0 && val != -128)
22511 || val == 128)
22512 {
22513 *loc = GEN_INT (-val);
22514 return true;
22515 }
22516
22517 return false;
22518 }
22519
22520 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
22521 optabs would emit if we didn't have TFmode patterns. */
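/* The emitted sequence corresponds roughly to this C sketch (IN is
   treated as signed for the comparison and conversions):

     if (in >= 0)
       out = (fptype) in;
     else
       {
	 in2 = (in >> 1) | (in & 1);   // logical shift, low bit kept sticky
	 out = (fptype) in2;
	 out = out + out;
       }
*/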
22522
22523 void
22524 x86_emit_floatuns (rtx operands[2])
22525 {
22526 rtx_code_label *neglab, *donelab;
22527 rtx i0, i1, f0, in, out;
22528 machine_mode mode, inmode;
22529
22530 inmode = GET_MODE (operands[1]);
22531 gcc_assert (inmode == SImode || inmode == DImode);
22532
22533 out = operands[0];
22534 in = force_reg (inmode, operands[1]);
22535 mode = GET_MODE (out);
22536 neglab = gen_label_rtx ();
22537 donelab = gen_label_rtx ();
22538 f0 = gen_reg_rtx (mode);
22539
22540 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
22541
22542 expand_float (out, in, 0);
22543
22544 emit_jump_insn (gen_jump (donelab));
22545 emit_barrier ();
22546
22547 emit_label (neglab);
22548
22549 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
22550 1, OPTAB_DIRECT);
22551 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
22552 1, OPTAB_DIRECT);
22553 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
22554
22555 expand_float (f0, i0, 0);
22556
22557 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
22558
22559 emit_label (donelab);
22560 }
22561 \f
22562 /* Target hook for scalar_mode_supported_p. */
22563 static bool
22564 ix86_scalar_mode_supported_p (scalar_mode mode)
22565 {
22566 if (DECIMAL_FLOAT_MODE_P (mode))
22567 return default_decimal_float_supported_p ();
22568 else if (mode == TFmode)
22569 return true;
22570 else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
22571 return true;
22572 else
22573 return default_scalar_mode_supported_p (mode);
22574 }
22575
22576 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
22577 if MODE is HFmode or BFmode, and punt to the generic implementation otherwise. */
22578
22579 static bool
22580 ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
22581 {
22582 /* NB: Always return TRUE for HFmode so that the _Float16 type will
22583 be defined by the C front-end for AVX512FP16 intrinsics. We will
22584 issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
22585 enabled. */
22586 return (((mode == HFmode || mode == BFmode) && TARGET_SSE2)
22587 ? true
22588 : default_libgcc_floating_mode_supported_p (mode));
22589 }
22590
22591 /* Implements target hook vector_mode_supported_p. */
22592 static bool
22593 ix86_vector_mode_supported_p (machine_mode mode)
22594 {
22595 /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
22596 either. */
22597 if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
22598 return false;
22599 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22600 return true;
22601 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22602 return true;
22603 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
22604 return true;
22605 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
22606 return true;
22607 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
22608 && VALID_MMX_REG_MODE (mode))
22609 return true;
22610 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
22611 && VALID_MMX_REG_MODE_3DNOW (mode))
22612 return true;
22613 if (mode == V2QImode)
22614 return true;
22615 return false;
22616 }
22617
22618 /* Target hook for c_mode_for_suffix. */
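/* E.g. the constant 1.0q (or 1.0Q) gets TFmode and is treated as
   __float128, while 1.0w (or 1.0W) gets XFmode (__float80); any other
   suffix falls through to the generic handling via VOIDmode.  */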
22619 static machine_mode
22620 ix86_c_mode_for_suffix (char suffix)
22621 {
22622 if (suffix == 'q')
22623 return TFmode;
22624 if (suffix == 'w')
22625 return XFmode;
22626
22627 return VOIDmode;
22628 }
22629
22630 /* Worker function for TARGET_MD_ASM_ADJUST.
22631
22632 We implement asm flag outputs, and maintain source compatibility
22633 with the old cc0-based compiler. */
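/* A minimal user-level sketch of the feature handled here (variable
   names are illustrative):

     bool carry;
     asm ("addl %2, %1" : "=@ccc" (carry), "+r" (dst) : "r" (src));

   The "=@ccc" constraint is rewritten below into a use of the flags
   register in CCCmode compared EQ against zero.  */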
22634
22635 static rtx_insn *
22636 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
22637 vec<machine_mode> & /*input_modes*/,
22638 vec<const char *> &constraints, vec<rtx> &clobbers,
22639 HARD_REG_SET &clobbered_regs, location_t loc)
22640 {
22641 bool saw_asm_flag = false;
22642
22643 start_sequence ();
22644 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
22645 {
22646 const char *con = constraints[i];
22647 if (!startswith (con, "=@cc"))
22648 continue;
22649 con += 4;
22650 if (strchr (con, ',') != NULL)
22651 {
22652 error_at (loc, "alternatives not allowed in %<asm%> flag output");
22653 continue;
22654 }
22655
22656 bool invert = false;
22657 if (con[0] == 'n')
22658 invert = true, con++;
22659
22660 machine_mode mode = CCmode;
22661 rtx_code code = UNKNOWN;
22662
22663 switch (con[0])
22664 {
22665 case 'a':
22666 if (con[1] == 0)
22667 mode = CCAmode, code = EQ;
22668 else if (con[1] == 'e' && con[2] == 0)
22669 mode = CCCmode, code = NE;
22670 break;
22671 case 'b':
22672 if (con[1] == 0)
22673 mode = CCCmode, code = EQ;
22674 else if (con[1] == 'e' && con[2] == 0)
22675 mode = CCAmode, code = NE;
22676 break;
22677 case 'c':
22678 if (con[1] == 0)
22679 mode = CCCmode, code = EQ;
22680 break;
22681 case 'e':
22682 if (con[1] == 0)
22683 mode = CCZmode, code = EQ;
22684 break;
22685 case 'g':
22686 if (con[1] == 0)
22687 mode = CCGCmode, code = GT;
22688 else if (con[1] == 'e' && con[2] == 0)
22689 mode = CCGCmode, code = GE;
22690 break;
22691 case 'l':
22692 if (con[1] == 0)
22693 mode = CCGCmode, code = LT;
22694 else if (con[1] == 'e' && con[2] == 0)
22695 mode = CCGCmode, code = LE;
22696 break;
22697 case 'o':
22698 if (con[1] == 0)
22699 mode = CCOmode, code = EQ;
22700 break;
22701 case 'p':
22702 if (con[1] == 0)
22703 mode = CCPmode, code = EQ;
22704 break;
22705 case 's':
22706 if (con[1] == 0)
22707 mode = CCSmode, code = EQ;
22708 break;
22709 case 'z':
22710 if (con[1] == 0)
22711 mode = CCZmode, code = EQ;
22712 break;
22713 }
22714 if (code == UNKNOWN)
22715 {
22716 error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
22717 continue;
22718 }
22719 if (invert)
22720 code = reverse_condition (code);
22721
22722 rtx dest = outputs[i];
22723 if (!saw_asm_flag)
22724 {
22725 /* This is the first asm flag output. Here we put the flags
22726 register in as the real output and adjust the condition to
22727 allow it. */
22728 constraints[i] = "=Bf";
22729 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
22730 saw_asm_flag = true;
22731 }
22732 else
22733 {
22734 /* We don't need the flags register as output twice. */
22735 constraints[i] = "=X";
22736 outputs[i] = gen_rtx_SCRATCH (SImode);
22737 }
22738
22739 rtx x = gen_rtx_REG (mode, FLAGS_REG);
22740 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
22741
22742 machine_mode dest_mode = GET_MODE (dest);
22743 if (!SCALAR_INT_MODE_P (dest_mode))
22744 {
22745 error_at (loc, "invalid type for %<asm%> flag output");
22746 continue;
22747 }
22748
22749 if (dest_mode == QImode)
22750 emit_insn (gen_rtx_SET (dest, x));
22751 else
22752 {
22753 rtx reg = gen_reg_rtx (QImode);
22754 emit_insn (gen_rtx_SET (reg, x));
22755
22756 reg = convert_to_mode (dest_mode, reg, 1);
22757 emit_move_insn (dest, reg);
22758 }
22759 }
22760
22761 rtx_insn *seq = get_insns ();
22762 end_sequence ();
22763
22764 if (saw_asm_flag)
22765 return seq;
22766 else
22767 {
22768 /* If we had no asm flag outputs, clobber the flags. */
22769 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
22770 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
22771 return NULL;
22772 }
22773 }
22774
22775 /* Implements the target hook targetm.asm.encode_section_info. */
22776
22777 static void ATTRIBUTE_UNUSED
22778 ix86_encode_section_info (tree decl, rtx rtl, int first)
22779 {
22780 default_encode_section_info (decl, rtl, first);
22781
22782 if (ix86_in_large_data_p (decl))
22783 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22784 }
22785
22786 /* Worker function for REVERSE_CONDITION. */
22787
22788 enum rtx_code
22789 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
22790 {
22791 return (mode == CCFPmode
22792 ? reverse_condition_maybe_unordered (code)
22793 : reverse_condition (code));
22794 }
22795
22796 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22797 to OPERANDS[0]. */
22798
22799 const char *
22800 output_387_reg_move (rtx_insn *insn, rtx *operands)
22801 {
22802 if (REG_P (operands[0]))
22803 {
22804 if (REG_P (operands[1])
22805 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22806 {
22807 if (REGNO (operands[0]) == FIRST_STACK_REG)
22808 return output_387_ffreep (operands, 0);
22809 return "fstp\t%y0";
22810 }
22811 if (STACK_TOP_P (operands[0]))
22812 return "fld%Z1\t%y1";
22813 return "fst\t%y0";
22814 }
22815 else if (MEM_P (operands[0]))
22816 {
22817 gcc_assert (REG_P (operands[1]));
22818 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22819 return "fstp%Z0\t%y0";
22820 else
22821 {
22822 /* There is no non-popping store to memory for XFmode.
22823 So if we need one, follow the store with a load. */
22824 if (GET_MODE (operands[0]) == XFmode)
22825 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
22826 else
22827 return "fst%Z0\t%y0";
22828 }
22829 }
22830 else
22831 gcc_unreachable();
22832 }
22833 #ifdef TARGET_SOLARIS
22834 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22835
22836 static void
22837 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22838 tree decl)
22839 {
22840 /* With Binutils 2.15, the "@unwind" marker must be specified on
22841 every occurrence of the ".eh_frame" section, not just the first
22842 one. */
22843 if (TARGET_64BIT
22844 && strcmp (name, ".eh_frame") == 0)
22845 {
22846 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22847 flags & SECTION_WRITE ? "aw" : "a");
22848 return;
22849 }
22850
22851 #ifndef USE_GAS
22852 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
22853 {
22854 solaris_elf_asm_comdat_section (name, flags, decl);
22855 return;
22856 }
22857
22858 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
22859 SPARC assembler. One cannot mix single-letter flags and #exclude, so
22860 only emit the latter here. */
22861 if (flags & SECTION_EXCLUDE)
22862 {
22863 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
22864 return;
22865 }
22866 #endif
22867
22868 default_elf_asm_named_section (name, flags, decl);
22869 }
22870 #endif /* TARGET_SOLARIS */
22871
22872 /* Return the mangling of TYPE if it is an extended fundamental type. */
22873
22874 static const char *
22875 ix86_mangle_type (const_tree type)
22876 {
22877 type = TYPE_MAIN_VARIANT (type);
22878
22879 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
22880 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
22881 return NULL;
22882
22883 if (type == float128_type_node || type == float64x_type_node)
22884 return NULL;
22885
22886 switch (TYPE_MODE (type))
22887 {
22888 case E_BFmode:
22889 return "DF16b";
22890 case E_HFmode:
22891 /* _Float16 is "DF16_".
22892 Align with clang's decision in https://reviews.llvm.org/D33719. */
22893 return "DF16_";
22894 case E_TFmode:
22895 /* __float128 is "g". */
22896 return "g";
22897 case E_XFmode:
22898 /* "long double" or __float80 is "e". */
22899 return "e";
22900 default:
22901 return NULL;
22902 }
22903 }
22904
22905 /* Create C++ tinfo symbols for only conditionally available fundamental
22906 types. */
22907
22908 static void
22909 ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
22910 {
22911 extern tree ix86_float16_type_node;
22912 extern tree ix86_bf16_type_node;
22913
22914 if (!TARGET_SSE2)
22915 {
22916 gcc_checking_assert (!float16_type_node && !bfloat16_type_node);
22917 float16_type_node = ix86_float16_type_node;
22918 bfloat16_type_node = ix86_bf16_type_node;
22919 callback (float16_type_node);
22920 callback (bfloat16_type_node);
22921 float16_type_node = NULL_TREE;
22922 bfloat16_type_node = NULL_TREE;
22923 }
22924 }
22925
22926 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
22927
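/* Implement TARGET_STACK_PROTECT_GUARD.  With the default TLS guard this
   builds a volatile MEM_REF in the thread-pointer address space, e.g. the
   canary at %fs:0x28 on 64-bit glibc targets; the segment register, offset
   and an optional symbol come from the -mstack-protector-guard-reg=,
   -mstack-protector-guard-offset= and -mstack-protector-guard-symbol=
   options.  */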
22928 static tree
22929 ix86_stack_protect_guard (void)
22930 {
22931 if (TARGET_SSP_TLS_GUARD)
22932 {
22933 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
22934 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
22935 tree type = build_qualified_type (type_node, qual);
22936 tree t;
22937
22938 if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
22939 {
22940 t = ix86_tls_stack_chk_guard_decl;
22941
22942 if (t == NULL)
22943 {
22944 rtx x;
22945
22946 t = build_decl
22947 (UNKNOWN_LOCATION, VAR_DECL,
22948 get_identifier (ix86_stack_protector_guard_symbol_str),
22949 type);
22950 TREE_STATIC (t) = 1;
22951 TREE_PUBLIC (t) = 1;
22952 DECL_EXTERNAL (t) = 1;
22953 TREE_USED (t) = 1;
22954 TREE_THIS_VOLATILE (t) = 1;
22955 DECL_ARTIFICIAL (t) = 1;
22956 DECL_IGNORED_P (t) = 1;
22957
22958 /* Do not share RTL as the declaration is visible outside of
22959 current function. */
22960 x = DECL_RTL (t);
22961 RTX_FLAG (x, used) = 1;
22962
22963 ix86_tls_stack_chk_guard_decl = t;
22964 }
22965 }
22966 else
22967 {
22968 tree asptrtype = build_pointer_type (type);
22969
22970 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
22971 t = build2 (MEM_REF, asptrtype, t,
22972 build_int_cst (asptrtype, 0));
22973 TREE_THIS_VOLATILE (t) = 1;
22974 }
22975
22976 return t;
22977 }
22978
22979 return default_stack_protect_guard ();
22980 }
22981
22982 /* For 32-bit code we can save PIC register setup by using
22983 __stack_chk_fail_local hidden function instead of calling
22984 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
22985 register, so it is better to call __stack_chk_fail directly. */
22986
22987 static tree ATTRIBUTE_UNUSED
22988 ix86_stack_protect_fail (void)
22989 {
22990 return TARGET_64BIT
22991 ? default_external_stack_protect_fail ()
22992 : default_hidden_stack_protect_fail ();
22993 }
22994
22995 /* Select a format to encode pointers in exception handling data. CODE
22996 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22997 true if the symbol may be affected by dynamic relocations.
22998
22999 ??? All x86 object file formats are capable of representing this.
23000 After all, the relocation needed is the same as for the call insn.
23001 Whether or not a particular assembler allows us to enter such, I
23002 guess we'll have to see. */
23003
23004 int
23005 asm_preferred_eh_data_format (int code, int global)
23006 {
23007 /* PE-COFF is effectively always -fPIC because of the .reloc section. */
23008 if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
23009 {
23010 int type = DW_EH_PE_sdata8;
23011 if (ptr_mode == SImode
23012 || ix86_cmodel == CM_SMALL_PIC
23013 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
23014 type = DW_EH_PE_sdata4;
23015 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
23016 }
23017
23018 if (ix86_cmodel == CM_SMALL
23019 || (ix86_cmodel == CM_MEDIUM && code))
23020 return DW_EH_PE_udata4;
23021
23022 return DW_EH_PE_absptr;
23023 }
23024 \f
23025 /* Implement targetm.vectorize.builtin_vectorization_cost. */
23026 static int
23027 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
23028 tree vectype, int)
23029 {
23030 bool fp = false;
23031 machine_mode mode = TImode;
23032 int index;
23033 if (vectype != NULL)
23034 {
23035 fp = FLOAT_TYPE_P (vectype);
23036 mode = TYPE_MODE (vectype);
23037 }
23038
23039 switch (type_of_cost)
23040 {
23041 case scalar_stmt:
23042 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
23043
23044 case scalar_load:
23045 /* Load/store costs are relative to a register move, which costs 2.
23046 Rescale to COSTS_N_INSNS so everything has the same base. */
23047 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
23048 : ix86_cost->int_load [2]) / 2;
23049
23050 case scalar_store:
23051 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
23052 : ix86_cost->int_store [2]) / 2;
23053
23054 case vector_stmt:
23055 return ix86_vec_cost (mode,
23056 fp ? ix86_cost->addss : ix86_cost->sse_op);
23057
23058 case vector_load:
23059 index = sse_store_index (mode);
23060 /* See PR82713 - we may end up being called on non-vector type. */
23061 if (index < 0)
23062 index = 2;
23063 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
23064
23065 case vector_store:
23066 index = sse_store_index (mode);
23067 /* See PR82713 - we may end up being called on non-vector type. */
23068 if (index < 0)
23069 index = 2;
23070 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
23071
23072 case vec_to_scalar:
23073 case scalar_to_vec:
23074 return ix86_vec_cost (mode, ix86_cost->sse_op);
23075
23076 /* We should have separate costs for unaligned loads and gather/scatter.
23077 Do that incrementally. */
23078 case unaligned_load:
23079 index = sse_store_index (mode);
23080 /* See PR82713 - we may end up being called on non-vector type. */
23081 if (index < 0)
23082 index = 2;
23083 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
23084
23085 case unaligned_store:
23086 index = sse_store_index (mode);
23087 /* See PR82713 - we may end up being called on non-vector type. */
23088 if (index < 0)
23089 index = 2;
23090 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
23091
23092 case vector_gather_load:
23093 return ix86_vec_cost (mode,
23094 COSTS_N_INSNS
23095 (ix86_cost->gather_static
23096 + ix86_cost->gather_per_elt
23097 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
23098
23099 case vector_scatter_store:
23100 return ix86_vec_cost (mode,
23101 COSTS_N_INSNS
23102 (ix86_cost->scatter_static
23103 + ix86_cost->scatter_per_elt
23104 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
23105
23106 case cond_branch_taken:
23107 return ix86_cost->cond_taken_branch_cost;
23108
23109 case cond_branch_not_taken:
23110 return ix86_cost->cond_not_taken_branch_cost;
23111
23112 case vec_perm:
23113 case vec_promote_demote:
23114 return ix86_vec_cost (mode, ix86_cost->sse_op);
23115
23116 case vec_construct:
23117 {
23118 int n = TYPE_VECTOR_SUBPARTS (vectype);
23119 /* N - 1 element inserts into an SSE vector, the possible
23120 GPR -> XMM move is accounted for in add_stmt_cost. */
23121 if (GET_MODE_BITSIZE (mode) <= 128)
23122 return (n - 1) * ix86_cost->sse_op;
23123 /* One vinserti128 for combining two SSE vectors for AVX256. */
23124 else if (GET_MODE_BITSIZE (mode) == 256)
23125 return ((n - 2) * ix86_cost->sse_op
23126 + ix86_vec_cost (mode, ix86_cost->addss));
23127 /* One vinserti64x4 and two vinserti128 for combining SSE
23128 and AVX256 vectors to AVX512. */
23129 else if (GET_MODE_BITSIZE (mode) == 512)
23130 return ((n - 4) * ix86_cost->sse_op
23131 + 3 * ix86_vec_cost (mode, ix86_cost->addss));
23132 gcc_unreachable ();
23133 }
23134
23135 default:
23136 gcc_unreachable ();
23137 }
23138 }
23139
23140 \f
23141 /* This function returns the calling-ABI-specific va_list type node,
23142 i.e. the va_list type specific to FNDECL. */
23143
23144 static tree
23145 ix86_fn_abi_va_list (tree fndecl)
23146 {
23147 if (!TARGET_64BIT)
23148 return va_list_type_node;
23149 gcc_assert (fndecl != NULL_TREE);
23150
23151 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
23152 return ms_va_list_type_node;
23153 else
23154 return sysv_va_list_type_node;
23155 }
23156
23157 /* Returns the canonical va_list type specified by TYPE. If there
23158 is no valid TYPE provided, it returns NULL_TREE. */
23159
23160 static tree
23161 ix86_canonical_va_list_type (tree type)
23162 {
23163 if (TARGET_64BIT)
23164 {
23165 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
23166 return ms_va_list_type_node;
23167
23168 if ((TREE_CODE (type) == ARRAY_TYPE
23169 && integer_zerop (array_type_nelts (type)))
23170 || POINTER_TYPE_P (type))
23171 {
23172 tree elem_type = TREE_TYPE (type);
23173 if (TREE_CODE (elem_type) == RECORD_TYPE
23174 && lookup_attribute ("sysv_abi va_list",
23175 TYPE_ATTRIBUTES (elem_type)))
23176 return sysv_va_list_type_node;
23177 }
23178
23179 return NULL_TREE;
23180 }
23181
23182 return std_canonical_va_list_type (type);
23183 }
23184
23185 /* Iterate through the target-specific builtin types for va_list.
23186 IDX denotes the iterator, *PTREE is set to the result type of
23187 the va_list builtin, and *PNAME to its internal type.
23188 Returns zero if there is no element for this index, otherwise
23189 IDX should be increased upon the next call.
23190 Note, do not iterate a base builtin's name like __builtin_va_list.
23191 Used from c_common_nodes_and_builtins. */
23192
23193 static int
23194 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
23195 {
23196 if (TARGET_64BIT)
23197 {
23198 switch (idx)
23199 {
23200 default:
23201 break;
23202
23203 case 0:
23204 *ptree = ms_va_list_type_node;
23205 *pname = "__builtin_ms_va_list";
23206 return 1;
23207
23208 case 1:
23209 *ptree = sysv_va_list_type_node;
23210 *pname = "__builtin_sysv_va_list";
23211 return 1;
23212 }
23213 }
23214
23215 return 0;
23216 }
23217
23218 #undef TARGET_SCHED_DISPATCH
23219 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
23220 #undef TARGET_SCHED_DISPATCH_DO
23221 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
23222 #undef TARGET_SCHED_REASSOCIATION_WIDTH
23223 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
23224 #undef TARGET_SCHED_REORDER
23225 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
23226 #undef TARGET_SCHED_ADJUST_PRIORITY
23227 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
23228 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
23229 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
23230 ix86_dependencies_evaluation_hook
23231
23232
23233 /* Implementation of the reassociation_width target hook, used by the
23234 reassoc phase to identify the available parallelism in a reassociated
23235 tree. The statement's tree_code is passed in OP. The operands' type
23236 is passed in MODE. */
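/* Worked example with hypothetical cost-table values: if reassoc_vec_fp
   is 4 and MODE is a 512-bit FP vector on a tuning with
   TARGET_AVX512_SPLIT_REGS, then div is 512 / 256 == 2 and the hook
   returns (4 + 2 - 1) / 2 == 2.  */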
23237
23238 static int
23239 ix86_reassociation_width (unsigned int op, machine_mode mode)
23240 {
23241 int width = 1;
23242 /* Vector part. */
23243 if (VECTOR_MODE_P (mode))
23244 {
23245 int div = 1;
23246 if (INTEGRAL_MODE_P (mode))
23247 width = ix86_cost->reassoc_vec_int;
23248 else if (FLOAT_MODE_P (mode))
23249 width = ix86_cost->reassoc_vec_fp;
23250
23251 if (width == 1)
23252 return 1;
23253
23254 /* Integer vector instructions execute in the FP unit
23255 and can execute 3 additions and one multiplication per cycle. */
23256 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
23257 || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
23258 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
23259 return 1;
23260
23261 /* Account for targets that split wide vectors into multiple parts. */
23262 if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
23263 div = GET_MODE_BITSIZE (mode) / 256;
23264 else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
23265 div = GET_MODE_BITSIZE (mode) / 128;
23266 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
23267 div = GET_MODE_BITSIZE (mode) / 64;
23268 width = (width + div - 1) / div;
23269 }
23270 /* Scalar part. */
23271 else if (INTEGRAL_MODE_P (mode))
23272 width = ix86_cost->reassoc_int;
23273 else if (FLOAT_MODE_P (mode))
23274 width = ix86_cost->reassoc_fp;
23275
23276 /* Avoid using too many registers in 32bit mode. */
23277 if (!TARGET_64BIT && width > 2)
23278 width = 2;
23279 return width;
23280 }
23281
23282 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
23283 place emms and femms instructions. */
23284
23285 static machine_mode
23286 ix86_preferred_simd_mode (scalar_mode mode)
23287 {
23288 if (!TARGET_SSE)
23289 return word_mode;
23290
23291 switch (mode)
23292 {
23293 case E_QImode:
23294 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
23295 return V64QImode;
23296 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23297 return V32QImode;
23298 else
23299 return V16QImode;
23300
23301 case E_HImode:
23302 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
23303 return V32HImode;
23304 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23305 return V16HImode;
23306 else
23307 return V8HImode;
23308
23309 case E_SImode:
23310 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23311 return V16SImode;
23312 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23313 return V8SImode;
23314 else
23315 return V4SImode;
23316
23317 case E_DImode:
23318 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23319 return V8DImode;
23320 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23321 return V4DImode;
23322 else
23323 return V2DImode;
23324
23325 case E_HFmode:
23326 if (TARGET_AVX512FP16)
23327 {
23328 if (TARGET_AVX512VL)
23329 {
23330 if (TARGET_PREFER_AVX128)
23331 return V8HFmode;
23332 else if (TARGET_PREFER_AVX256)
23333 return V16HFmode;
23334 }
23335 return V32HFmode;
23336 }
23337 return word_mode;
23338
23339 case E_SFmode:
23340 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23341 return V16SFmode;
23342 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23343 return V8SFmode;
23344 else
23345 return V4SFmode;
23346
23347 case E_DFmode:
23348 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23349 return V8DFmode;
23350 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23351 return V4DFmode;
23352 else if (TARGET_SSE2)
23353 return V2DFmode;
23354 /* FALLTHRU */
23355
23356 default:
23357 return word_mode;
23358 }
23359 }
23360
23361 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
23362 vectors. If AVX512F is enabled then try vectorizing with 512bit,
23363 256bit and 128bit vectors. */
23364
23365 static unsigned int
23366 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
23367 {
23368 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
23369 {
23370 modes->safe_push (V64QImode);
23371 modes->safe_push (V32QImode);
23372 modes->safe_push (V16QImode);
23373 }
23374 else if (TARGET_AVX512F && all)
23375 {
23376 modes->safe_push (V32QImode);
23377 modes->safe_push (V16QImode);
23378 modes->safe_push (V64QImode);
23379 }
23380 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
23381 {
23382 modes->safe_push (V32QImode);
23383 modes->safe_push (V16QImode);
23384 }
23385 else if (TARGET_AVX && all)
23386 {
23387 modes->safe_push (V16QImode);
23388 modes->safe_push (V32QImode);
23389 }
23390 else if (TARGET_SSE2)
23391 modes->safe_push (V16QImode);
23392
23393 if (TARGET_MMX_WITH_SSE)
23394 modes->safe_push (V8QImode);
23395
23396 if (TARGET_SSE2)
23397 modes->safe_push (V4QImode);
23398
23399 return 0;
23400 }
23401
23402 /* Implementation of targetm.vectorize.get_mask_mode. */
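/* For example, V16SFmode (64 bytes, 16 four-byte elements) with
   TARGET_AVX512F yields the scalar mask mode HImode, whereas V4SImode
   without TARGET_AVX512VL falls through to the vector mask mode
   V4SImode.  */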
23403
23404 static opt_machine_mode
23405 ix86_get_mask_mode (machine_mode data_mode)
23406 {
23407 unsigned vector_size = GET_MODE_SIZE (data_mode);
23408 unsigned nunits = GET_MODE_NUNITS (data_mode);
23409 unsigned elem_size = vector_size / nunits;
23410
23411 /* Scalar mask case. */
23412 if ((TARGET_AVX512F && vector_size == 64)
23413 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
23414 {
23415 if (elem_size == 4
23416 || elem_size == 8
23417 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
23418 return smallest_int_mode_for_size (nunits);
23419 }
23420
23421 scalar_int_mode elem_mode
23422 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
23423
23424 gcc_assert (elem_size * nunits == vector_size);
23425
23426 return mode_for_vector (elem_mode, nunits);
23427 }
23428
23429 \f
23430
23431 /* Return the class of registers which could be used for a pseudo of MODE
23432 and of class RCLASS for spilling instead of memory. Return NO_REGS
23433 if it is not possible or not profitable. */
23434
23435 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
23436
23437 static reg_class_t
23438 ix86_spill_class (reg_class_t rclass, machine_mode mode)
23439 {
23440 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
23441 && TARGET_SSE2
23442 && TARGET_INTER_UNIT_MOVES_TO_VEC
23443 && TARGET_INTER_UNIT_MOVES_FROM_VEC
23444 && (mode == SImode || (TARGET_64BIT && mode == DImode))
23445 && INTEGER_CLASS_P (rclass))
23446 return ALL_SSE_REGS;
23447 return NO_REGS;
23448 }
23449
23450 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
23451 but returns a lower bound. */
23452
23453 static unsigned int
23454 ix86_max_noce_ifcvt_seq_cost (edge e)
23455 {
23456 bool predictable_p = predictable_edge_p (e);
23457 if (predictable_p)
23458 {
23459 if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
23460 return param_max_rtl_if_conversion_predictable_cost;
23461 }
23462 else
23463 {
23464 if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
23465 return param_max_rtl_if_conversion_unpredictable_cost;
23466 }
23467
23468 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
23469 }
23470
23471 /* Return true if SEQ is a good candidate as a replacement for the
23472 if-convertible sequence described in IF_INFO. */
23473
23474 static bool
23475 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
23476 {
23477 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
23478 {
23479 int cmov_cnt = 0;
23480 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
23481 Maybe we should allow even more conditional moves as long as they
23482 are used far enough not to stall the CPU, or also consider
23483 IF_INFO->TEST_BB succ edge probabilities. */
23484 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
23485 {
23486 rtx set = single_set (insn);
23487 if (!set)
23488 continue;
23489 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
23490 continue;
23491 rtx src = SET_SRC (set);
23492 machine_mode mode = GET_MODE (src);
23493 if (GET_MODE_CLASS (mode) != MODE_INT
23494 && GET_MODE_CLASS (mode) != MODE_FLOAT)
23495 continue;
23496 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
23497 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
23498 continue;
23499 /* insn is CMOV or FCMOV. */
23500 if (++cmov_cnt > 1)
23501 return false;
23502 }
23503 }
23504 return default_noce_conversion_profitable_p (seq, if_info);
23505 }
23506
23507 /* x86-specific vector costs. */
23508 class ix86_vector_costs : public vector_costs
23509 {
23510 using vector_costs::vector_costs;
23511
23512 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
23513 stmt_vec_info stmt_info, slp_tree node,
23514 tree vectype, int misalign,
23515 vect_cost_model_location where) override;
23516 };
23517
23518 /* Implement targetm.vectorize.create_costs. */
23519
23520 static vector_costs *
23521 ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
23522 {
23523 return new ix86_vector_costs (vinfo, costing_for_scalar);
23524 }
23525
23526 unsigned
23527 ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
23528 stmt_vec_info stmt_info, slp_tree node,
23529 tree vectype, int misalign,
23530 vect_cost_model_location where)
23531 {
23532 unsigned retval = 0;
23533 bool scalar_p
23534 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
23535 int stmt_cost = -1;
23536
23537 bool fp = false;
23538 machine_mode mode = scalar_p ? SImode : TImode;
23539
23540 if (vectype != NULL)
23541 {
23542 fp = FLOAT_TYPE_P (vectype);
23543 mode = TYPE_MODE (vectype);
23544 if (scalar_p)
23545 mode = TYPE_MODE (TREE_TYPE (vectype));
23546 }
23547
23548 if ((kind == vector_stmt || kind == scalar_stmt)
23549 && stmt_info
23550 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
23551 {
23552 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
23553 /*machine_mode inner_mode = mode;
23554 if (VECTOR_MODE_P (mode))
23555 inner_mode = GET_MODE_INNER (mode);*/
23556
23557 switch (subcode)
23558 {
23559 case PLUS_EXPR:
23560 case POINTER_PLUS_EXPR:
23561 case MINUS_EXPR:
23562 if (kind == scalar_stmt)
23563 {
23564 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
23565 stmt_cost = ix86_cost->addss;
23566 else if (X87_FLOAT_MODE_P (mode))
23567 stmt_cost = ix86_cost->fadd;
23568 else
23569 stmt_cost = ix86_cost->add;
23570 }
23571 else
23572 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
23573 : ix86_cost->sse_op);
23574 break;
23575
23576 case MULT_EXPR:
23577 /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
23578 take it as MULT_EXPR. */
23579 case MULT_HIGHPART_EXPR:
23580 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
23581 break;
23582 /* There's no direct instruction for WIDEN_MULT_EXPR,
23583 take emulation into account. */
23584 case WIDEN_MULT_EXPR:
23585 stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
23586 TYPE_UNSIGNED (vectype));
23587 break;
23588
23589 case NEGATE_EXPR:
23590 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
23591 stmt_cost = ix86_cost->sse_op;
23592 else if (X87_FLOAT_MODE_P (mode))
23593 stmt_cost = ix86_cost->fchs;
23594 else if (VECTOR_MODE_P (mode))
23595 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
23596 else
23597 stmt_cost = ix86_cost->add;
23598 break;
23599 case TRUNC_DIV_EXPR:
23600 case CEIL_DIV_EXPR:
23601 case FLOOR_DIV_EXPR:
23602 case ROUND_DIV_EXPR:
23603 case TRUNC_MOD_EXPR:
23604 case CEIL_MOD_EXPR:
23605 case FLOOR_MOD_EXPR:
23606 case RDIV_EXPR:
23607 case ROUND_MOD_EXPR:
23608 case EXACT_DIV_EXPR:
23609 stmt_cost = ix86_division_cost (ix86_cost, mode);
23610 break;
23611
23612 case RSHIFT_EXPR:
23613 case LSHIFT_EXPR:
23614 case LROTATE_EXPR:
23615 case RROTATE_EXPR:
23616 {
23617 tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
23618 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
23619 stmt_cost = ix86_shift_rotate_cost
23620 (ix86_cost,
23621 (subcode == RSHIFT_EXPR
23622 && !TYPE_UNSIGNED (TREE_TYPE (op1)))
23623 ? ASHIFTRT : LSHIFTRT, mode,
23624 TREE_CODE (op2) == INTEGER_CST,
23625 cst_and_fits_in_hwi (op2)
23626 ? int_cst_value (op2) : -1,
23627 false, false, NULL, NULL);
23628 }
23629 break;
23630 case NOP_EXPR:
23631 /* Only sign-conversions are free. */
23632 if (tree_nop_conversion_p
23633 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
23634 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
23635 stmt_cost = 0;
23636 break;
23637
23638 case BIT_IOR_EXPR:
23639 case ABS_EXPR:
23640 case ABSU_EXPR:
23641 case MIN_EXPR:
23642 case MAX_EXPR:
23643 case BIT_XOR_EXPR:
23644 case BIT_AND_EXPR:
23645 case BIT_NOT_EXPR:
23646 if (SSE_FLOAT_MODE_SSEMATH_OR_HF_P (mode))
23647 stmt_cost = ix86_cost->sse_op;
23648 else if (VECTOR_MODE_P (mode))
23649 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
23650 else
23651 stmt_cost = ix86_cost->add;
23652 break;
23653 default:
23654 break;
23655 }
23656 }
23657
23658 combined_fn cfn;
23659 if ((kind == vector_stmt || kind == scalar_stmt)
23660 && stmt_info
23661 && stmt_info->stmt
23662 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
23663 switch (cfn)
23664 {
23665 case CFN_FMA:
23666 stmt_cost = ix86_vec_cost (mode,
23667 mode == SFmode ? ix86_cost->fmass
23668 : ix86_cost->fmasd);
23669 break;
23670 case CFN_MULH:
23671 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
23672 break;
23673 default:
23674 break;
23675 }
23676
23677 /* If we do elementwise loads into a vector then we are bound by
23678 latency and execution resources for the many scalar loads
23679 (AGU and load ports). Try to account for this by scaling the
23680 construction cost by the number of elements involved. */
23681 if ((kind == vec_construct || kind == vec_to_scalar)
23682 && stmt_info
23683 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
23684 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
23685 && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
23686 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
23687 != INTEGER_CST))
23688 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
23689 {
23690 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
23691 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
23692 }
23693 else if (kind == vec_construct
23694 && node
23695 && SLP_TREE_DEF_TYPE (node) == vect_external_def
23696 && INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
23697 {
23698 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
23699 unsigned i;
23700 tree op;
23701 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
23702 if (TREE_CODE (op) == SSA_NAME)
23703 TREE_VISITED (op) = 0;
23704 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
23705 {
23706 if (TREE_CODE (op) != SSA_NAME
23707 || TREE_VISITED (op))
23708 continue;
23709 TREE_VISITED (op) = 1;
23710 gimple *def = SSA_NAME_DEF_STMT (op);
23711 tree tem;
23712 if (is_gimple_assign (def)
23713 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
23714 && ((tem = gimple_assign_rhs1 (def)), true)
23715 && TREE_CODE (tem) == SSA_NAME
23716 /* A sign-change expands to nothing. */
23717 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
23718 TREE_TYPE (tem)))
23719 def = SSA_NAME_DEF_STMT (tem);
23720 /* When the component is loaded from memory we can directly
23721 move it to a vector register, otherwise we have to go
23722 via a GPR or via vpinsr which involves similar cost.
23723 Likewise with a BIT_FIELD_REF extracting from a vector
23724 register we can hope to avoid using a GPR. */
23725 if (!is_gimple_assign (def)
23726 || (!gimple_assign_load_p (def)
23727 && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
23728 || !VECTOR_TYPE_P (TREE_TYPE
23729 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
23730 stmt_cost += ix86_cost->sse_to_integer;
23731 }
23732 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
23733 if (TREE_CODE (op) == SSA_NAME)
23734 TREE_VISITED (op) = 0;
23735 }
23736 if (stmt_cost == -1)
23737 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
23738
23739 /* Penalize DFmode vector operations for Bonnell. */
23740 if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
23741 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
23742 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
23743
23744 /* Statements in an inner loop relative to the loop being
23745 vectorized are weighted more heavily. The value here is
23746 arbitrary and could potentially be improved with analysis. */
23747 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
23748
23749 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
23750 for Silvermont as it has an out-of-order integer pipeline and can execute
23751 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
23752 if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
23753 || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
23754 && stmt_info && stmt_info->stmt)
23755 {
23756 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
23757 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
23758 retval = (retval * 17) / 10;
23759 }
23760
23761 m_costs[where] += retval;
23762
23763 return retval;
23764 }
23765
23766 /* Validate target specific memory model bits in VAL. */
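/* E.g. with -mhle, a model such as
   (__ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE) passes through unchanged,
   while combining IX86_HLE_ACQUIRE with a weaker model like
   __ATOMIC_RELAXED triggers the warning below and falls back to
   SEQ_CST with the HLE bit preserved.  */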
23767
23768 static unsigned HOST_WIDE_INT
23769 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
23770 {
23771 enum memmodel model = memmodel_from_int (val);
23772 bool strong;
23773
23774 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
23775 |MEMMODEL_MASK)
23776 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
23777 {
23778 warning (OPT_Winvalid_memory_model,
23779 "unknown architecture specific memory model");
23780 return MEMMODEL_SEQ_CST;
23781 }
23782 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
23783 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
23784 {
23785 warning (OPT_Winvalid_memory_model,
23786 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
23787 "memory model");
23788 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
23789 }
23790 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
23791 {
23792 warning (OPT_Winvalid_memory_model,
23793 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
23794 "memory model");
23795 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
23796 }
23797 return val;
23798 }
23799
23800 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
23801 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
23802 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
23803 or number of vecsize_mangle variants that should be emitted. */
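/* The vecsize_mangle letters used below select the clone ISA level:
   'b' is the SSE2 (128-bit) variant, 'c' AVX, 'd' AVX2 and 'e' AVX512F;
   an exported "#pragma omp declare simd" function gets all four
   variants, a local one only the best ISA currently enabled.  */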
23804
23805 static int
23806 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
23807 struct cgraph_simd_clone *clonei,
23808 tree base_type, int num,
23809 bool explicit_p)
23810 {
23811 int ret = 1;
23812
23813 if (clonei->simdlen
23814 && (clonei->simdlen < 2
23815 || clonei->simdlen > 1024
23816 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
23817 {
23818 if (explicit_p)
23819 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23820 "unsupported simdlen %wd", clonei->simdlen.to_constant ());
23821 return 0;
23822 }
23823
23824 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
23825 if (TREE_CODE (ret_type) != VOID_TYPE)
23826 switch (TYPE_MODE (ret_type))
23827 {
23828 case E_QImode:
23829 case E_HImode:
23830 case E_SImode:
23831 case E_DImode:
23832 case E_SFmode:
23833 case E_DFmode:
23834 /* case E_SCmode: */
23835 /* case E_DCmode: */
23836 if (!AGGREGATE_TYPE_P (ret_type))
23837 break;
23838 /* FALLTHRU */
23839 default:
23840 if (explicit_p)
23841 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23842 "unsupported return type %qT for simd", ret_type);
23843 return 0;
23844 }
23845
23846 tree t;
23847 int i;
23848 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
23849 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
23850
23851 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
23852 t && t != void_list_node; t = TREE_CHAIN (t), i++)
23853 {
23854 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
23855 switch (TYPE_MODE (arg_type))
23856 {
23857 case E_QImode:
23858 case E_HImode:
23859 case E_SImode:
23860 case E_DImode:
23861 case E_SFmode:
23862 case E_DFmode:
23863 /* case E_SCmode: */
23864 /* case E_DCmode: */
23865 if (!AGGREGATE_TYPE_P (arg_type))
23866 break;
23867 /* FALLTHRU */
23868 default:
23869 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
23870 break;
23871 if (explicit_p)
23872 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23873 "unsupported argument type %qT for simd", arg_type);
23874 return 0;
23875 }
23876 }
23877
23878 if (!TREE_PUBLIC (node->decl) || !explicit_p)
23879 {
23880 /* If the function isn't exported, we can pick up just one ISA
23881 for the clones. */
23882 if (TARGET_AVX512F)
23883 clonei->vecsize_mangle = 'e';
23884 else if (TARGET_AVX2)
23885 clonei->vecsize_mangle = 'd';
23886 else if (TARGET_AVX)
23887 clonei->vecsize_mangle = 'c';
23888 else
23889 clonei->vecsize_mangle = 'b';
23890 ret = 1;
23891 }
23892 else
23893 {
23894 clonei->vecsize_mangle = "bcde"[num];
23895 ret = 4;
23896 }
23897 clonei->mask_mode = VOIDmode;
23898 switch (clonei->vecsize_mangle)
23899 {
23900 case 'b':
23901 clonei->vecsize_int = 128;
23902 clonei->vecsize_float = 128;
23903 break;
23904 case 'c':
23905 clonei->vecsize_int = 128;
23906 clonei->vecsize_float = 256;
23907 break;
23908 case 'd':
23909 clonei->vecsize_int = 256;
23910 clonei->vecsize_float = 256;
23911 break;
23912 case 'e':
23913 clonei->vecsize_int = 512;
23914 clonei->vecsize_float = 512;
23915 if (TYPE_MODE (base_type) == QImode)
23916 clonei->mask_mode = DImode;
23917 else
23918 clonei->mask_mode = SImode;
23919 break;
23920 }
23921 if (clonei->simdlen == 0)
23922 {
23923 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
23924 clonei->simdlen = clonei->vecsize_int;
23925 else
23926 clonei->simdlen = clonei->vecsize_float;
23927 clonei->simdlen = clonei->simdlen
23928 / GET_MODE_BITSIZE (TYPE_MODE (base_type));
23929 }
23930 else if (clonei->simdlen > 16)
23931 {
23932 /* For compatibility with ICC, use the same upper bounds
23933 for simdlen. In particular, for CTYPE below, use the return type,
23934 unless the function returns void, in which case use the characteristic
23935 type. If it is possible for the given SIMDLEN to pass a CTYPE value
23936 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
23937 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
23938 emit the corresponding clone. */
23939 tree ctype = ret_type;
23940 if (VOID_TYPE_P (ret_type))
23941 ctype = base_type;
23942 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
23943 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
23944 cnt /= clonei->vecsize_int;
23945 else
23946 cnt /= clonei->vecsize_float;
23947 if (cnt > (TARGET_64BIT ? 16 : 8))
23948 {
23949 if (explicit_p)
23950 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
23951 "unsupported simdlen %wd",
23952 clonei->simdlen.to_constant ());
23953 return 0;
23954 }
23955 }
23956 return ret;
23957 }
23958
23959 /* If SIMD clone NODE can't be used in a vectorized loop
23960 in the current function, return -1, otherwise return the badness of using it
23961 (0 if it is most desirable from the vecsize_mangle point of view, 1 if
23962 slightly less desirable, etc.). */
23963
23964 static int
23965 ix86_simd_clone_usable (struct cgraph_node *node)
23966 {
23967 switch (node->simdclone->vecsize_mangle)
23968 {
23969 case 'b':
23970 if (!TARGET_SSE2)
23971 return -1;
23972 if (!TARGET_AVX)
23973 return 0;
23974 return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
23975 case 'c':
23976 if (!TARGET_AVX)
23977 return -1;
23978 return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
23979 case 'd':
23980 if (!TARGET_AVX2)
23981 return -1;
23982 return TARGET_AVX512F ? 1 : 0;
23983 case 'e':
23984 if (!TARGET_AVX512F)
23985 return -1;
23986 return 0;
23987 default:
23988 gcc_unreachable ();
23989 }
23990 }
23991
23992 /* This function adjusts the unroll factor based on
23993 the hardware capabilities. For example, bdver3 has
23994 a loop buffer which makes unrolling of smaller
23995 loops less important. This function decides the
23996 unroll factor using the number of memory references
23997 (the value 32 is used) as a heuristic. */
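/* E.g. on a bdver3-tuned build with unrolling enabled, a loop body with 8
   counted memory references has its unroll factor capped at 32 / 8 == 4 by
   the computation below (references wider than four words count double).  */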
23998
23999 static unsigned
24000 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
24001 {
24002 basic_block *bbs;
24003 rtx_insn *insn;
24004 unsigned i;
24005 unsigned mem_count = 0;
24006
24007 /* Unroll small loops when the unroll factor is not explicitly
24008 specified. */
24009 if (ix86_unroll_only_small_loops && !loop->unroll)
24010 {
24011 if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
24012 return MIN (nunroll, ix86_cost->small_unroll_factor);
24013 else
24014 return 1;
24015 }
24016
24017 if (!TARGET_ADJUST_UNROLL)
24018 return nunroll;
24019
24020 /* Count the number of memory references within the loop body.
24021 This value determines the unrolling factor for bdver3 and bdver4
24022 architectures. */
24023 subrtx_iterator::array_type array;
24024 bbs = get_loop_body (loop);
24025 for (i = 0; i < loop->num_nodes; i++)
24026 FOR_BB_INSNS (bbs[i], insn)
24027 if (NONDEBUG_INSN_P (insn))
24028 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
24029 if (const_rtx x = *iter)
24030 if (MEM_P (x))
24031 {
24032 machine_mode mode = GET_MODE (x);
24033 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
24034 if (n_words > 4)
24035 mem_count += 2;
24036 else
24037 mem_count += 1;
24038 }
24039 free (bbs);
24040
24041 if (mem_count && mem_count <= 32)
24042 return MIN (nunroll, 32 / mem_count);
24043
24044 return nunroll;
24045 }
24046
24047
24048 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
24049
24050 static bool
24051 ix86_float_exceptions_rounding_supported_p (void)
24052 {
24053 /* For x87 floating point with standard excess precision handling,
24054 there is no adddf3 pattern (since x87 floating point only has
24055 XFmode operations) so the default hook implementation gets this
24056 wrong. */
24057 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
24058 }
24059
24060 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
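/* The trees built below roughly correspond to this pseudo-C, assuming both
   the x87 and SSE parts are enabled (the builtin spellings are shorthand
   for the IX86_BUILTIN_* calls actually used):

     hold:    fnstenv (&fenv); fnclex ();
	      mxcsr_orig = stmxcsr ();
	      mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
	      ldmxcsr (mxcsr_mod);
     clear:   fnclex (); ldmxcsr (mxcsr_mod);
     update:  exceptions = fnstsw (); fldenv (&fenv);
	      exceptions |= stmxcsr (); ldmxcsr (mxcsr_orig);
	      __atomic_feraiseexcept (exceptions);  */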
24061
24062 static void
24063 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
24064 {
24065 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
24066 return;
24067 tree exceptions_var = create_tmp_var_raw (integer_type_node);
24068 if (TARGET_80387)
24069 {
24070 tree fenv_index_type = build_index_type (size_int (6));
24071 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
24072 tree fenv_var = create_tmp_var_raw (fenv_type);
24073 TREE_ADDRESSABLE (fenv_var) = 1;
24074 tree fenv_ptr = build_pointer_type (fenv_type);
24075 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
24076 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
24077 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
24078 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
24079 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
24080 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
24081 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
24082 tree hold_fnclex = build_call_expr (fnclex, 0);
24083 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
24084 NULL_TREE, NULL_TREE);
24085 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
24086 hold_fnclex);
24087 *clear = build_call_expr (fnclex, 0);
24088 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
24089 tree fnstsw_call = build_call_expr (fnstsw, 0);
24090 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
24091 fnstsw_call, NULL_TREE, NULL_TREE);
24092 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
24093 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
24094 exceptions_var, exceptions_x87,
24095 NULL_TREE, NULL_TREE);
24096 *update = build2 (COMPOUND_EXPR, integer_type_node,
24097 sw_mod, update_mod);
24098 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
24099 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
24100 }
24101 if (TARGET_SSE && TARGET_SSE_MATH)
24102 {
24103 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
24104 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
24105 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
24106 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
24107 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
24108 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
24109 mxcsr_orig_var, stmxcsr_hold_call,
24110 NULL_TREE, NULL_TREE);
24111 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
24112 mxcsr_orig_var,
24113 build_int_cst (unsigned_type_node, 0x1f80));
24114 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
24115 build_int_cst (unsigned_type_node, 0xffffffc0));
24116 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
24117 mxcsr_mod_var, hold_mod_val,
24118 NULL_TREE, NULL_TREE);
24119 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
24120 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
24121 hold_assign_orig, hold_assign_mod);
24122 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
24123 ldmxcsr_hold_call);
24124 if (*hold)
24125 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
24126 else
24127 *hold = hold_all;
24128 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
24129 if (*clear)
24130 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
24131 ldmxcsr_clear_call);
24132 else
24133 *clear = ldmxcsr_clear_call;
24134 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
24135 tree exceptions_sse = fold_convert (integer_type_node,
24136 stxmcsr_update_call);
24137 if (*update)
24138 {
24139 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
24140 exceptions_var, exceptions_sse);
24141 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
24142 exceptions_var, exceptions_mod);
24143 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
24144 exceptions_assign);
24145 }
24146 else
24147 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
24148 exceptions_sse, NULL_TREE, NULL_TREE);
24149 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
24150 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
24151 ldmxcsr_update_call);
24152 }
24153 tree atomic_feraiseexcept
24154 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
24155 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
24156 1, exceptions_var);
24157 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
24158 atomic_feraiseexcept_call);
24159 }
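/* A minimal illustration of what the hold/clear/update sequences built
   above are for (a sketch, not code from this file): a C11 atomic
   floating-point compound assignment such as

     _Atomic double d;
     d += 1.0;

   is expanded as a compare-and-exchange loop.  *HOLD saves and masks the
   FP environment before the loop, *CLEAR discards exceptions raised by a
   failed iteration, and *UPDATE restores the environment and raises the
   exceptions of the final, successful iteration via
   __atomic_feraiseexcept.  */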
24160
24161 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
24162 /* For i386, a common symbol is local only for non-PIE binaries. For
24163 x86-64, a common symbol is local only for non-PIE binaries or when
24164 the linker supports copy relocations in PIE binaries. */
24165
24166 static bool
24167 ix86_binds_local_p (const_tree exp)
24168 {
24169 bool direct_extern_access
24170 = (ix86_direct_extern_access
24171 && !(VAR_OR_FUNCTION_DECL_P (exp)
24172 && lookup_attribute ("nodirect_extern_access",
24173 DECL_ATTRIBUTES (exp))));
24174 if (!direct_extern_access)
24175 ix86_has_no_direct_extern_access = true;
24176 return default_binds_local_p_3 (exp, flag_shlib != 0, true,
24177 direct_extern_access,
24178 (direct_extern_access
24179 && (!flag_pic
24180 || (TARGET_64BIT
24181 && HAVE_LD_PIE_COPYRELOC != 0))));
24182 }
24183
24184 /* If flag_pic is set or ix86_direct_extern_access is false, then neither
24185 local nor global relocs should be placed in read-only memory. */
24186
24187 static int
24188 ix86_reloc_rw_mask (void)
24189 {
24190 return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
24191 }
24192 #endif
24193
24194 /* If MEM is in the form of [base+offset], extract the two parts of
24195 the address into BASE and OFFSET and return true, otherwise return false. */
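/* Illustrative forms this accepts (BASE, OFFSET shown per case):
     (reg R)                                  -> (reg R), (const_int 0)
     (plus (reg R) (const_int 8))             -> (reg R), (const_int 8)
     (const (plus (symbol_ref "x") (const_int 4)))
                                              -> (symbol_ref "x"), (const_int 4)  */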
24196
24197 static bool
24198 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
24199 {
24200 rtx addr;
24201
24202 gcc_assert (MEM_P (mem));
24203
24204 addr = XEXP (mem, 0);
24205
24206 if (GET_CODE (addr) == CONST)
24207 addr = XEXP (addr, 0);
24208
24209 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
24210 {
24211 *base = addr;
24212 *offset = const0_rtx;
24213 return true;
24214 }
24215
24216 if (GET_CODE (addr) == PLUS
24217 && (REG_P (XEXP (addr, 0))
24218 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
24219 && CONST_INT_P (XEXP (addr, 1)))
24220 {
24221 *base = XEXP (addr, 0);
24222 *offset = XEXP (addr, 1);
24223 return true;
24224 }
24225
24226 return false;
24227 }
24228
24229 /* Given OPERANDS of consecutive load/store instructions, check if we
24230 can merge them into a single move-multiple. LOAD is true if they are
24231 load instructions. MODE is the mode of the memory operands. */
24232
24233 bool
24234 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
24235 machine_mode mode)
24236 {
24237 HOST_WIDE_INT offval_1, offval_2, msize;
24238 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
24239
24240 if (load)
24241 {
24242 mem_1 = operands[1];
24243 mem_2 = operands[3];
24244 reg_1 = operands[0];
24245 reg_2 = operands[2];
24246 }
24247 else
24248 {
24249 mem_1 = operands[0];
24250 mem_2 = operands[2];
24251 reg_1 = operands[1];
24252 reg_2 = operands[3];
24253 }
24254
24255 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
24256
24257 if (REGNO (reg_1) != REGNO (reg_2))
24258 return false;
24259
24260 /* Check if the addresses are in the form of [base+offset]. */
24261 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
24262 return false;
24263 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
24264 return false;
24265
24266 /* Check if the bases are the same. */
24267 if (!rtx_equal_p (base_1, base_2))
24268 return false;
24269
24270 offval_1 = INTVAL (offset_1);
24271 offval_2 = INTVAL (offset_2);
24272 msize = GET_MODE_SIZE (mode);
24273 /* Check if mem_1 is adjacent to mem_2 and mem_1 has the lower address. */
24274 if (offval_1 + msize != offval_2)
24275 return false;
24276
24277 return true;
24278 }
24279
24280 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
24281
24282 static bool
24283 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
24284 optimization_type opt_type)
24285 {
24286 switch (op)
24287 {
24288 case asin_optab:
24289 case acos_optab:
24290 case log1p_optab:
24291 case exp_optab:
24292 case exp10_optab:
24293 case exp2_optab:
24294 case expm1_optab:
24295 case ldexp_optab:
24296 case scalb_optab:
24297 case round_optab:
24298 case lround_optab:
24299 return opt_type == OPTIMIZE_FOR_SPEED;
24300
24301 case rint_optab:
24302 if (SSE_FLOAT_MODE_P (mode1)
24303 && TARGET_SSE_MATH
24304 && !flag_trapping_math
24305 && !TARGET_SSE4_1
24306 && mode1 != HFmode)
24307 return opt_type == OPTIMIZE_FOR_SPEED;
24308 return true;
24309
24310 case floor_optab:
24311 case ceil_optab:
24312 case btrunc_optab:
24313 if (((SSE_FLOAT_MODE_P (mode1)
24314 && TARGET_SSE_MATH
24315 && TARGET_SSE4_1)
24316 || mode1 == HFmode)
24317 && !flag_trapping_math)
24318 return true;
24319 return opt_type == OPTIMIZE_FOR_SPEED;
24320
24321 case rsqrt_optab:
24322 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
24323
24324 default:
24325 return true;
24326 }
24327 }
24328
24329 /* Address space support.
24330
24331 This is not "far pointers" in the 16-bit sense, but an easy way
24332 to use %fs and %gs segment prefixes. Therefore:
24333
24334 (a) All address spaces have the same modes,
24335 (b) All address spaces have the same address forms,
24336 (c) While %fs and %gs are technically subsets of the generic
24337 address space, they are probably not subsets of each other.
24338 (d) Since we have no access to the segment base register values
24339 without resorting to a system call, we cannot convert a
24340 non-default address space to a default address space.
24341 Therefore we do not claim %fs or %gs are subsets of generic.
24342
24343 Therefore we can (mostly) use the default hooks. */
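/* A user-level sketch of these address spaces (illustrative only,
   assuming the GNU named-address-space extension):

     int __seg_gs *p = (int __seg_gs *) 16;
     int v = *p;    // loads from %gs:16, i.e. offset 16 from the
                    // %gs segment base

   Because every access is relative to a nonzero segment base, even
   offset 0 is a meaningful address, which is what the hook below
   reports.  */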
24344
24345 /* All use of segmentation is assumed to make address 0 valid. */
24346
24347 static bool
24348 ix86_addr_space_zero_address_valid (addr_space_t as)
24349 {
24350 return as != ADDR_SPACE_GENERIC;
24351 }
24352
24353 static void
24354 ix86_init_libfuncs (void)
24355 {
24356 if (TARGET_64BIT)
24357 {
24358 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
24359 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
24360 }
24361 else
24362 {
24363 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
24364 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
24365 }
24366
24367 #if TARGET_MACHO
24368 darwin_rename_builtins ();
24369 #endif
24370 }
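/* For example, when both the quotient and the remainder of an __int128
   division are needed on a 64-bit target, the libfuncs registered above
   let the middle end emit a single __divmodti4 call instead of separate
   __divti3 and __modti3 calls (a sketch of the intent; the exact
   expansion is decided elsewhere).  */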
24371
24372 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
24373 FPU, assume that the fpcw is set to extended precision; when using
24374 only SSE, rounding is correct; when using both SSE and the FPU,
24375 the rounding precision is indeterminate, since either may be chosen
24376 apparently at random. */
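/* For instance, with -mfpmath=387 (x87 only, no SSE math) float and
   double arithmetic is evaluated in long double precision, corresponding
   to FLT_EVAL_METHOD == 2 in <float.h>; with -mfpmath=sse on SSE2 the
   method is 0 and no excess precision is introduced.  */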
24377
24378 static enum flt_eval_method
24379 ix86_get_excess_precision (enum excess_precision_type type)
24380 {
24381 switch (type)
24382 {
24383 case EXCESS_PRECISION_TYPE_FAST:
24384 /* The fastest type to promote to will always be the native type,
24385 whether that occurs with implicit excess precision or
24386 otherwise. */
24387 return TARGET_AVX512FP16
24388 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24389 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
24390 case EXCESS_PRECISION_TYPE_STANDARD:
24391 case EXCESS_PRECISION_TYPE_IMPLICIT:
24392 /* Otherwise, the excess precision we want when we are
24393 in a standards compliant mode, and the implicit precision we
24394 provide would be identical were it not for the unpredictable
24395 cases. */
24396 if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
24397 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24398 else if (!TARGET_80387)
24399 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
24400 else if (!TARGET_MIX_SSE_I387)
24401 {
24402 if (!(TARGET_SSE && TARGET_SSE_MATH))
24403 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
24404 else if (TARGET_SSE2)
24405 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
24406 }
24407
24408 /* If we are in standards compliant mode, but we know we will
24409 calculate in unpredictable precision, return
24410 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
24411 excess precision if the target can't guarantee it will honor
24412 it. */
24413 return (type == EXCESS_PRECISION_TYPE_STANDARD
24414 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
24415 : FLT_EVAL_METHOD_UNPREDICTABLE);
24416 case EXCESS_PRECISION_TYPE_FLOAT16:
24417 if (TARGET_80387
24418 && !(TARGET_SSE_MATH && TARGET_SSE))
24419 error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
24420 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24421 default:
24422 gcc_unreachable ();
24423 }
24424
24425 return FLT_EVAL_METHOD_UNPREDICTABLE;
24426 }
24427
24428 /* Implement PUSH_ROUNDING. On 386, we have a pushw instruction that
24429 decrements by exactly 2 no matter what the position was; there is no pushb.
24430
24431 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
24432 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
24433 are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
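/* For example, pushing a 2-byte value on a 64-bit target still adjusts
   the stack pointer by 8, since ROUND_UP (2, 8) == 8; on a 32-bit
   target ROUND_UP (2, 4) == 4.  */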
24434
24435 poly_int64
24436 ix86_push_rounding (poly_int64 bytes)
24437 {
24438 return ROUND_UP (bytes, UNITS_PER_WORD);
24439 }
24440
24441 /* Use 8 bits of metadata starting from bit 48 for LAM_U48,
24442 and 6 bits of metadata starting from bit 57 for LAM_U57. */
24443 #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48 \
24444 ? 48 \
24445 : (ix86_lam_type == lam_u57 ? 57 : 0))
24446 #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48 \
24447 ? 8 \
24448 : (ix86_lam_type == lam_u57 ? 6 : 0))
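/* Illustrative tagged-pointer layouts implied by the macros above:
     LAM_U57: bit 63 | 6-bit tag in bits 62..57 | address bits 56..0
     LAM_U48: bits 63..56 | 8-bit tag in bits 55..48 | address bits 47..0  */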
24449
24450 /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES. */
24451 bool
24452 ix86_memtag_can_tag_addresses ()
24453 {
24454 return ix86_lam_type != lam_none && TARGET_LP64;
24455 }
24456
24457 /* Implement TARGET_MEMTAG_TAG_SIZE. */
24458 unsigned char
24459 ix86_memtag_tag_size ()
24460 {
24461 return IX86_HWASAN_TAG_SIZE;
24462 }
24463
24464 /* Implement TARGET_MEMTAG_SET_TAG. */
24465 rtx
24466 ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
24467 {
24468 /* default_memtag_insert_random_tag may
24469 generate a tag whose value needs more than 6 bits. */
24470 if (ix86_lam_type == lam_u57)
24471 {
24472 unsigned HOST_WIDE_INT and_imm
24473 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
24474
24475 emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
24476 }
24477 tag = expand_simple_binop (Pmode, ASHIFT, tag,
24478 GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
24479 /* unsignedp = */1, OPTAB_WIDEN);
24480 rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
24481 /* unsignedp = */1, OPTAB_DIRECT);
24482 return ret;
24483 }
24484
24485 /* Implement TARGET_MEMTAG_EXTRACT_TAG. */
24486 rtx
24487 ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
24488 {
24489 rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
24490 GEN_INT (IX86_HWASAN_SHIFT), target,
24491 /* unsignedp = */0,
24492 OPTAB_DIRECT);
24493 rtx ret = gen_reg_rtx (QImode);
24494 /* Mask off bit63 when LAM_U57. */
24495 if (ix86_lam_type == lam_u57)
24496 {
24497 unsigned HOST_WIDE_INT and_imm
24498 = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
24499 emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
24500 gen_int_mode (and_imm, QImode)));
24501 }
24502 else
24503 emit_move_insn (ret, gen_lowpart (QImode, tag));
24504 return ret;
24505 }
24506
24507 /* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER. */
24508 rtx
24509 ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
24510 {
24511 /* Leave bit63 alone. */
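  /* The mask is bit 63 plus bits [IX86_HWASAN_SHIFT-1 : 0], so the AND
     below drops the tag bits [62 : IX86_HWASAN_SHIFT] while keeping
     bit 63 and the low address bits.  */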
24512 rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
24513 + (HOST_WIDE_INT_1U << 63) - 1),
24514 Pmode);
24515 rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
24516 tag_mask, target, true,
24517 OPTAB_DIRECT);
24518 gcc_assert (untagged_base);
24519 return untagged_base;
24520 }
24521
24522 /* Implement TARGET_MEMTAG_ADD_TAG. */
24523 rtx
24524 ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
24525 {
24526 rtx base_tag = gen_reg_rtx (QImode);
24527 rtx base_addr = gen_reg_rtx (Pmode);
24528 rtx tagged_addr = gen_reg_rtx (Pmode);
24529 rtx new_tag = gen_reg_rtx (QImode);
24530 unsigned HOST_WIDE_INT and_imm
24531 = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
24532
24533 /* When there's "overflow" in the tag addition,
24534 we need to mask the most significant bit off. */
24535 emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
24536 emit_move_insn (base_addr,
24537 ix86_memtag_untagged_pointer (base, NULL_RTX));
24538 emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
24539 emit_move_insn (new_tag, base_tag);
24540 emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
24541 emit_move_insn (tagged_addr,
24542 ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
24543 return plus_constant (Pmode, tagged_addr, offset);
24544 }
24545
24546 /* Target-specific selftests. */
24547
24548 #if CHECKING_P
24549
24550 namespace selftest {
24551
24552 /* Verify that hard regs are dumped as expected (in compact mode). */
24553
24554 static void
24555 ix86_test_dumping_hard_regs ()
24556 {
24557 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
24558 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
24559 }
24560
24561 /* Test dumping an insn with repeated references to the same SCRATCH,
24562 to verify the rtx_reuse code. */
24563
24564 static void
24565 ix86_test_dumping_memory_blockage ()
24566 {
24567 set_new_first_and_last_insn (NULL, NULL);
24568
24569 rtx pat = gen_memory_blockage ();
24570 rtx_reuse_manager r;
24571 r.preprocess (pat);
24572
24573 /* Verify that the repeated references to the SCRATCH use
24574 reuse IDs. The first should be prefixed with a reuse ID,
24575 and the second should be dumped as a "reuse_rtx" of that ID.
24576 The expected string assumes Pmode == DImode. */
24577 if (Pmode == DImode)
24578 ASSERT_RTL_DUMP_EQ_WITH_REUSE
24579 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
24580 " (unspec:BLK [\n"
24581 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
24582 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
24583 }
24584
24585 /* Verify loading an RTL dump; specifically a dump of copying
24586 a param on x86_64 from a hard reg into the frame.
24587 This test is target-specific since the dump contains target-specific
24588 hard reg names. */
24589
24590 static void
24591 ix86_test_loading_dump_fragment_1 ()
24592 {
24593 rtl_dump_test t (SELFTEST_LOCATION,
24594 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
24595
24596 rtx_insn *insn = get_insn_by_uid (1);
24597
24598 /* The block structure and indentation here are purely for
24599 readability; they mirror the structure of the rtx. */
24600 tree mem_expr;
24601 {
24602 rtx pat = PATTERN (insn);
24603 ASSERT_EQ (SET, GET_CODE (pat));
24604 {
24605 rtx dest = SET_DEST (pat);
24606 ASSERT_EQ (MEM, GET_CODE (dest));
24607 /* Verify the "/c" was parsed. */
24608 ASSERT_TRUE (RTX_FLAG (dest, call));
24609 ASSERT_EQ (SImode, GET_MODE (dest));
24610 {
24611 rtx addr = XEXP (dest, 0);
24612 ASSERT_EQ (PLUS, GET_CODE (addr));
24613 ASSERT_EQ (DImode, GET_MODE (addr));
24614 {
24615 rtx lhs = XEXP (addr, 0);
24616 /* Verify that the "frame" REG was consolidated. */
24617 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
24618 }
24619 {
24620 rtx rhs = XEXP (addr, 1);
24621 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
24622 ASSERT_EQ (-4, INTVAL (rhs));
24623 }
24624 }
24625 /* Verify the "[1 i+0 S4 A32]" was parsed. */
24626 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
24627 /* "i" should have been handled by synthesizing a global int
24628 variable named "i". */
24629 mem_expr = MEM_EXPR (dest);
24630 ASSERT_NE (mem_expr, NULL);
24631 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
24632 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
24633 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
24634 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
24635 /* "+0". */
24636 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
24637 ASSERT_EQ (0, MEM_OFFSET (dest));
24638 /* "S4". */
24639 ASSERT_EQ (4, MEM_SIZE (dest));
24640 /* "A32. */
24641 ASSERT_EQ (32, MEM_ALIGN (dest));
24642 }
24643 {
24644 rtx src = SET_SRC (pat);
24645 ASSERT_EQ (REG, GET_CODE (src));
24646 ASSERT_EQ (SImode, GET_MODE (src));
24647 ASSERT_EQ (5, REGNO (src));
24648 tree reg_expr = REG_EXPR (src);
24649 /* "i" here should point to the same var as for the MEM_EXPR. */
24650 ASSERT_EQ (reg_expr, mem_expr);
24651 }
24652 }
24653 }
24654
24655 /* Verify that the RTL loader copes with a call_insn dump.
24656 This test is target-specific since the dump contains a target-specific
24657 hard reg name. */
24658
24659 static void
24660 ix86_test_loading_call_insn ()
24661 {
24662 /* The test dump includes register "xmm0", which requires TARGET_SSE
24663 to exist. */
24664 if (!TARGET_SSE)
24665 return;
24666
24667 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
24668
24669 rtx_insn *insn = get_insns ();
24670 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
24671
24672 /* "/j". */
24673 ASSERT_TRUE (RTX_FLAG (insn, jump));
24674
24675 rtx pat = PATTERN (insn);
24676 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
24677
24678 /* Verify REG_NOTES. */
24679 {
24680 /* "(expr_list:REG_CALL_DECL". */
24681 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
24682 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
24683 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
24684
24685 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
24686 rtx_expr_list *note1 = note0->next ();
24687 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
24688
24689 ASSERT_EQ (NULL, note1->next ());
24690 }
24691
24692 /* Verify CALL_INSN_FUNCTION_USAGE. */
24693 {
24694 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
24695 rtx_expr_list *usage
24696 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
24697 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
24698 ASSERT_EQ (DFmode, GET_MODE (usage));
24699 ASSERT_EQ (USE, GET_CODE (usage->element ()));
24700 ASSERT_EQ (NULL, usage->next ());
24701 }
24702 }
24703
24704 /* Verify that the RTL loader copes with a dump from print_rtx_function.
24705 This test is target-specific since the dump contains target-specific
24706 hard reg names. */
24707
24708 static void
24709 ix86_test_loading_full_dump ()
24710 {
24711 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
24712
24713 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
24714
24715 rtx_insn *insn_1 = get_insn_by_uid (1);
24716 ASSERT_EQ (NOTE, GET_CODE (insn_1));
24717
24718 rtx_insn *insn_7 = get_insn_by_uid (7);
24719 ASSERT_EQ (INSN, GET_CODE (insn_7));
24720 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
24721
24722 rtx_insn *insn_15 = get_insn_by_uid (15);
24723 ASSERT_EQ (INSN, GET_CODE (insn_15));
24724 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
24725
24726 /* Verify crtl->return_rtx. */
24727 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
24728 ASSERT_EQ (0, REGNO (crtl->return_rtx));
24729 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
24730 }
24731
24732 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
24733 In particular, verify that it correctly loads the 2nd operand.
24734 This test is target-specific since these are machine-specific
24735 operands (and enums). */
24736
24737 static void
24738 ix86_test_loading_unspec ()
24739 {
24740 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
24741
24742 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
24743
24744 ASSERT_TRUE (cfun);
24745
24746 /* Test of an UNSPEC. */
24747 rtx_insn *insn = get_insns ();
24748 ASSERT_EQ (INSN, GET_CODE (insn));
24749 rtx set = single_set (insn);
24750 ASSERT_NE (NULL, set);
24751 rtx dst = SET_DEST (set);
24752 ASSERT_EQ (MEM, GET_CODE (dst));
24753 rtx src = SET_SRC (set);
24754 ASSERT_EQ (UNSPEC, GET_CODE (src));
24755 ASSERT_EQ (BLKmode, GET_MODE (src));
24756 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
24757
24758 rtx v0 = XVECEXP (src, 0, 0);
24759
24760 /* Verify that the two uses of the first SCRATCH have pointer
24761 equality. */
24762 rtx scratch_a = XEXP (dst, 0);
24763 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
24764
24765 rtx scratch_b = XEXP (v0, 0);
24766 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
24767
24768 ASSERT_EQ (scratch_a, scratch_b);
24769
24770 /* Verify that the two mems are thus treated as equal. */
24771 ASSERT_TRUE (rtx_equal_p (dst, v0));
24772
24773 /* Verify that the insn is recognized. */
24774 ASSERT_NE(-1, recog_memoized (insn));
24775
24776 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
24777 insn = NEXT_INSN (insn);
24778 ASSERT_EQ (INSN, GET_CODE (insn));
24779
24780 set = single_set (insn);
24781 ASSERT_NE (NULL, set);
24782
24783 src = SET_SRC (set);
24784 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
24785 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
24786 }
24787
24788 /* Run all target-specific selftests. */
24789
24790 static void
24791 ix86_run_selftests (void)
24792 {
24793 ix86_test_dumping_hard_regs ();
24794 ix86_test_dumping_memory_blockage ();
24795
24796 /* Various tests of loading RTL dumps, here because they contain
24797 ix86-isms (e.g. names of hard regs). */
24798 ix86_test_loading_dump_fragment_1 ();
24799 ix86_test_loading_call_insn ();
24800 ix86_test_loading_full_dump ();
24801 ix86_test_loading_unspec ();
24802 }
24803
24804 } // namespace selftest
24805
24806 #endif /* CHECKING_P */
24807
24808 /* Initialize the GCC target structure. */
24809 #undef TARGET_RETURN_IN_MEMORY
24810 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
24811
24812 #undef TARGET_LEGITIMIZE_ADDRESS
24813 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
24814
24815 #undef TARGET_ATTRIBUTE_TABLE
24816 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
24817 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
24818 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
24819 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
24820 # undef TARGET_MERGE_DECL_ATTRIBUTES
24821 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
24822 #endif
24823
24824 #undef TARGET_COMP_TYPE_ATTRIBUTES
24825 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
24826
24827 #undef TARGET_INIT_BUILTINS
24828 #define TARGET_INIT_BUILTINS ix86_init_builtins
24829 #undef TARGET_BUILTIN_DECL
24830 #define TARGET_BUILTIN_DECL ix86_builtin_decl
24831 #undef TARGET_EXPAND_BUILTIN
24832 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
24833
24834 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
24835 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
24836 ix86_builtin_vectorized_function
24837
24838 #undef TARGET_VECTORIZE_BUILTIN_GATHER
24839 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
24840
24841 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
24842 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
24843
24844 #undef TARGET_BUILTIN_RECIPROCAL
24845 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
24846
24847 #undef TARGET_ASM_FUNCTION_EPILOGUE
24848 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
24849
24850 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
24851 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
24852 ix86_print_patchable_function_entry
24853
24854 #undef TARGET_ENCODE_SECTION_INFO
24855 #ifndef SUBTARGET_ENCODE_SECTION_INFO
24856 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
24857 #else
24858 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
24859 #endif
24860
24861 #undef TARGET_ASM_OPEN_PAREN
24862 #define TARGET_ASM_OPEN_PAREN ""
24863 #undef TARGET_ASM_CLOSE_PAREN
24864 #define TARGET_ASM_CLOSE_PAREN ""
24865
24866 #undef TARGET_ASM_BYTE_OP
24867 #define TARGET_ASM_BYTE_OP ASM_BYTE
24868
24869 #undef TARGET_ASM_ALIGNED_HI_OP
24870 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
24871 #undef TARGET_ASM_ALIGNED_SI_OP
24872 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
24873 #ifdef ASM_QUAD
24874 #undef TARGET_ASM_ALIGNED_DI_OP
24875 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
24876 #endif
24877
24878 #undef TARGET_PROFILE_BEFORE_PROLOGUE
24879 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
24880
24881 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
24882 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
24883
24884 #undef TARGET_ASM_UNALIGNED_HI_OP
24885 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
24886 #undef TARGET_ASM_UNALIGNED_SI_OP
24887 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
24888 #undef TARGET_ASM_UNALIGNED_DI_OP
24889 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
24890
24891 #undef TARGET_PRINT_OPERAND
24892 #define TARGET_PRINT_OPERAND ix86_print_operand
24893 #undef TARGET_PRINT_OPERAND_ADDRESS
24894 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
24895 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
24896 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
24897 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
24898 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
24899
24900 #undef TARGET_SCHED_INIT_GLOBAL
24901 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
24902 #undef TARGET_SCHED_ADJUST_COST
24903 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
24904 #undef TARGET_SCHED_ISSUE_RATE
24905 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
24906 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
24907 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
24908 ia32_multipass_dfa_lookahead
24909 #undef TARGET_SCHED_MACRO_FUSION_P
24910 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
24911 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
24912 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
24913
24914 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
24915 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
24916
24917 #undef TARGET_MEMMODEL_CHECK
24918 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
24919
24920 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
24921 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
24922
24923 #ifdef HAVE_AS_TLS
24924 #undef TARGET_HAVE_TLS
24925 #define TARGET_HAVE_TLS true
24926 #endif
24927 #undef TARGET_CANNOT_FORCE_CONST_MEM
24928 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
24929 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
24930 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
24931
24932 #undef TARGET_DELEGITIMIZE_ADDRESS
24933 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
24934
24935 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
24936 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
24937
24938 #undef TARGET_MS_BITFIELD_LAYOUT_P
24939 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
24940
24941 #if TARGET_MACHO
24942 #undef TARGET_BINDS_LOCAL_P
24943 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
24944 #else
24945 #undef TARGET_BINDS_LOCAL_P
24946 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
24947 #endif
24948 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
24949 #undef TARGET_BINDS_LOCAL_P
24950 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
24951 #endif
24952
24953 #undef TARGET_ASM_OUTPUT_MI_THUNK
24954 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
24955 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
24956 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
24957
24958 #undef TARGET_ASM_FILE_START
24959 #define TARGET_ASM_FILE_START x86_file_start
24960
24961 #undef TARGET_OPTION_OVERRIDE
24962 #define TARGET_OPTION_OVERRIDE ix86_option_override
24963
24964 #undef TARGET_REGISTER_MOVE_COST
24965 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
24966 #undef TARGET_MEMORY_MOVE_COST
24967 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
24968 #undef TARGET_RTX_COSTS
24969 #define TARGET_RTX_COSTS ix86_rtx_costs
24970 #undef TARGET_ADDRESS_COST
24971 #define TARGET_ADDRESS_COST ix86_address_cost
24972
24973 #undef TARGET_OVERLAP_OP_BY_PIECES_P
24974 #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
24975
24976 #undef TARGET_FLAGS_REGNUM
24977 #define TARGET_FLAGS_REGNUM FLAGS_REG
24978 #undef TARGET_FIXED_CONDITION_CODE_REGS
24979 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
24980 #undef TARGET_CC_MODES_COMPATIBLE
24981 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
24982
24983 #undef TARGET_MACHINE_DEPENDENT_REORG
24984 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
24985
24986 #undef TARGET_BUILD_BUILTIN_VA_LIST
24987 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
24988
24989 #undef TARGET_FOLD_BUILTIN
24990 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
24991
24992 #undef TARGET_GIMPLE_FOLD_BUILTIN
24993 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
24994
24995 #undef TARGET_COMPARE_VERSION_PRIORITY
24996 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
24997
24998 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
24999 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
25000 ix86_generate_version_dispatcher_body
25001
25002 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
25003 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
25004 ix86_get_function_versions_dispatcher
25005
25006 #undef TARGET_ENUM_VA_LIST_P
25007 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
25008
25009 #undef TARGET_FN_ABI_VA_LIST
25010 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
25011
25012 #undef TARGET_CANONICAL_VA_LIST_TYPE
25013 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
25014
25015 #undef TARGET_EXPAND_BUILTIN_VA_START
25016 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25017
25018 #undef TARGET_MD_ASM_ADJUST
25019 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
25020
25021 #undef TARGET_C_EXCESS_PRECISION
25022 #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
25023 #undef TARGET_PROMOTE_PROTOTYPES
25024 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25025 #undef TARGET_PUSH_ARGUMENT
25026 #define TARGET_PUSH_ARGUMENT ix86_push_argument
25027 #undef TARGET_SETUP_INCOMING_VARARGS
25028 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25029 #undef TARGET_MUST_PASS_IN_STACK
25030 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25031 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
25032 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
25033 #undef TARGET_FUNCTION_ARG_ADVANCE
25034 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
25035 #undef TARGET_FUNCTION_ARG
25036 #define TARGET_FUNCTION_ARG ix86_function_arg
25037 #undef TARGET_INIT_PIC_REG
25038 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
25039 #undef TARGET_USE_PSEUDO_PIC_REG
25040 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
25041 #undef TARGET_FUNCTION_ARG_BOUNDARY
25042 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
25043 #undef TARGET_PASS_BY_REFERENCE
25044 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25045 #undef TARGET_INTERNAL_ARG_POINTER
25046 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25047 #undef TARGET_UPDATE_STACK_BOUNDARY
25048 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
25049 #undef TARGET_GET_DRAP_RTX
25050 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
25051 #undef TARGET_STRICT_ARGUMENT_NAMING
25052 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25053 #undef TARGET_STATIC_CHAIN
25054 #define TARGET_STATIC_CHAIN ix86_static_chain
25055 #undef TARGET_TRAMPOLINE_INIT
25056 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
25057 #undef TARGET_RETURN_POPS_ARGS
25058 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
25059
25060 #undef TARGET_WARN_FUNC_RETURN
25061 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
25062
25063 #undef TARGET_LEGITIMATE_COMBINED_INSN
25064 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
25065
25066 #undef TARGET_ASAN_SHADOW_OFFSET
25067 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
25068
25069 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25070 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25071
25072 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25073 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25074
25075 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
25076 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
25077 ix86_libgcc_floating_mode_supported_p
25078
25079 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25080 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25081
25082 #undef TARGET_C_MODE_FOR_SUFFIX
25083 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25084
25085 #ifdef HAVE_AS_TLS
25086 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25087 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25088 #endif
25089
25090 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25091 #undef TARGET_INSERT_ATTRIBUTES
25092 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25093 #endif
25094
25095 #undef TARGET_MANGLE_TYPE
25096 #define TARGET_MANGLE_TYPE ix86_mangle_type
25097
25098 #undef TARGET_EMIT_SUPPORT_TINFOS
25099 #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
25100
25101 #undef TARGET_STACK_PROTECT_GUARD
25102 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
25103
25104 #if !TARGET_MACHO
25105 #undef TARGET_STACK_PROTECT_FAIL
25106 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25107 #endif
25108
25109 #undef TARGET_FUNCTION_VALUE
25110 #define TARGET_FUNCTION_VALUE ix86_function_value
25111
25112 #undef TARGET_FUNCTION_VALUE_REGNO_P
25113 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
25114
25115 #undef TARGET_ZERO_CALL_USED_REGS
25116 #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
25117
25118 #undef TARGET_PROMOTE_FUNCTION_MODE
25119 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
25120
25121 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
25122 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
25123
25124 #undef TARGET_MEMBER_TYPE_FORCES_BLK
25125 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
25126
25127 #undef TARGET_INSTANTIATE_DECLS
25128 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
25129
25130 #undef TARGET_SECONDARY_RELOAD
25131 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
25132 #undef TARGET_SECONDARY_MEMORY_NEEDED
25133 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
25134 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
25135 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
25136
25137 #undef TARGET_CLASS_MAX_NREGS
25138 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
25139
25140 #undef TARGET_PREFERRED_RELOAD_CLASS
25141 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
25142 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
25143 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
25144 #undef TARGET_CLASS_LIKELY_SPILLED_P
25145 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
25146
25147 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25148 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
25149 ix86_builtin_vectorization_cost
25150 #undef TARGET_VECTORIZE_VEC_PERM_CONST
25151 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
25152 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
25153 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
25154 ix86_preferred_simd_mode
25155 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
25156 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
25157 ix86_split_reduction
25158 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
25159 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
25160 ix86_autovectorize_vector_modes
25161 #undef TARGET_VECTORIZE_GET_MASK_MODE
25162 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
25163 #undef TARGET_VECTORIZE_CREATE_COSTS
25164 #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
25165
25166 #undef TARGET_SET_CURRENT_FUNCTION
25167 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
25168
25169 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
25170 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
25171
25172 #undef TARGET_OPTION_SAVE
25173 #define TARGET_OPTION_SAVE ix86_function_specific_save
25174
25175 #undef TARGET_OPTION_RESTORE
25176 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
25177
25178 #undef TARGET_OPTION_POST_STREAM_IN
25179 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
25180
25181 #undef TARGET_OPTION_PRINT
25182 #define TARGET_OPTION_PRINT ix86_function_specific_print
25183
25184 #undef TARGET_OPTION_FUNCTION_VERSIONS
25185 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
25186
25187 #undef TARGET_CAN_INLINE_P
25188 #define TARGET_CAN_INLINE_P ix86_can_inline_p
25189
25190 #undef TARGET_LEGITIMATE_ADDRESS_P
25191 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
25192
25193 #undef TARGET_REGISTER_PRIORITY
25194 #define TARGET_REGISTER_PRIORITY ix86_register_priority
25195
25196 #undef TARGET_REGISTER_USAGE_LEVELING_P
25197 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
25198
25199 #undef TARGET_LEGITIMATE_CONSTANT_P
25200 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
25201
25202 #undef TARGET_COMPUTE_FRAME_LAYOUT
25203 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
25204
25205 #undef TARGET_FRAME_POINTER_REQUIRED
25206 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
25207
25208 #undef TARGET_CAN_ELIMINATE
25209 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
25210
25211 #undef TARGET_EXTRA_LIVE_ON_ENTRY
25212 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
25213
25214 #undef TARGET_ASM_CODE_END
25215 #define TARGET_ASM_CODE_END ix86_code_end
25216
25217 #undef TARGET_CONDITIONAL_REGISTER_USAGE
25218 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
25219
25220 #undef TARGET_CANONICALIZE_COMPARISON
25221 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
25222
25223 #undef TARGET_LOOP_UNROLL_ADJUST
25224 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
25225
25226 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
25227 #undef TARGET_SPILL_CLASS
25228 #define TARGET_SPILL_CLASS ix86_spill_class
25229
25230 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
25231 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
25232 ix86_simd_clone_compute_vecsize_and_simdlen
25233
25234 #undef TARGET_SIMD_CLONE_ADJUST
25235 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
25236
25237 #undef TARGET_SIMD_CLONE_USABLE
25238 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
25239
25240 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
25241 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
25242
25243 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
25244 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
25245 ix86_float_exceptions_rounding_supported_p
25246
25247 #undef TARGET_MODE_EMIT
25248 #define TARGET_MODE_EMIT ix86_emit_mode_set
25249
25250 #undef TARGET_MODE_NEEDED
25251 #define TARGET_MODE_NEEDED ix86_mode_needed
25252
25253 #undef TARGET_MODE_AFTER
25254 #define TARGET_MODE_AFTER ix86_mode_after
25255
25256 #undef TARGET_MODE_ENTRY
25257 #define TARGET_MODE_ENTRY ix86_mode_entry
25258
25259 #undef TARGET_MODE_EXIT
25260 #define TARGET_MODE_EXIT ix86_mode_exit
25261
25262 #undef TARGET_MODE_PRIORITY
25263 #define TARGET_MODE_PRIORITY ix86_mode_priority
25264
25265 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
25266 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
25267
25268 #undef TARGET_OFFLOAD_OPTIONS
25269 #define TARGET_OFFLOAD_OPTIONS \
25270 ix86_offload_options
25271
25272 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
25273 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
25274
25275 #undef TARGET_OPTAB_SUPPORTED_P
25276 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
25277
25278 #undef TARGET_HARD_REGNO_SCRATCH_OK
25279 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
25280
25281 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
25282 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
25283
25284 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
25285 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
25286
25287 #undef TARGET_INIT_LIBFUNCS
25288 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
25289
25290 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
25291 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
25292
25293 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
25294 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
25295
25296 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
25297 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
25298
25299 #undef TARGET_HARD_REGNO_NREGS
25300 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
25301 #undef TARGET_HARD_REGNO_MODE_OK
25302 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
25303
25304 #undef TARGET_MODES_TIEABLE_P
25305 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
25306
25307 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
25308 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
25309 ix86_hard_regno_call_part_clobbered
25310
25311 #undef TARGET_INSN_CALLEE_ABI
25312 #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
25313
25314 #undef TARGET_CAN_CHANGE_MODE_CLASS
25315 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
25316
25317 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
25318 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
25319
25320 #undef TARGET_STATIC_RTX_ALIGNMENT
25321 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
25322 #undef TARGET_CONSTANT_ALIGNMENT
25323 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
25324
25325 #undef TARGET_EMPTY_RECORD_P
25326 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
25327
25328 #undef TARGET_WARN_PARAMETER_PASSING_ABI
25329 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
25330
25331 #undef TARGET_GET_MULTILIB_ABI_NAME
25332 #define TARGET_GET_MULTILIB_ABI_NAME \
25333 ix86_get_multilib_abi_name
25334
25335 #undef TARGET_IFUNC_REF_LOCAL_OK
25336 #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
25337
25338 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
25339 # undef TARGET_ASM_RELOC_RW_MASK
25340 # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
25341 #endif
25342
25343 #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
25344 #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
25345
25346 #undef TARGET_MEMTAG_ADD_TAG
25347 #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
25348
25349 #undef TARGET_MEMTAG_SET_TAG
25350 #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
25351
25352 #undef TARGET_MEMTAG_EXTRACT_TAG
25353 #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
25354
25355 #undef TARGET_MEMTAG_UNTAGGED_POINTER
25356 #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
25357
25358 #undef TARGET_MEMTAG_TAG_SIZE
25359 #define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size
25360
25361 static bool
25362 ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
25363 {
25364 #ifdef OPTION_GLIBC
25365 if (OPTION_GLIBC)
25366 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
25367 else
25368 return false;
25369 #else
25370 return false;
25371 #endif
25372 }
25373
25374 #undef TARGET_LIBC_HAS_FAST_FUNCTION
25375 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
25376
25377 static unsigned
25378 ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
25379 bool boundary_p)
25380 {
25381 #ifdef OPTION_GLIBC
25382 bool glibc_p = OPTION_GLIBC;
25383 #else
25384 bool glibc_p = false;
25385 #endif
25386 if (glibc_p)
25387 {
25388 /* If __FAST_MATH__ is defined, glibc provides libmvec. */
25389 unsigned int libmvec_ret = 0;
25390 if (!flag_trapping_math
25391 && flag_unsafe_math_optimizations
25392 && flag_finite_math_only
25393 && !flag_signed_zeros
25394 && !flag_errno_math)
25395 switch (cfn)
25396 {
25397 CASE_CFN_COS:
25398 CASE_CFN_COS_FN:
25399 CASE_CFN_SIN:
25400 CASE_CFN_SIN_FN:
25401 if (!boundary_p)
25402 {
25403 /* With non-default rounding modes, libmvec provides
25404 complete garbage in results. E.g.
25405 _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
25406 returns 0.00333309174f rather than 1.40129846e-45f. */
25407 if (flag_rounding_math)
25408 return ~0U;
25409 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
25410 claims libmvec maximum error is 4ulps.
25411 My own random testing indicates 2ulps for SFmode and
25412 0.5ulps for DFmode, but let's go with the 4ulps. */
25413 libmvec_ret = 4;
25414 }
25415 break;
25416 default:
25417 break;
25418 }
25419 unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
25420 boundary_p);
25421 return MAX (ret, libmvec_ret);
25422 }
25423 return default_libm_function_max_error (cfn, mode, boundary_p);
25424 }
25425
25426 #undef TARGET_LIBM_FUNCTION_MAX_ERROR
25427 #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
25428
25429 #if CHECKING_P
25430 #undef TARGET_RUN_TARGET_SELFTESTS
25431 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
25432 #endif /* #if CHECKING_P */
25433
25434 struct gcc_target targetm = TARGET_INITIALIZER;
25435 \f
25436 #include "gt-i386.h"