1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
24 #include "coretypes.h"
34 #include "stringpool.h"
41 #include "diagnostic.h"
44 #include "fold-const.h"
47 #include "stor-layout.h"
50 #include "insn-attr.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
61 #include "tm-constrs.h"
64 #include "sched-int.h"
66 #include "tree-pass.h"
68 #include "pass_manager.h"
69 #include "target-globals.h"
70 #include "gimple-iterator.h"
71 #include "tree-vectorizer.h"
72 #include "shrink-wrap.h"
75 #include "tree-iterator.h"
77 #include "case-cfn-macros.h"
79 #include "fold-const-call.h"
81 #include "tree-ssanames.h"
83 #include "selftest-rtl.h"
84 #include "print-rtl.h"
87 #include "symbol-summary.h"
89 #include "ipa-fnsummary.h"
90 #include "wide-int-bitmask.h"
91 #include "tree-vector-builder.h"
93 #include "dwarf2out.h"
94 #include "i386-options.h"
95 #include "i386-builtins.h"
96 #include "i386-expand.h"
97 #include "i386-features.h"
99 /* This file should be included last. */
100 #include "target-def.h"
102 static rtx
legitimize_dllimport_symbol (rtx
, bool);
103 static rtx
legitimize_pe_coff_extern_decl (rtx
, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx
, addr_space_t
, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx
);
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
112 /* Return index of given mode in mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
122 const struct processor_costs
*ix86_tune_cost
= NULL
;
124 /* Set by -mtune or -Os. */
125 const struct processor_costs
*ix86_cost
= NULL
;
127 /* In case the average insn count for single function invocation is
128 lower than this constant, emit fast (but longer) prologue and
130 #define FAST_PROLOGUE_INSN_COUNT 20
132 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
133 static const char *const qi_reg_name
[] = QI_REGISTER_NAMES
;
134 static const char *const qi_high_reg_name
[] = QI_HIGH_REGISTER_NAMES
;
135 static const char *const hi_reg_name
[] = HI_REGISTER_NAMES
;
137 /* Array of the smallest class containing reg number REGNO, indexed by
138 REGNO. Used by REGNO_REG_CLASS in i386.h. */
140 enum reg_class
const regclass_map
[FIRST_PSEUDO_REGISTER
] =
143 AREG
, DREG
, CREG
, BREG
,
145 SIREG
, DIREG
, NON_Q_REGS
, NON_Q_REGS
,
147 FP_TOP_REG
, FP_SECOND_REG
, FLOAT_REGS
, FLOAT_REGS
,
148 FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
, FLOAT_REGS
,
149 /* arg pointer, flags, fpsr, frame */
150 NON_Q_REGS
, NO_REGS
, NO_REGS
, NON_Q_REGS
,
152 SSE_FIRST_REG
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
153 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
155 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
156 MMX_REGS
, MMX_REGS
, MMX_REGS
, MMX_REGS
,
158 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
159 GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
, GENERAL_REGS
,
160 /* SSE REX registers */
161 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
162 SSE_REGS
, SSE_REGS
, SSE_REGS
, SSE_REGS
,
163 /* AVX-512 SSE registers */
164 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
165 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
166 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
167 ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
, ALL_SSE_REGS
,
168 /* Mask registers. */
169 ALL_MASK_REGS
, MASK_REGS
, MASK_REGS
, MASK_REGS
,
170 MASK_REGS
, MASK_REGS
, MASK_REGS
, MASK_REGS
173 /* The "default" register map used in 32bit mode. */
175 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
178 0, 2, 1, 3, 6, 7, 4, 5,
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
183 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
185 21, 22, 23, 24, 25, 26, 27, 28,
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
190 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
191 /* extended sse registers */
192 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
193 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
196 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
199 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
201 93, 94, 95, 96, 97, 98, 99, 100
204 /* The "default" register map used in 64bit mode. */
206 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
209 0, 1, 2, 3, 4, 5, 6, 7,
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
214 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
216 17, 18, 19, 20, 21, 22, 23, 24,
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
228 118, 119, 120, 121, 122, 123, 124, 125
231 /* Define the register numbers to be used in Dwarf debugging information.
232 The SVR4 reference port C compiler uses the following register numbers
233 in its Dwarf output code:
234 0 for %eax (gcc regno = 0)
235 1 for %ecx (gcc regno = 2)
236 2 for %edx (gcc regno = 1)
237 3 for %ebx (gcc regno = 3)
238 4 for %esp (gcc regno = 7)
239 5 for %ebp (gcc regno = 6)
240 6 for %esi (gcc regno = 4)
241 7 for %edi (gcc regno = 5)
242 The following three DWARF register numbers are never generated by
243 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244 believed these numbers have these meanings.
245 8 for %eip (no gcc equivalent)
246 9 for %eflags (gcc regno = 17)
247 10 for %trapno (no gcc equivalent)
248 It is not at all clear how we should number the FP stack registers
249 for the x86 architecture. If the version of SDB on x86/svr4 were
250 a bit less brain dead with respect to floating-point then we would
251 have a precedent to follow with respect to DWARF register numbers
252 for x86 FP registers, but the SDB on x86/svr4 was so completely
253 broken with respect to FP registers that it is hardly worth thinking
254 of it as something to strive for compatibility with.
255 The version of x86/svr4 SDB I had does (partially)
256 seem to believe that DWARF register number 11 is associated with
257 the x86 register %st(0), but that's about all. Higher DWARF
258 register numbers don't seem to be associated with anything in
259 particular, and even for DWARF regno 11, SDB only seemed to under-
260 stand that it should say that a variable lives in %st(0) (when
261 asked via an `=' command) if we said it was in DWARF regno 11,
262 but SDB still printed garbage when asked for the value of the
263 variable in question (via a `/' command).
264 (Also note that the labels SDB printed for various FP stack regs
265 when doing an `x' command were all wrong.)
266 Note that these problems generally don't affect the native SVR4
267 C compiler because it doesn't allow the use of -O with -g and
268 because when it is *not* optimizing, it allocates a memory
269 location for each floating-point variable, and the memory
270 location is what gets described in the DWARF AT_location
271 attribute for the variable in question.
272 Regardless of the severe mental illness of the x86/svr4 SDB, we
273 do something sensible here and we use the following DWARF
274 register numbers. Note that these are all stack-top-relative
276 11 for %st(0) (gcc regno = 8)
277 12 for %st(1) (gcc regno = 9)
278 13 for %st(2) (gcc regno = 10)
279 14 for %st(3) (gcc regno = 11)
280 15 for %st(4) (gcc regno = 12)
281 16 for %st(5) (gcc regno = 13)
282 17 for %st(6) (gcc regno = 14)
283 18 for %st(7) (gcc regno = 15)
285 int const svr4_dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
288 0, 2, 1, 3, 6, 7, 5, 4,
290 11, 12, 13, 14, 15, 16, 17, 18,
291 /* arg, flags, fpsr, frame */
292 IGNORED_DWARF_REGNUM
, 9,
293 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
295 21, 22, 23, 24, 25, 26, 27, 28,
297 29, 30, 31, 32, 33, 34, 35, 36,
298 /* extended integer registers */
299 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
300 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
301 /* extended sse registers */
302 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
303 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
304 /* AVX-512 registers 16-23 */
305 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
306 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
307 /* AVX-512 registers 24-31 */
308 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
309 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
311 93, 94, 95, 96, 97, 98, 99, 100
314 /* Define parameter passing and return registers. */
316 static int const x86_64_int_parameter_registers
[6] =
318 DI_REG
, SI_REG
, DX_REG
, CX_REG
, R8_REG
, R9_REG
321 static int const x86_64_ms_abi_int_parameter_registers
[4] =
323 CX_REG
, DX_REG
, R8_REG
, R9_REG
326 static int const x86_64_int_return_registers
[4] =
328 AX_REG
, DX_REG
, DI_REG
, SI_REG
331 /* Define the structure for the machine field in struct function. */
333 struct GTY(()) stack_local_entry
{
337 struct stack_local_entry
*next
;
340 /* Which cpu are we scheduling for. */
341 enum attr_cpu ix86_schedule
;
343 /* Which cpu are we optimizing for. */
344 enum processor_type ix86_tune
;
346 /* Which instruction set architecture to use. */
347 enum processor_type ix86_arch
;
349 /* True if processor has SSE prefetch instruction. */
350 unsigned char x86_prefetch_sse
;
352 /* Preferred alignment for stack boundary in bits. */
353 unsigned int ix86_preferred_stack_boundary
;
355 /* Alignment for incoming stack boundary in bits specified at
357 unsigned int ix86_user_incoming_stack_boundary
;
359 /* Default alignment for incoming stack boundary in bits. */
360 unsigned int ix86_default_incoming_stack_boundary
;
362 /* Alignment for incoming stack boundary in bits. */
363 unsigned int ix86_incoming_stack_boundary
;
365 /* Calling abi specific va_list type nodes. */
366 tree sysv_va_list_type_node
;
367 tree ms_va_list_type_node
;
369 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
370 char internal_label_prefix
[16];
371 int internal_label_prefix_len
;
373 /* Fence to use after loop using movnt. */
376 /* Register class used for passing given 64bit part of the argument.
377 These represent classes as documented by the PS ABI, with the exception
378 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
379 use SF or DFmode move instead of DImode to avoid reformatting penalties.
381 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
382 whenever possible (upper half does contain padding). */
383 enum x86_64_reg_class
386 X86_64_INTEGER_CLASS
,
387 X86_64_INTEGERSI_CLASS
,
394 X86_64_COMPLEX_X87_CLASS
,
398 #define MAX_CLASSES 8
400 /* Table of constants used by fldpi, fldln2, etc.... */
401 static REAL_VALUE_TYPE ext_80387_constants_table
[5];
402 static bool ext_80387_constants_init
;
405 static rtx
ix86_function_value (const_tree
, const_tree
, bool);
406 static bool ix86_function_value_regno_p (const unsigned int);
407 static unsigned int ix86_function_arg_boundary (machine_mode
,
409 static rtx
ix86_static_chain (const_tree
, bool);
410 static int ix86_function_regparm (const_tree
, const_tree
);
411 static void ix86_compute_frame_layout (void);
412 static tree
ix86_canonical_va_list_type (tree
);
413 static unsigned int split_stack_prologue_scratch_regno (void);
414 static bool i386_asm_output_addr_const_extra (FILE *, rtx
);
416 static bool ix86_can_inline_p (tree
, tree
);
417 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
420 /* Whether -mtune= or -march= were specified */
421 int ix86_tune_defaulted
;
422 int ix86_arch_specified
;
424 /* Return true if a red-zone is in use. We can't use red-zone when
425 there are local indirect jumps, like "indirect_jump" or "tablejump",
426 which jumps to another place in the function, since "call" in the
427 indirect thunk pushes the return address onto stack, destroying
430 TODO: If we can reserve the first 2 WORDs, for PUSH and, another
431 for CALL, in red-zone, we can allow local indirect jumps with
435 ix86_using_red_zone (void)
437 return (TARGET_RED_ZONE
438 && !TARGET_64BIT_MS_ABI
439 && (!cfun
->machine
->has_local_indirect_jump
440 || cfun
->machine
->indirect_branch_type
== indirect_branch_keep
));
443 /* Return true, if profiling code should be emitted before
444 prologue. Otherwise it returns false.
445 Note: For x86 with "hotfix" it is sorried. */
447 ix86_profile_before_prologue (void)
449 return flag_fentry
!= 0;
452 /* Update register usage after having seen the compiler flags. */
455 ix86_conditional_register_usage (void)
459 /* If there are no caller-saved registers, preserve all registers.
460 except fixed_regs and registers used for function return value
461 since aggregate_value_p checks call_used_regs[regno] on return
463 if (cfun
&& cfun
->machine
->no_caller_saved_registers
)
464 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
465 if (!fixed_regs
[i
] && !ix86_function_value_regno_p (i
))
466 call_used_regs
[i
] = 0;
468 /* For 32-bit targets, disable the REX registers. */
471 for (i
= FIRST_REX_INT_REG
; i
<= LAST_REX_INT_REG
; i
++)
472 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
473 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
474 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
475 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
476 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
479 /* See the definition of CALL_USED_REGISTERS in i386.h. */
480 c_mask
= CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI
);
482 CLEAR_HARD_REG_SET (reg_class_contents
[(int)CLOBBERED_REGS
]);
484 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
486 /* Set/reset conditionally defined registers from
487 CALL_USED_REGISTERS initializer. */
488 if (call_used_regs
[i
] > 1)
489 call_used_regs
[i
] = !!(call_used_regs
[i
] & c_mask
);
491 /* Calculate registers of CLOBBERED_REGS register set
492 as call used registers from GENERAL_REGS register set. */
493 if (TEST_HARD_REG_BIT (reg_class_contents
[(int)GENERAL_REGS
], i
)
494 && call_used_regs
[i
])
495 SET_HARD_REG_BIT (reg_class_contents
[(int)CLOBBERED_REGS
], i
);
498 /* If MMX is disabled, disable the registers. */
500 AND_COMPL_HARD_REG_SET (accessible_reg_set
,
501 reg_class_contents
[(int) MMX_REGS
]);
503 /* If SSE is disabled, disable the registers. */
505 AND_COMPL_HARD_REG_SET (accessible_reg_set
,
506 reg_class_contents
[(int) ALL_SSE_REGS
]);
508 /* If the FPU is disabled, disable the registers. */
509 if (! (TARGET_80387
|| TARGET_FLOAT_RETURNS_IN_80387
))
510 AND_COMPL_HARD_REG_SET (accessible_reg_set
,
511 reg_class_contents
[(int) FLOAT_REGS
]);
513 /* If AVX512F is disabled, disable the registers. */
514 if (! TARGET_AVX512F
)
516 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
517 CLEAR_HARD_REG_BIT (accessible_reg_set
, i
);
519 AND_COMPL_HARD_REG_SET (accessible_reg_set
,
520 reg_class_contents
[(int) ALL_MASK_REGS
]);
524 /* Canonicalize a comparison from one we don't have to one we do have. */
527 ix86_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
528 bool op0_preserve_value
)
530 /* The order of operands in x87 ficom compare is forced by combine in
531 simplify_comparison () function. Float operator is treated as RTX_OBJ
532 with a precedence over other operators and is always put in the first
533 place. Swap condition and operands to match ficom instruction. */
534 if (!op0_preserve_value
535 && GET_CODE (*op0
) == FLOAT
&& MEM_P (XEXP (*op0
, 0)) && REG_P (*op1
))
537 enum rtx_code scode
= swap_condition ((enum rtx_code
) *code
);
539 /* We are called only for compares that are split to SAHF instruction.
540 Ensure that we have setcc/jcc insn for the swapped condition. */
541 if (ix86_fp_compare_code_to_integer (scode
) != UNKNOWN
)
543 std::swap (*op0
, *op1
);
550 /* Hook to determine if one function can safely inline another. */
553 ix86_can_inline_p (tree caller
, tree callee
)
555 tree caller_tree
= DECL_FUNCTION_SPECIFIC_TARGET (caller
);
556 tree callee_tree
= DECL_FUNCTION_SPECIFIC_TARGET (callee
);
558 /* Changes of those flags can be tolerated for always inlines. Lets hope
559 user knows what he is doing. */
560 const unsigned HOST_WIDE_INT always_inline_safe_mask
561 = (MASK_USE_8BIT_IDIV
| MASK_ACCUMULATE_OUTGOING_ARGS
562 | MASK_NO_ALIGN_STRINGOPS
| MASK_AVX256_SPLIT_UNALIGNED_LOAD
563 | MASK_AVX256_SPLIT_UNALIGNED_STORE
| MASK_CLD
564 | MASK_NO_FANCY_MATH_387
| MASK_IEEE_FP
| MASK_INLINE_ALL_STRINGOPS
565 | MASK_INLINE_STRINGOPS_DYNAMICALLY
| MASK_RECIP
| MASK_STACK_PROBE
566 | MASK_STV
| MASK_TLS_DIRECT_SEG_REFS
| MASK_VZEROUPPER
567 | MASK_NO_PUSH_ARGS
| MASK_OMIT_LEAF_FRAME_POINTER
);
571 callee_tree
= target_option_default_node
;
573 caller_tree
= target_option_default_node
;
574 if (callee_tree
== caller_tree
)
577 struct cl_target_option
*caller_opts
= TREE_TARGET_OPTION (caller_tree
);
578 struct cl_target_option
*callee_opts
= TREE_TARGET_OPTION (callee_tree
);
581 = (DECL_DISREGARD_INLINE_LIMITS (callee
)
582 && lookup_attribute ("always_inline",
583 DECL_ATTRIBUTES (callee
)));
585 cgraph_node
*callee_node
= cgraph_node::get (callee
);
586 /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
587 function can inline a SSE2 function but a SSE2 function can't inline
589 if (((caller_opts
->x_ix86_isa_flags
& callee_opts
->x_ix86_isa_flags
)
590 != callee_opts
->x_ix86_isa_flags
)
591 || ((caller_opts
->x_ix86_isa_flags2
& callee_opts
->x_ix86_isa_flags2
)
592 != callee_opts
->x_ix86_isa_flags2
))
595 /* See if we have the same non-isa options. */
596 else if ((!always_inline
597 && caller_opts
->x_target_flags
!= callee_opts
->x_target_flags
)
598 || (caller_opts
->x_target_flags
& ~always_inline_safe_mask
)
599 != (callee_opts
->x_target_flags
& ~always_inline_safe_mask
))
602 /* See if arch, tune, etc. are the same. */
603 else if (caller_opts
->arch
!= callee_opts
->arch
)
606 else if (!always_inline
&& caller_opts
->tune
!= callee_opts
->tune
)
609 else if (caller_opts
->x_ix86_fpmath
!= callee_opts
->x_ix86_fpmath
610 /* If the calle doesn't use FP expressions differences in
611 ix86_fpmath can be ignored. We are called from FEs
612 for multi-versioning call optimization, so beware of
613 ipa_fn_summaries not available. */
614 && (! ipa_fn_summaries
615 || ipa_fn_summaries
->get (callee_node
) == NULL
616 || ipa_fn_summaries
->get (callee_node
)->fp_expressions
))
619 else if (!always_inline
620 && caller_opts
->branch_cost
!= callee_opts
->branch_cost
)
629 /* Return true if this goes in large data/bss. */
632 ix86_in_large_data_p (tree exp
)
634 if (ix86_cmodel
!= CM_MEDIUM
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
637 if (exp
== NULL_TREE
)
640 /* Functions are never large data. */
641 if (TREE_CODE (exp
) == FUNCTION_DECL
)
644 /* Automatic variables are never large data. */
645 if (VAR_P (exp
) && !is_global_var (exp
))
648 if (VAR_P (exp
) && DECL_SECTION_NAME (exp
))
650 const char *section
= DECL_SECTION_NAME (exp
);
651 if (strcmp (section
, ".ldata") == 0
652 || strcmp (section
, ".lbss") == 0)
658 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (exp
));
660 /* If this is an incomplete type with size 0, then we can't put it
661 in data because it might be too big when completed. Also,
662 int_size_in_bytes returns -1 if size can vary or is larger than
663 an integer in which case also it is safer to assume that it goes in
665 if (size
<= 0 || size
> ix86_section_threshold
)
672 /* i386-specific section flag to mark large sections. */
673 #define SECTION_LARGE SECTION_MACH_DEP
675 /* Switch to the appropriate section for output of DECL.
676 DECL is either a `VAR_DECL' node or a constant of some sort.
677 RELOC indicates whether forming the initial value of DECL requires
678 link-time relocations. */
680 ATTRIBUTE_UNUSED
static section
*
681 x86_64_elf_select_section (tree decl
, int reloc
,
682 unsigned HOST_WIDE_INT align
)
684 if (ix86_in_large_data_p (decl
))
686 const char *sname
= NULL
;
687 unsigned int flags
= SECTION_WRITE
| SECTION_LARGE
;
688 switch (categorize_decl_for_section (decl
, reloc
))
693 case SECCAT_DATA_REL
:
694 sname
= ".ldata.rel";
696 case SECCAT_DATA_REL_LOCAL
:
697 sname
= ".ldata.rel.local";
699 case SECCAT_DATA_REL_RO
:
700 sname
= ".ldata.rel.ro";
702 case SECCAT_DATA_REL_RO_LOCAL
:
703 sname
= ".ldata.rel.ro.local";
707 flags
|= SECTION_BSS
;
710 case SECCAT_RODATA_MERGE_STR
:
711 case SECCAT_RODATA_MERGE_STR_INIT
:
712 case SECCAT_RODATA_MERGE_CONST
:
714 flags
&= ~SECTION_WRITE
;
723 /* We don't split these for medium model. Place them into
724 default sections and hope for best. */
729 /* We might get called with string constants, but get_named_section
730 doesn't like them as they are not DECLs. Also, we need to set
731 flags in that case. */
733 return get_section (sname
, flags
, NULL
);
734 return get_named_section (decl
, sname
, reloc
);
737 return default_elf_select_section (decl
, reloc
, align
);
740 /* Select a set of attributes for section NAME based on the properties
741 of DECL and whether or not RELOC indicates that DECL's initializer
742 might contain runtime relocations. */
744 static unsigned int ATTRIBUTE_UNUSED
745 x86_64_elf_section_type_flags (tree decl
, const char *name
, int reloc
)
747 unsigned int flags
= default_section_type_flags (decl
, name
, reloc
);
749 if (ix86_in_large_data_p (decl
))
750 flags
|= SECTION_LARGE
;
752 if (decl
== NULL_TREE
753 && (strcmp (name
, ".ldata.rel.ro") == 0
754 || strcmp (name
, ".ldata.rel.ro.local") == 0))
755 flags
|= SECTION_RELRO
;
757 if (strcmp (name
, ".lbss") == 0
758 || strncmp (name
, ".lbss.", 5) == 0
759 || strncmp (name
, ".gnu.linkonce.lb.", 16) == 0)
760 flags
|= SECTION_BSS
;
765 /* Build up a unique section name, expressed as a
766 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
767 RELOC indicates whether the initial value of EXP requires
768 link-time relocations. */
770 static void ATTRIBUTE_UNUSED
771 x86_64_elf_unique_section (tree decl
, int reloc
)
773 if (ix86_in_large_data_p (decl
))
775 const char *prefix
= NULL
;
776 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
777 bool one_only
= DECL_COMDAT_GROUP (decl
) && !HAVE_COMDAT_GROUP
;
779 switch (categorize_decl_for_section (decl
, reloc
))
782 case SECCAT_DATA_REL
:
783 case SECCAT_DATA_REL_LOCAL
:
784 case SECCAT_DATA_REL_RO
:
785 case SECCAT_DATA_REL_RO_LOCAL
:
786 prefix
= one_only
? ".ld" : ".ldata";
789 prefix
= one_only
? ".lb" : ".lbss";
792 case SECCAT_RODATA_MERGE_STR
:
793 case SECCAT_RODATA_MERGE_STR_INIT
:
794 case SECCAT_RODATA_MERGE_CONST
:
795 prefix
= one_only
? ".lr" : ".lrodata";
804 /* We don't split these for medium model. Place them into
805 default sections and hope for best. */
810 const char *name
, *linkonce
;
813 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
814 name
= targetm
.strip_name_encoding (name
);
816 /* If we're using one_only, then there needs to be a .gnu.linkonce
817 prefix to the section name. */
818 linkonce
= one_only
? ".gnu.linkonce" : "";
820 string
= ACONCAT ((linkonce
, prefix
, ".", name
, NULL
));
822 set_decl_section_name (decl
, string
);
826 default_unique_section (decl
, reloc
);
831 #ifndef LARGECOMM_SECTION_ASM_OP
832 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
835 /* This says how to output assembler code to declare an
836 uninitialized external linkage data object.
838 For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
841 x86_elf_aligned_decl_common (FILE *file
, tree decl
,
842 const char *name
, unsigned HOST_WIDE_INT size
,
845 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
846 && size
> (unsigned int)ix86_section_threshold
)
848 switch_to_section (get_named_section (decl
, ".lbss", 0));
849 fputs (LARGECOMM_SECTION_ASM_OP
, file
);
852 fputs (COMMON_ASM_OP
, file
);
853 assemble_name (file
, name
);
854 fprintf (file
, "," HOST_WIDE_INT_PRINT_UNSIGNED
",%u\n",
855 size
, align
/ BITS_PER_UNIT
);
859 /* Utility function for targets to use in implementing
860 ASM_OUTPUT_ALIGNED_BSS. */
863 x86_output_aligned_bss (FILE *file
, tree decl
, const char *name
,
864 unsigned HOST_WIDE_INT size
, int align
)
866 if ((ix86_cmodel
== CM_MEDIUM
|| ix86_cmodel
== CM_MEDIUM_PIC
)
867 && size
> (unsigned int)ix86_section_threshold
)
868 switch_to_section (get_named_section (decl
, ".lbss", 0));
870 switch_to_section (bss_section
);
871 ASM_OUTPUT_ALIGN (file
, floor_log2 (align
/ BITS_PER_UNIT
));
872 #ifdef ASM_DECLARE_OBJECT_NAME
873 last_assemble_variable_decl
= decl
;
874 ASM_DECLARE_OBJECT_NAME (file
, name
, decl
);
876 /* Standard thing is just output label for the object. */
877 ASM_OUTPUT_LABEL (file
, name
);
878 #endif /* ASM_DECLARE_OBJECT_NAME */
879 ASM_OUTPUT_SKIP (file
, size
? size
: 1);
882 /* Decide whether we must probe the stack before any space allocation
883 on this target. It's essentially TARGET_STACK_PROBE except when
884 -fstack-check causes the stack to be already probed differently. */
887 ix86_target_stack_probe (void)
889 /* Do not probe the stack twice if static stack checking is enabled. */
890 if (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
)
893 return TARGET_STACK_PROBE
;
896 /* Decide whether we can make a sibling call to a function. DECL is the
897 declaration of the function being targeted by the call and EXP is the
898 CALL_EXPR representing the call. */
901 ix86_function_ok_for_sibcall (tree decl
, tree exp
)
903 tree type
, decl_or_type
;
905 bool bind_global
= decl
&& !targetm
.binds_local_p (decl
);
907 if (ix86_function_naked (current_function_decl
))
910 /* Sibling call isn't OK if there are no caller-saved registers
911 since all registers must be preserved before return. */
912 if (cfun
->machine
->no_caller_saved_registers
)
915 /* If we are generating position-independent code, we cannot sibcall
916 optimize direct calls to global functions, as the PLT requires
917 %ebx be live. (Darwin does not have a PLT.) */
925 /* If we need to align the outgoing stack, then sibcalling would
926 unalign the stack, which may break the called function. */
927 if (ix86_minimum_incoming_stack_boundary (true)
928 < PREFERRED_STACK_BOUNDARY
)
934 type
= TREE_TYPE (decl
);
938 /* We're looking at the CALL_EXPR, we need the type of the function. */
939 type
= CALL_EXPR_FN (exp
); /* pointer expression */
940 type
= TREE_TYPE (type
); /* pointer type */
941 type
= TREE_TYPE (type
); /* function type */
945 /* Check that the return value locations are the same. Like
946 if we are returning floats on the 80387 register stack, we cannot
947 make a sibcall from a function that doesn't return a float to a
948 function that does or, conversely, from a function that does return
949 a float to a function that doesn't; the necessary stack adjustment
950 would not be executed. This is also the place we notice
951 differences in the return value ABI. Note that it is ok for one
952 of the functions to have void return type as long as the return
953 value of the other is passed in a register. */
954 a
= ix86_function_value (TREE_TYPE (exp
), decl_or_type
, false);
955 b
= ix86_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
957 if (STACK_REG_P (a
) || STACK_REG_P (b
))
959 if (!rtx_equal_p (a
, b
))
962 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
964 else if (!rtx_equal_p (a
, b
))
969 /* The SYSV ABI has more call-clobbered registers;
970 disallow sibcalls from MS to SYSV. */
971 if (cfun
->machine
->call_abi
== MS_ABI
972 && ix86_function_type_abi (type
) == SYSV_ABI
)
977 /* If this call is indirect, we'll need to be able to use a
978 call-clobbered register for the address of the target function.
979 Make sure that all such registers are not used for passing
980 parameters. Note that DLLIMPORT functions and call to global
981 function via GOT slot are indirect. */
983 || (bind_global
&& flag_pic
&& !flag_plt
)
984 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& DECL_DLLIMPORT_P (decl
))
985 || flag_force_indirect_call
)
987 /* Check if regparm >= 3 since arg_reg_available is set to
988 false if regparm == 0. If regparm is 1 or 2, there is
989 always a call-clobbered register available.
991 ??? The symbol indirect call doesn't need a call-clobbered
992 register. But we don't know if this is a symbol indirect
994 if (ix86_function_regparm (type
, decl
) >= 3
995 && !cfun
->machine
->arg_reg_available
)
1000 /* Otherwise okay. That also includes certain types of indirect calls. */
1004 /* This function determines from TYPE the calling-convention. */
1007 ix86_get_callcvt (const_tree type
)
1009 unsigned int ret
= 0;
1014 return IX86_CALLCVT_CDECL
;
1016 attrs
= TYPE_ATTRIBUTES (type
);
1017 if (attrs
!= NULL_TREE
)
1019 if (lookup_attribute ("cdecl", attrs
))
1020 ret
|= IX86_CALLCVT_CDECL
;
1021 else if (lookup_attribute ("stdcall", attrs
))
1022 ret
|= IX86_CALLCVT_STDCALL
;
1023 else if (lookup_attribute ("fastcall", attrs
))
1024 ret
|= IX86_CALLCVT_FASTCALL
;
1025 else if (lookup_attribute ("thiscall", attrs
))
1026 ret
|= IX86_CALLCVT_THISCALL
;
1028 /* Regparam isn't allowed for thiscall and fastcall. */
1029 if ((ret
& (IX86_CALLCVT_THISCALL
| IX86_CALLCVT_FASTCALL
)) == 0)
1031 if (lookup_attribute ("regparm", attrs
))
1032 ret
|= IX86_CALLCVT_REGPARM
;
1033 if (lookup_attribute ("sseregparm", attrs
))
1034 ret
|= IX86_CALLCVT_SSEREGPARM
;
1037 if (IX86_BASE_CALLCVT(ret
) != 0)
1041 is_stdarg
= stdarg_p (type
);
1042 if (TARGET_RTD
&& !is_stdarg
)
1043 return IX86_CALLCVT_STDCALL
| ret
;
1047 || TREE_CODE (type
) != METHOD_TYPE
1048 || ix86_function_type_abi (type
) != MS_ABI
)
1049 return IX86_CALLCVT_CDECL
| ret
;
1051 return IX86_CALLCVT_THISCALL
;
1054 /* Return 0 if the attributes for two types are incompatible, 1 if they
1055 are compatible, and 2 if they are nearly compatible (which causes a
1056 warning to be generated). */
1059 ix86_comp_type_attributes (const_tree type1
, const_tree type2
)
1061 unsigned int ccvt1
, ccvt2
;
1063 if (TREE_CODE (type1
) != FUNCTION_TYPE
1064 && TREE_CODE (type1
) != METHOD_TYPE
)
1067 ccvt1
= ix86_get_callcvt (type1
);
1068 ccvt2
= ix86_get_callcvt (type2
);
1071 if (ix86_function_regparm (type1
, NULL
)
1072 != ix86_function_regparm (type2
, NULL
))
1078 /* Return the regparm value for a function with the indicated TYPE and DECL.
1079 DECL may be NULL when calling function indirectly
1080 or considering a libcall. */
1083 ix86_function_regparm (const_tree type
, const_tree decl
)
1090 return (ix86_function_type_abi (type
) == SYSV_ABI
1091 ? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX
);
1092 ccvt
= ix86_get_callcvt (type
);
1093 regparm
= ix86_regparm
;
1095 if ((ccvt
& IX86_CALLCVT_REGPARM
) != 0)
1097 attr
= lookup_attribute ("regparm", TYPE_ATTRIBUTES (type
));
1100 regparm
= TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
)));
1104 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
1106 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
1109 /* Use register calling convention for local functions when possible. */
1111 && TREE_CODE (decl
) == FUNCTION_DECL
)
1113 cgraph_node
*target
= cgraph_node::get (decl
);
1115 target
= target
->function_symbol ();
1117 /* Caller and callee must agree on the calling convention, so
1118 checking here just optimize means that with
1119 __attribute__((optimize (...))) caller could use regparm convention
1120 and callee not, or vice versa. Instead look at whether the callee
1121 is optimized or not. */
1122 if (target
&& opt_for_fn (target
->decl
, optimize
)
1123 && !(profile_flag
&& !flag_fentry
))
1125 cgraph_local_info
*i
= &target
->local
;
1126 if (i
&& i
->local
&& i
->can_change_signature
)
1128 int local_regparm
, globals
= 0, regno
;
1130 /* Make sure no regparm register is taken by a
1131 fixed register variable. */
1132 for (local_regparm
= 0; local_regparm
< REGPARM_MAX
;
1134 if (fixed_regs
[local_regparm
])
1137 /* We don't want to use regparm(3) for nested functions as
1138 these use a static chain pointer in the third argument. */
1139 if (local_regparm
== 3 && DECL_STATIC_CHAIN (target
->decl
))
1142 /* Save a register for the split stack. */
1143 if (flag_split_stack
)
1145 if (local_regparm
== 3)
1147 else if (local_regparm
== 2
1148 && DECL_STATIC_CHAIN (target
->decl
))
1152 /* Each fixed register usage increases register pressure,
1153 so less registers should be used for argument passing.
1154 This functionality can be overriden by an explicit
1156 for (regno
= AX_REG
; regno
<= DI_REG
; regno
++)
1157 if (fixed_regs
[regno
])
1161 = globals
< local_regparm
? local_regparm
- globals
: 0;
1163 if (local_regparm
> regparm
)
1164 regparm
= local_regparm
;
1172 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1173 DFmode (2) arguments in SSE registers for a function with the
1174 indicated TYPE and DECL. DECL may be NULL when calling function
1175 indirectly or considering a libcall. Return -1 if any FP parameter
1176 should be rejected by error. This is used in siutation we imply SSE
1177 calling convetion but the function is called from another function with
1178 SSE disabled. Otherwise return 0. */
1181 ix86_function_sseregparm (const_tree type
, const_tree decl
, bool warn
)
1183 gcc_assert (!TARGET_64BIT
);
1185 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1186 by the sseregparm attribute. */
1187 if (TARGET_SSEREGPARM
1188 || (type
&& lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type
))))
1195 error ("calling %qD with attribute sseregparm without "
1196 "SSE/SSE2 enabled", decl
);
1198 error ("calling %qT with attribute sseregparm without "
1199 "SSE/SSE2 enabled", type
);
1210 cgraph_node
*target
= cgraph_node::get (decl
);
1212 target
= target
->function_symbol ();
1214 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1215 (and DFmode for SSE2) arguments in SSE registers. */
1217 /* TARGET_SSE_MATH */
1218 && (target_opts_for_fn (target
->decl
)->x_ix86_fpmath
& FPMATH_SSE
)
1219 && opt_for_fn (target
->decl
, optimize
)
1220 && !(profile_flag
&& !flag_fentry
))
1222 cgraph_local_info
*i
= &target
->local
;
1223 if (i
&& i
->local
&& i
->can_change_signature
)
1225 /* Refuse to produce wrong code when local function with SSE enabled
1226 is called from SSE disabled function.
1227 FIXME: We need a way to detect these cases cross-ltrans partition
1228 and avoid using SSE calling conventions on local functions called
1229 from function with SSE disabled. For now at least delay the
1230 warning until we know we are going to produce wrong code.
1232 if (!TARGET_SSE
&& warn
)
1234 return TARGET_SSE2_P (target_opts_for_fn (target
->decl
)
1235 ->x_ix86_isa_flags
) ? 2 : 1;
1242 /* Return true if EAX is live at the start of the function. Used by
1243 ix86_expand_prologue to determine if we need special help before
1244 calling allocate_stack_worker. */
1247 ix86_eax_live_at_start_p (void)
1249 /* Cheat. Don't bother working forward from ix86_function_regparm
1250 to the function type to whether an actual argument is located in
1251 eax. Instead just look at cfg info, which is still close enough
1252 to correct at this point. This gives false positives for broken
1253 functions that might use uninitialized data that happens to be
1254 allocated in eax, but who cares? */
1255 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun
)), 0);
1259 ix86_keep_aggregate_return_pointer (tree fntype
)
1265 attr
= lookup_attribute ("callee_pop_aggregate_return",
1266 TYPE_ATTRIBUTES (fntype
));
1268 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr
))) == 0);
1270 /* For 32-bit MS-ABI the default is to keep aggregate
1272 if (ix86_function_type_abi (fntype
) == MS_ABI
)
1275 return KEEP_AGGREGATE_RETURN_POINTER
!= 0;
1278 /* Value is the number of bytes of arguments automatically
1279 popped when returning from a subroutine call.
1280 FUNDECL is the declaration node of the function (as a tree),
1281 FUNTYPE is the data type of the function (as a tree),
1282 or for a library call it is an identifier node for the subroutine name.
1283 SIZE is the number of bytes of arguments passed on the stack.
1285 On the 80386, the RTD insn may be used to pop them if the number
1286 of args is fixed, but if the number is variable then the caller
1287 must pop them all. RTD can't be used for library calls now
1288 because the library is compiled with the Unix compiler.
1289 Use of RTD is a selectable option, since it is incompatible with
1290 standard Unix calling sequences. If the option is not selected,
1291 the caller must always pop the args.
1293 The attribute stdcall is equivalent to RTD on a per module basis. */
1296 ix86_return_pops_args (tree fundecl
, tree funtype
, poly_int64 size
)
1300 /* None of the 64-bit ABIs pop arguments. */
1304 ccvt
= ix86_get_callcvt (funtype
);
1306 if ((ccvt
& (IX86_CALLCVT_STDCALL
| IX86_CALLCVT_FASTCALL
1307 | IX86_CALLCVT_THISCALL
)) != 0
1308 && ! stdarg_p (funtype
))
1311 /* Lose any fake structure return argument if it is passed on the stack. */
1312 if (aggregate_value_p (TREE_TYPE (funtype
), fundecl
)
1313 && !ix86_keep_aggregate_return_pointer (funtype
))
1315 int nregs
= ix86_function_regparm (funtype
, fundecl
);
1317 return GET_MODE_SIZE (Pmode
);
1323 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1326 ix86_legitimate_combined_insn (rtx_insn
*insn
)
1330 /* Check operand constraints in case hard registers were propagated
1331 into insn pattern. This check prevents combine pass from
1332 generating insn patterns with invalid hard register operands.
1333 These invalid insns can eventually confuse reload to error out
1334 with a spill failure. See also PRs 46829 and 46843. */
1336 gcc_assert (INSN_CODE (insn
) >= 0);
1338 extract_insn (insn
);
1339 preprocess_constraints (insn
);
1341 int n_operands
= recog_data
.n_operands
;
1342 int n_alternatives
= recog_data
.n_alternatives
;
1343 for (i
= 0; i
< n_operands
; i
++)
1345 rtx op
= recog_data
.operand
[i
];
1346 machine_mode mode
= GET_MODE (op
);
1347 const operand_alternative
*op_alt
;
1352 /* A unary operator may be accepted by the predicate, but it
1353 is irrelevant for matching constraints. */
1359 if (REG_P (SUBREG_REG (op
))
1360 && REGNO (SUBREG_REG (op
)) < FIRST_PSEUDO_REGISTER
)
1361 offset
= subreg_regno_offset (REGNO (SUBREG_REG (op
)),
1362 GET_MODE (SUBREG_REG (op
)),
1365 op
= SUBREG_REG (op
);
1368 if (!(REG_P (op
) && HARD_REGISTER_P (op
)))
1371 op_alt
= recog_op_alt
;
1373 /* Operand has no constraints, anything is OK. */
1374 win
= !n_alternatives
;
1376 alternative_mask preferred
= get_preferred_alternatives (insn
);
1377 for (j
= 0; j
< n_alternatives
; j
++, op_alt
+= n_operands
)
1379 if (!TEST_BIT (preferred
, j
))
1381 if (op_alt
[i
].anything_ok
1382 || (op_alt
[i
].matches
!= -1
1384 (recog_data
.operand
[i
],
1385 recog_data
.operand
[op_alt
[i
].matches
]))
1386 || reg_fits_class_p (op
, op_alt
[i
].cl
, offset
, mode
))
1400 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1402 static unsigned HOST_WIDE_INT
1403 ix86_asan_shadow_offset (void)
1405 return TARGET_LP64
? (TARGET_MACHO
? (HOST_WIDE_INT_1
<< 44)
1406 : HOST_WIDE_INT_C (0x7fff8000))
1407 : (HOST_WIDE_INT_1
<< 29);
1410 /* Argument support functions. */
1412 /* Return true when register may be used to pass function parameters. */
1414 ix86_function_arg_regno_p (int regno
)
1417 enum calling_abi call_abi
;
1418 const int *parm_regs
;
1423 return (regno
< REGPARM_MAX
1424 || (TARGET_SSE
&& SSE_REGNO_P (regno
) && !fixed_regs
[regno
]));
1426 return (regno
< REGPARM_MAX
1427 || (TARGET_MMX
&& MMX_REGNO_P (regno
)
1428 && (regno
< FIRST_MMX_REG
+ MMX_REGPARM_MAX
))
1429 || (TARGET_SSE
&& SSE_REGNO_P (regno
)
1430 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
)));
1433 if (TARGET_SSE
&& SSE_REGNO_P (regno
)
1434 && (regno
< FIRST_SSE_REG
+ SSE_REGPARM_MAX
))
1437 /* TODO: The function should depend on current function ABI but
1438 builtins.c would need updating then. Therefore we use the
1440 call_abi
= ix86_cfun_abi ();
1442 /* RAX is used as hidden argument to va_arg functions. */
1443 if (call_abi
== SYSV_ABI
&& regno
== AX_REG
)
1446 if (call_abi
== MS_ABI
)
1447 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
1449 parm_regs
= x86_64_int_parameter_registers
;
1451 for (i
= 0; i
< (call_abi
== MS_ABI
1452 ? X86_64_MS_REGPARM_MAX
: X86_64_REGPARM_MAX
); i
++)
1453 if (regno
== parm_regs
[i
])
1458 /* Return if we do not know how to pass ARG solely in registers. */
1461 ix86_must_pass_in_stack (const function_arg_info
&arg
)
1463 if (must_pass_in_stack_var_size_or_pad (arg
))
1466 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1467 The layout_type routine is crafty and tries to trick us into passing
1468 currently unsupported vector types on the stack by using TImode. */
1469 return (!TARGET_64BIT
&& arg
.mode
== TImode
1470 && arg
.type
&& TREE_CODE (arg
.type
) != VECTOR_TYPE
);
1473 /* It returns the size, in bytes, of the area reserved for arguments passed
1474 in registers for the function represented by fndecl dependent to the used
1477 ix86_reg_parm_stack_space (const_tree fndecl
)
1479 enum calling_abi call_abi
= SYSV_ABI
;
1480 if (fndecl
!= NULL_TREE
&& TREE_CODE (fndecl
) == FUNCTION_DECL
)
1481 call_abi
= ix86_function_abi (fndecl
);
1483 call_abi
= ix86_function_type_abi (fndecl
);
1484 if (TARGET_64BIT
&& call_abi
== MS_ABI
)
1489 /* We add this as a workaround in order to use libc_has_function
1492 ix86_libc_has_function (enum function_class fn_class
)
1494 return targetm
.libc_has_function (fn_class
);
1497 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1498 specifying the call abi used. */
1500 ix86_function_type_abi (const_tree fntype
)
1502 enum calling_abi abi
= ix86_abi
;
1504 if (fntype
== NULL_TREE
|| TYPE_ATTRIBUTES (fntype
) == NULL_TREE
)
1508 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype
)))
1511 if (TARGET_X32
&& !warned
)
1513 error ("X32 does not support %<ms_abi%> attribute");
1519 else if (abi
== MS_ABI
1520 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype
)))
1527 ix86_function_abi (const_tree fndecl
)
1529 return fndecl
? ix86_function_type_abi (TREE_TYPE (fndecl
)) : ix86_abi
;
1532 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1533 specifying the call abi used. */
1535 ix86_cfun_abi (void)
1537 return cfun
? cfun
->machine
->call_abi
: ix86_abi
;
1541 ix86_function_ms_hook_prologue (const_tree fn
)
1543 if (fn
&& lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn
)))
1545 if (decl_function_context (fn
) != NULL_TREE
)
1546 error_at (DECL_SOURCE_LOCATION (fn
),
1547 "%<ms_hook_prologue%> attribute is not compatible "
1548 "with nested function");
1556 ix86_function_naked (const_tree fn
)
1558 if (fn
&& lookup_attribute ("naked", DECL_ATTRIBUTES (fn
)))
1564 /* Write the extra assembler code needed to declare a function properly. */
1567 ix86_asm_output_function_label (FILE *asm_out_file
, const char *fname
,
1570 bool is_ms_hook
= ix86_function_ms_hook_prologue (decl
);
1574 int i
, filler_count
= (TARGET_64BIT
? 32 : 16);
1575 unsigned int filler_cc
= 0xcccccccc;
1577 for (i
= 0; i
< filler_count
; i
+= 4)
1578 fprintf (asm_out_file
, ASM_LONG
" %#x\n", filler_cc
);
1581 #ifdef SUBTARGET_ASM_UNWIND_INIT
1582 SUBTARGET_ASM_UNWIND_INIT (asm_out_file
);
1585 ASM_OUTPUT_LABEL (asm_out_file
, fname
);
1587 /* Output magic byte marker, if hot-patch attribute is set. */
1592 /* leaq [%rsp + 0], %rsp */
1593 fputs (ASM_BYTE
"0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1598 /* movl.s %edi, %edi
1600 movl.s %esp, %ebp */
1601 fputs (ASM_BYTE
"0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file
);
1606 /* Implementation of call abi switching target hook. Specific to FNDECL
1607 the specific call register sets are set. See also
1608 ix86_conditional_register_usage for more details. */
1610 ix86_call_abi_override (const_tree fndecl
)
1612 cfun
->machine
->call_abi
= ix86_function_abi (fndecl
);
1615 /* Return 1 if pseudo register should be created and used to hold
1616 GOT address for PIC code. */
1618 ix86_use_pseudo_pic_reg (void)
1621 && (ix86_cmodel
== CM_SMALL_PIC
1628 /* Initialize large model PIC register. */
1631 ix86_init_large_pic_reg (unsigned int tmp_regno
)
1633 rtx_code_label
*label
;
1636 gcc_assert (Pmode
== DImode
);
1637 label
= gen_label_rtx ();
1639 LABEL_PRESERVE_P (label
) = 1;
1640 tmp_reg
= gen_rtx_REG (Pmode
, tmp_regno
);
1641 gcc_assert (REGNO (pic_offset_table_rtx
) != tmp_regno
);
1642 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx
,
1644 emit_insn (gen_set_got_offset_rex64 (tmp_reg
, label
));
1645 emit_insn (gen_add2_insn (pic_offset_table_rtx
, tmp_reg
));
1646 const char *name
= LABEL_NAME (label
);
1647 PUT_CODE (label
, NOTE
);
1648 NOTE_KIND (label
) = NOTE_INSN_DELETED_LABEL
;
1649 NOTE_DELETED_LABEL_NAME (label
) = name
;
1652 /* Create and initialize PIC register if required. */
1654 ix86_init_pic_reg (void)
1659 if (!ix86_use_pseudo_pic_reg ())
1666 if (ix86_cmodel
== CM_LARGE_PIC
)
1667 ix86_init_large_pic_reg (R11_REG
);
1669 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx
));
1673 /* If there is future mcount call in the function it is more profitable
1674 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1675 rtx reg
= crtl
->profile
1676 ? gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
)
1677 : pic_offset_table_rtx
;
1678 rtx_insn
*insn
= emit_insn (gen_set_got (reg
));
1679 RTX_FRAME_RELATED_P (insn
) = 1;
1681 emit_move_insn (pic_offset_table_rtx
, reg
);
1682 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
1688 entry_edge
= single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
1689 insert_insn_on_edge (seq
, entry_edge
);
1690 commit_one_edge_insertion (entry_edge
);
1693 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1694 for a call to a function whose data type is FNTYPE.
1695 For a library call, FNTYPE is 0. */
1698 init_cumulative_args (CUMULATIVE_ARGS
*cum
, /* Argument info to initialize */
1699 tree fntype
, /* tree ptr for function decl */
1700 rtx libname
, /* SYMBOL_REF of library name or 0 */
1704 struct cgraph_local_info
*i
= NULL
;
1705 struct cgraph_node
*target
= NULL
;
1707 memset (cum
, 0, sizeof (*cum
));
1711 target
= cgraph_node::get (fndecl
);
1714 target
= target
->function_symbol ();
1715 i
= cgraph_node::local_info (target
->decl
);
1716 cum
->call_abi
= ix86_function_abi (target
->decl
);
1719 cum
->call_abi
= ix86_function_abi (fndecl
);
1722 cum
->call_abi
= ix86_function_type_abi (fntype
);
1724 cum
->caller
= caller
;
1726 /* Set up the number of registers to use for passing arguments. */
1727 cum
->nregs
= ix86_regparm
;
1730 cum
->nregs
= (cum
->call_abi
== SYSV_ABI
1731 ? X86_64_REGPARM_MAX
1732 : X86_64_MS_REGPARM_MAX
);
1736 cum
->sse_nregs
= SSE_REGPARM_MAX
;
1739 cum
->sse_nregs
= (cum
->call_abi
== SYSV_ABI
1740 ? X86_64_SSE_REGPARM_MAX
1741 : X86_64_MS_SSE_REGPARM_MAX
);
1745 cum
->mmx_nregs
= MMX_REGPARM_MAX
;
1746 cum
->warn_avx512f
= true;
1747 cum
->warn_avx
= true;
1748 cum
->warn_sse
= true;
1749 cum
->warn_mmx
= true;
1751 /* Because type might mismatch in between caller and callee, we need to
1752 use actual type of function for local calls.
1753 FIXME: cgraph_analyze can be told to actually record if function uses
1754 va_start so for local functions maybe_vaarg can be made aggressive
1756 FIXME: once typesytem is fixed, we won't need this code anymore. */
1757 if (i
&& i
->local
&& i
->can_change_signature
)
1758 fntype
= TREE_TYPE (target
->decl
);
1759 cum
->stdarg
= stdarg_p (fntype
);
1760 cum
->maybe_vaarg
= (fntype
1761 ? (!prototype_p (fntype
) || stdarg_p (fntype
))
1766 cum
->warn_empty
= !warn_abi
|| cum
->stdarg
;
1767 if (!cum
->warn_empty
&& fntype
)
1769 function_args_iterator iter
;
1771 bool seen_empty_type
= false;
1772 FOREACH_FUNCTION_ARGS (fntype
, argtype
, iter
)
1774 if (argtype
== error_mark_node
|| VOID_TYPE_P (argtype
))
1776 if (TYPE_EMPTY_P (argtype
))
1777 seen_empty_type
= true;
1778 else if (seen_empty_type
)
1780 cum
->warn_empty
= true;
1788 /* If there are variable arguments, then we won't pass anything
1789 in registers in 32-bit mode. */
1790 if (stdarg_p (fntype
))
1793 /* Since in 32-bit, variable arguments are always passed on
1794 stack, there is scratch register available for indirect
1796 cfun
->machine
->arg_reg_available
= true;
1799 cum
->warn_avx512f
= false;
1800 cum
->warn_avx
= false;
1801 cum
->warn_sse
= false;
1802 cum
->warn_mmx
= false;
1806 /* Use ecx and edx registers if function has fastcall attribute,
1807 else look for regparm information. */
1810 unsigned int ccvt
= ix86_get_callcvt (fntype
);
1811 if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
1814 cum
->fastcall
= 1; /* Same first register as in fastcall. */
1816 else if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
1822 cum
->nregs
= ix86_function_regparm (fntype
, fndecl
);
1825 /* Set up the number of SSE registers used for passing SFmode
1826 and DFmode arguments. Warn for mismatching ABI. */
1827 cum
->float_in_sse
= ix86_function_sseregparm (fntype
, fndecl
, true);
1830 cfun
->machine
->arg_reg_available
= (cum
->nregs
> 0);
1833 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1834 But in the case of vector types, it is some vector mode.
1836 When we have only some of our vector isa extensions enabled, then there
1837 are some modes for which vector_mode_supported_p is false. For these
1838 modes, the generic vector support in gcc will choose some non-vector mode
1839 in order to implement the type. By computing the natural mode, we'll
1840 select the proper ABI location for the operand and not depend on whatever
1841 the middle-end decides to do with these vector types.
1843 The midde-end can't deal with the vector types > 16 bytes. In this
1844 case, we return the original mode and warn ABI change if CUM isn't
1847 If INT_RETURN is true, warn ABI change if the vector mode isn't
1848 available for function return value. */
1851 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
,
1854 machine_mode mode
= TYPE_MODE (type
);
1856 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
1858 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1859 if ((size
== 8 || size
== 16 || size
== 32 || size
== 64)
1860 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1861 && TYPE_VECTOR_SUBPARTS (type
) > 1)
1863 machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
1865 /* There are no XFmode vector modes. */
1866 if (innermode
== XFmode
)
1869 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
1870 mode
= MIN_MODE_VECTOR_FLOAT
;
1872 mode
= MIN_MODE_VECTOR_INT
;
1874 /* Get the mode which has this inner mode and number of units. */
1875 FOR_EACH_MODE_FROM (mode
, mode
)
1876 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
1877 && GET_MODE_INNER (mode
) == innermode
)
1879 if (size
== 64 && !TARGET_AVX512F
&& !TARGET_IAMCU
)
1881 static bool warnedavx512f
;
1882 static bool warnedavx512f_ret
;
1884 if (cum
&& cum
->warn_avx512f
&& !warnedavx512f
)
1886 if (warning (OPT_Wpsabi
, "AVX512F vector argument "
1887 "without AVX512F enabled changes the ABI"))
1888 warnedavx512f
= true;
1890 else if (in_return
&& !warnedavx512f_ret
)
1892 if (warning (OPT_Wpsabi
, "AVX512F vector return "
1893 "without AVX512F enabled changes the ABI"))
1894 warnedavx512f_ret
= true;
1897 return TYPE_MODE (type
);
1899 else if (size
== 32 && !TARGET_AVX
&& !TARGET_IAMCU
)
1901 static bool warnedavx
;
1902 static bool warnedavx_ret
;
1904 if (cum
&& cum
->warn_avx
&& !warnedavx
)
1906 if (warning (OPT_Wpsabi
, "AVX vector argument "
1907 "without AVX enabled changes the ABI"))
1910 else if (in_return
&& !warnedavx_ret
)
1912 if (warning (OPT_Wpsabi
, "AVX vector return "
1913 "without AVX enabled changes the ABI"))
1914 warnedavx_ret
= true;
1917 return TYPE_MODE (type
);
1919 else if (((size
== 8 && TARGET_64BIT
) || size
== 16)
1923 static bool warnedsse
;
1924 static bool warnedsse_ret
;
1926 if (cum
&& cum
->warn_sse
&& !warnedsse
)
1928 if (warning (OPT_Wpsabi
, "SSE vector argument "
1929 "without SSE enabled changes the ABI"))
1932 else if (!TARGET_64BIT
&& in_return
&& !warnedsse_ret
)
1934 if (warning (OPT_Wpsabi
, "SSE vector return "
1935 "without SSE enabled changes the ABI"))
1936 warnedsse_ret
= true;
1939 else if ((size
== 8 && !TARGET_64BIT
)
1941 || cfun
->machine
->func_type
== TYPE_NORMAL
)
1945 static bool warnedmmx
;
1946 static bool warnedmmx_ret
;
1948 if (cum
&& cum
->warn_mmx
&& !warnedmmx
)
1950 if (warning (OPT_Wpsabi
, "MMX vector argument "
1951 "without MMX enabled changes the ABI"))
1954 else if (in_return
&& !warnedmmx_ret
)
1956 if (warning (OPT_Wpsabi
, "MMX vector return "
1957 "without MMX enabled changes the ABI"))
1958 warnedmmx_ret
= true;
1971 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1972 this may not agree with the mode that the type system has chosen for the
1973 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1974 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
1977 gen_reg_or_parallel (machine_mode mode
, machine_mode orig_mode
,
1982 if (orig_mode
!= BLKmode
)
1983 tmp
= gen_rtx_REG (orig_mode
, regno
);
1986 tmp
= gen_rtx_REG (mode
, regno
);
1987 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
, const0_rtx
);
1988 tmp
= gen_rtx_PARALLEL (orig_mode
, gen_rtvec (1, tmp
));
1994 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1995 of this code is to classify each 8bytes of incoming argument by the register
1996 class and assign registers accordingly. */
1998 /* Return the union class of CLASS1 and CLASS2.
1999 See the x86-64 PS ABI for details. */
2001 static enum x86_64_reg_class
2002 merge_classes (enum x86_64_reg_class class1
, enum x86_64_reg_class class2
)
2004 /* Rule #1: If both classes are equal, this is the resulting class. */
2005 if (class1
== class2
)
2008 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2010 if (class1
== X86_64_NO_CLASS
)
2012 if (class2
== X86_64_NO_CLASS
)
2015 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2016 if (class1
== X86_64_MEMORY_CLASS
|| class2
== X86_64_MEMORY_CLASS
)
2017 return X86_64_MEMORY_CLASS
;
2019 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2020 if ((class1
== X86_64_INTEGERSI_CLASS
&& class2
== X86_64_SSESF_CLASS
)
2021 || (class2
== X86_64_INTEGERSI_CLASS
&& class1
== X86_64_SSESF_CLASS
))
2022 return X86_64_INTEGERSI_CLASS
;
2023 if (class1
== X86_64_INTEGER_CLASS
|| class1
== X86_64_INTEGERSI_CLASS
2024 || class2
== X86_64_INTEGER_CLASS
|| class2
== X86_64_INTEGERSI_CLASS
)
2025 return X86_64_INTEGER_CLASS
;
2027 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2029 if (class1
== X86_64_X87_CLASS
2030 || class1
== X86_64_X87UP_CLASS
2031 || class1
== X86_64_COMPLEX_X87_CLASS
2032 || class2
== X86_64_X87_CLASS
2033 || class2
== X86_64_X87UP_CLASS
2034 || class2
== X86_64_COMPLEX_X87_CLASS
)
2035 return X86_64_MEMORY_CLASS
;
2037 /* Rule #6: Otherwise class SSE is used. */
2038 return X86_64_SSE_CLASS
;
2041 /* Classify the argument of type TYPE and mode MODE.
2042 CLASSES will be filled by the register class used to pass each word
2043 of the operand. The number of words is returned. In case the parameter
2044 should be passed in memory, 0 is returned. As a special case for zero
2045 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2047 BIT_OFFSET is used internally for handling records and specifies offset
2048 of the offset in bits modulo 512 to avoid overflow cases.
2050 See the x86-64 PS ABI for details.
2054 classify_argument (machine_mode mode
, const_tree type
,
2055 enum x86_64_reg_class classes
[MAX_CLASSES
], int bit_offset
)
2058 = mode
== BLKmode
? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2059 int words
= CEIL (bytes
+ (bit_offset
% 64) / 8, UNITS_PER_WORD
);
2061 /* Variable sized entities are always passed/returned in memory. */
2065 if (mode
!= VOIDmode
)
2067 /* The value of "named" doesn't matter. */
2068 function_arg_info
arg (const_cast<tree
> (type
), mode
, /*named=*/true);
2069 if (targetm
.calls
.must_pass_in_stack (arg
))
2073 if (type
&& AGGREGATE_TYPE_P (type
))
2077 enum x86_64_reg_class subclasses
[MAX_CLASSES
];
2079 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2083 for (i
= 0; i
< words
; i
++)
2084 classes
[i
] = X86_64_NO_CLASS
;
2086 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
2087 signalize memory class, so handle it as special case. */
2090 classes
[0] = X86_64_NO_CLASS
;
2094 /* Classify each field of record and merge classes. */
2095 switch (TREE_CODE (type
))
2098 /* And now merge the fields of structure. */
2099 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2101 if (TREE_CODE (field
) == FIELD_DECL
)
2105 if (TREE_TYPE (field
) == error_mark_node
)
2108 /* Bitfields are always classified as integer. Handle them
2109 early, since later code would consider them to be
2110 misaligned integers. */
2111 if (DECL_BIT_FIELD (field
))
2113 for (i
= (int_bit_position (field
)
2114 + (bit_offset
% 64)) / 8 / 8;
2115 i
< ((int_bit_position (field
) + (bit_offset
% 64))
2116 + tree_to_shwi (DECL_SIZE (field
))
2119 = merge_classes (X86_64_INTEGER_CLASS
, classes
[i
]);
2125 type
= TREE_TYPE (field
);
2127 /* Flexible array member is ignored. */
2128 if (TYPE_MODE (type
) == BLKmode
2129 && TREE_CODE (type
) == ARRAY_TYPE
2130 && TYPE_SIZE (type
) == NULL_TREE
2131 && TYPE_DOMAIN (type
) != NULL_TREE
2132 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type
))
2137 if (!warned
&& warn_psabi
)
2140 inform (input_location
,
2141 "the ABI of passing struct with"
2142 " a flexible array member has"
2143 " changed in GCC 4.4");
2147 num
= classify_argument (TYPE_MODE (type
), type
,
2149 (int_bit_position (field
)
2150 + bit_offset
) % 512);
2153 pos
= (int_bit_position (field
)
2154 + (bit_offset
% 64)) / 8 / 8;
2155 for (i
= 0; i
< num
&& (i
+ pos
) < words
; i
++)
2157 = merge_classes (subclasses
[i
], classes
[i
+ pos
]);
2164 /* Arrays are handled as small records. */
2167 num
= classify_argument (TYPE_MODE (TREE_TYPE (type
)),
2168 TREE_TYPE (type
), subclasses
, bit_offset
);
2172 /* The partial classes are now full classes. */
2173 if (subclasses
[0] == X86_64_SSESF_CLASS
&& bytes
!= 4)
2174 subclasses
[0] = X86_64_SSE_CLASS
;
2175 if (subclasses
[0] == X86_64_INTEGERSI_CLASS
2176 && !((bit_offset
% 64) == 0 && bytes
== 4))
2177 subclasses
[0] = X86_64_INTEGER_CLASS
;
2179 for (i
= 0; i
< words
; i
++)
2180 classes
[i
] = subclasses
[i
% num
];
2185 case QUAL_UNION_TYPE
:
2186 /* Unions are similar to RECORD_TYPE but offset is always 0.
2188 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
2190 if (TREE_CODE (field
) == FIELD_DECL
)
2194 if (TREE_TYPE (field
) == error_mark_node
)
2197 num
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2198 TREE_TYPE (field
), subclasses
,
2202 for (i
= 0; i
< num
&& i
< words
; i
++)
2203 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2214 /* When size > 16 bytes, if the first one isn't
2215 X86_64_SSE_CLASS or any other ones aren't
2216 X86_64_SSEUP_CLASS, everything should be passed in
2218 if (classes
[0] != X86_64_SSE_CLASS
)
2221 for (i
= 1; i
< words
; i
++)
2222 if (classes
[i
] != X86_64_SSEUP_CLASS
)
2226 /* Final merger cleanup. */
2227 for (i
= 0; i
< words
; i
++)
2229 /* If one class is MEMORY, everything should be passed in
2231 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2234 /* The X86_64_SSEUP_CLASS should be always preceded by
2235 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2236 if (classes
[i
] == X86_64_SSEUP_CLASS
2237 && classes
[i
- 1] != X86_64_SSE_CLASS
2238 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
2240 /* The first one should never be X86_64_SSEUP_CLASS. */
2241 gcc_assert (i
!= 0);
2242 classes
[i
] = X86_64_SSE_CLASS
;
2245 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2246 everything should be passed in memory. */
2247 if (classes
[i
] == X86_64_X87UP_CLASS
2248 && (classes
[i
- 1] != X86_64_X87_CLASS
))
2252 /* The first one should never be X86_64_X87UP_CLASS. */
2253 gcc_assert (i
!= 0);
2254 if (!warned
&& warn_psabi
)
2257 inform (input_location
,
2258 "the ABI of passing union with %<long double%>"
2259 " has changed in GCC 4.4");
2267 /* Compute alignment needed. We align all types to natural boundaries with
2268 exception of XFmode that is aligned to 64bits. */
2269 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2271 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2274 mode_alignment
= 128;
2275 else if (mode
== XCmode
)
2276 mode_alignment
= 256;
2277 if (COMPLEX_MODE_P (mode
))
2278 mode_alignment
/= 2;
2279 /* Misaligned fields are always returned in memory. */
2280 if (bit_offset
% mode_alignment
)
2284 /* for V1xx modes, just use the base mode */
2285 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
2286 && GET_MODE_UNIT_SIZE (mode
) == bytes
)
2287 mode
= GET_MODE_INNER (mode
);
2289 /* Classification of atomic types. */
2294 classes
[0] = X86_64_SSE_CLASS
;
2297 classes
[0] = X86_64_SSE_CLASS
;
2298 classes
[1] = X86_64_SSEUP_CLASS
;
2308 int size
= bit_offset
+ (int) GET_MODE_BITSIZE (mode
);
2310 /* Analyze last 128 bits only. */
2311 size
= (size
- 1) & 0x7f;
2315 classes
[0] = X86_64_INTEGERSI_CLASS
;
2320 classes
[0] = X86_64_INTEGER_CLASS
;
2323 else if (size
< 64+32)
2325 classes
[0] = X86_64_INTEGER_CLASS
;
2326 classes
[1] = X86_64_INTEGERSI_CLASS
;
2329 else if (size
< 64+64)
2331 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2339 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2343 /* OImode shouldn't be used directly. */
2348 if (!(bit_offset
% 64))
2349 classes
[0] = X86_64_SSESF_CLASS
;
2351 classes
[0] = X86_64_SSE_CLASS
;
2354 classes
[0] = X86_64_SSEDF_CLASS
;
2357 classes
[0] = X86_64_X87_CLASS
;
2358 classes
[1] = X86_64_X87UP_CLASS
;
2361 classes
[0] = X86_64_SSE_CLASS
;
2362 classes
[1] = X86_64_SSEUP_CLASS
;
2365 classes
[0] = X86_64_SSE_CLASS
;
2366 if (!(bit_offset
% 64))
2372 if (!warned
&& warn_psabi
)
2375 inform (input_location
,
2376 "the ABI of passing structure with %<complex float%>"
2377 " member has changed in GCC 4.4");
2379 classes
[1] = X86_64_SSESF_CLASS
;
2383 classes
[0] = X86_64_SSEDF_CLASS
;
2384 classes
[1] = X86_64_SSEDF_CLASS
;
2387 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
2390 /* This modes is larger than 16 bytes. */
2398 classes
[0] = X86_64_SSE_CLASS
;
2399 classes
[1] = X86_64_SSEUP_CLASS
;
2400 classes
[2] = X86_64_SSEUP_CLASS
;
2401 classes
[3] = X86_64_SSEUP_CLASS
;
2409 classes
[0] = X86_64_SSE_CLASS
;
2410 classes
[1] = X86_64_SSEUP_CLASS
;
2411 classes
[2] = X86_64_SSEUP_CLASS
;
2412 classes
[3] = X86_64_SSEUP_CLASS
;
2413 classes
[4] = X86_64_SSEUP_CLASS
;
2414 classes
[5] = X86_64_SSEUP_CLASS
;
2415 classes
[6] = X86_64_SSEUP_CLASS
;
2416 classes
[7] = X86_64_SSEUP_CLASS
;
2424 classes
[0] = X86_64_SSE_CLASS
;
2425 classes
[1] = X86_64_SSEUP_CLASS
;
2433 classes
[0] = X86_64_SSE_CLASS
;
2439 gcc_assert (VECTOR_MODE_P (mode
));
2444 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
2446 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2447 classes
[0] = X86_64_INTEGERSI_CLASS
;
2449 classes
[0] = X86_64_INTEGER_CLASS
;
2450 classes
[1] = X86_64_INTEGER_CLASS
;
2451 return 1 + (bytes
> 8);
2455 /* Examine the argument and return set number of register required in each
2456 class. Return true iff parameter should be passed in memory. */
2459 examine_argument (machine_mode mode
, const_tree type
, int in_return
,
2460 int *int_nregs
, int *sse_nregs
)
2462 enum x86_64_reg_class regclass
[MAX_CLASSES
];
2463 int n
= classify_argument (mode
, type
, regclass
, 0);
2470 for (n
--; n
>= 0; n
--)
2471 switch (regclass
[n
])
2473 case X86_64_INTEGER_CLASS
:
2474 case X86_64_INTEGERSI_CLASS
:
2477 case X86_64_SSE_CLASS
:
2478 case X86_64_SSESF_CLASS
:
2479 case X86_64_SSEDF_CLASS
:
2482 case X86_64_NO_CLASS
:
2483 case X86_64_SSEUP_CLASS
:
2485 case X86_64_X87_CLASS
:
2486 case X86_64_X87UP_CLASS
:
2487 case X86_64_COMPLEX_X87_CLASS
:
2491 case X86_64_MEMORY_CLASS
:
2498 /* Construct container for the argument used by GCC interface. See
2499 FUNCTION_ARG for the detailed description. */
2502 construct_container (machine_mode mode
, machine_mode orig_mode
,
2503 const_tree type
, int in_return
, int nintregs
, int nsseregs
,
2504 const int *intreg
, int sse_regno
)
2506 /* The following variables hold the static issued_error state. */
2507 static bool issued_sse_arg_error
;
2508 static bool issued_sse_ret_error
;
2509 static bool issued_x87_ret_error
;
2511 machine_mode tmpmode
;
2513 = mode
== BLKmode
? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
2514 enum x86_64_reg_class regclass
[MAX_CLASSES
];
2518 int needed_sseregs
, needed_intregs
;
2519 rtx exp
[MAX_CLASSES
];
2522 n
= classify_argument (mode
, type
, regclass
, 0);
2525 if (examine_argument (mode
, type
, in_return
, &needed_intregs
,
2528 if (needed_intregs
> nintregs
|| needed_sseregs
> nsseregs
)
2531 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2532 some less clueful developer tries to use floating-point anyway. */
2533 if (needed_sseregs
&& !TARGET_SSE
)
2537 if (!issued_sse_ret_error
)
2539 error ("SSE register return with SSE disabled");
2540 issued_sse_ret_error
= true;
2543 else if (!issued_sse_arg_error
)
2545 error ("SSE register argument with SSE disabled");
2546 issued_sse_arg_error
= true;
2551 /* Likewise, error if the ABI requires us to return values in the
2552 x87 registers and the user specified -mno-80387. */
2553 if (!TARGET_FLOAT_RETURNS_IN_80387
&& in_return
)
2554 for (i
= 0; i
< n
; i
++)
2555 if (regclass
[i
] == X86_64_X87_CLASS
2556 || regclass
[i
] == X86_64_X87UP_CLASS
2557 || regclass
[i
] == X86_64_COMPLEX_X87_CLASS
)
2559 if (!issued_x87_ret_error
)
2561 error ("x87 register return with x87 disabled");
2562 issued_x87_ret_error
= true;
2567 /* First construct simple cases. Avoid SCmode, since we want to use
2568 single register to pass this type. */
2569 if (n
== 1 && mode
!= SCmode
)
2570 switch (regclass
[0])
2572 case X86_64_INTEGER_CLASS
:
2573 case X86_64_INTEGERSI_CLASS
:
2574 return gen_rtx_REG (mode
, intreg
[0]);
2575 case X86_64_SSE_CLASS
:
2576 case X86_64_SSESF_CLASS
:
2577 case X86_64_SSEDF_CLASS
:
2578 if (mode
!= BLKmode
)
2579 return gen_reg_or_parallel (mode
, orig_mode
,
2580 GET_SSE_REGNO (sse_regno
));
2582 case X86_64_X87_CLASS
:
2583 case X86_64_COMPLEX_X87_CLASS
:
2584 return gen_rtx_REG (mode
, FIRST_STACK_REG
);
2585 case X86_64_NO_CLASS
:
2586 /* Zero sized array, struct or class. */
2592 && regclass
[0] == X86_64_SSE_CLASS
2593 && regclass
[1] == X86_64_SSEUP_CLASS
2595 return gen_reg_or_parallel (mode
, orig_mode
,
2596 GET_SSE_REGNO (sse_regno
));
2598 && regclass
[0] == X86_64_SSE_CLASS
2599 && regclass
[1] == X86_64_SSEUP_CLASS
2600 && regclass
[2] == X86_64_SSEUP_CLASS
2601 && regclass
[3] == X86_64_SSEUP_CLASS
2603 return gen_reg_or_parallel (mode
, orig_mode
,
2604 GET_SSE_REGNO (sse_regno
));
2606 && regclass
[0] == X86_64_SSE_CLASS
2607 && regclass
[1] == X86_64_SSEUP_CLASS
2608 && regclass
[2] == X86_64_SSEUP_CLASS
2609 && regclass
[3] == X86_64_SSEUP_CLASS
2610 && regclass
[4] == X86_64_SSEUP_CLASS
2611 && regclass
[5] == X86_64_SSEUP_CLASS
2612 && regclass
[6] == X86_64_SSEUP_CLASS
2613 && regclass
[7] == X86_64_SSEUP_CLASS
2615 return gen_reg_or_parallel (mode
, orig_mode
,
2616 GET_SSE_REGNO (sse_regno
));
2618 && regclass
[0] == X86_64_X87_CLASS
2619 && regclass
[1] == X86_64_X87UP_CLASS
)
2620 return gen_rtx_REG (XFmode
, FIRST_STACK_REG
);
2623 && regclass
[0] == X86_64_INTEGER_CLASS
2624 && regclass
[1] == X86_64_INTEGER_CLASS
2625 && (mode
== CDImode
|| mode
== TImode
|| mode
== BLKmode
)
2626 && intreg
[0] + 1 == intreg
[1])
2628 if (mode
== BLKmode
)
2630 /* Use TImode for BLKmode values in 2 integer registers. */
2631 exp
[0] = gen_rtx_EXPR_LIST (VOIDmode
,
2632 gen_rtx_REG (TImode
, intreg
[0]),
2634 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (1));
2635 XVECEXP (ret
, 0, 0) = exp
[0];
2639 return gen_rtx_REG (mode
, intreg
[0]);
2642 /* Otherwise figure out the entries of the PARALLEL. */
2643 for (i
= 0; i
< n
; i
++)
2647 switch (regclass
[i
])
2649 case X86_64_NO_CLASS
:
2651 case X86_64_INTEGER_CLASS
:
2652 case X86_64_INTEGERSI_CLASS
:
2653 /* Merge TImodes on aligned occasions here too. */
2654 if (i
* 8 + 8 > bytes
)
2656 unsigned int tmpbits
= (bytes
- i
* 8) * BITS_PER_UNIT
;
2657 if (!int_mode_for_size (tmpbits
, 0).exists (&tmpmode
))
2658 /* We've requested 24 bytes we
2659 don't have mode for. Use DImode. */
2662 else if (regclass
[i
] == X86_64_INTEGERSI_CLASS
)
2667 = gen_rtx_EXPR_LIST (VOIDmode
,
2668 gen_rtx_REG (tmpmode
, *intreg
),
2672 case X86_64_SSESF_CLASS
:
2674 = gen_rtx_EXPR_LIST (VOIDmode
,
2675 gen_rtx_REG (SFmode
,
2676 GET_SSE_REGNO (sse_regno
)),
2680 case X86_64_SSEDF_CLASS
:
2682 = gen_rtx_EXPR_LIST (VOIDmode
,
2683 gen_rtx_REG (DFmode
,
2684 GET_SSE_REGNO (sse_regno
)),
2688 case X86_64_SSE_CLASS
:
2696 if (i
== 0 && regclass
[1] == X86_64_SSEUP_CLASS
)
2706 && regclass
[1] == X86_64_SSEUP_CLASS
2707 && regclass
[2] == X86_64_SSEUP_CLASS
2708 && regclass
[3] == X86_64_SSEUP_CLASS
);
2714 && regclass
[1] == X86_64_SSEUP_CLASS
2715 && regclass
[2] == X86_64_SSEUP_CLASS
2716 && regclass
[3] == X86_64_SSEUP_CLASS
2717 && regclass
[4] == X86_64_SSEUP_CLASS
2718 && regclass
[5] == X86_64_SSEUP_CLASS
2719 && regclass
[6] == X86_64_SSEUP_CLASS
2720 && regclass
[7] == X86_64_SSEUP_CLASS
);
2728 = gen_rtx_EXPR_LIST (VOIDmode
,
2729 gen_rtx_REG (tmpmode
,
2730 GET_SSE_REGNO (sse_regno
)),
2739 /* Empty aligned struct, union or class. */
2743 ret
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nexps
));
2744 for (i
= 0; i
< nexps
; i
++)
2745 XVECEXP (ret
, 0, i
) = exp
[i
];
2749 /* Update the data in CUM to advance over an argument of mode MODE
2750 and data type TYPE. (TYPE is null for libcalls where that information
2751 may not be available.)
2753 Return a number of integer regsiters advanced over. */
2756 function_arg_advance_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2757 const_tree type
, HOST_WIDE_INT bytes
,
2758 HOST_WIDE_INT words
)
2761 bool error_p
= false;
2765 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2766 bytes in registers. */
2767 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
2787 cum
->words
+= words
;
2788 cum
->nregs
-= words
;
2789 cum
->regno
+= words
;
2790 if (cum
->nregs
>= 0)
2792 if (cum
->nregs
<= 0)
2795 cfun
->machine
->arg_reg_available
= false;
2801 /* OImode shouldn't be used directly. */
2805 if (cum
->float_in_sse
== -1)
2807 if (cum
->float_in_sse
< 2)
2811 if (cum
->float_in_sse
== -1)
2813 if (cum
->float_in_sse
< 1)
2836 if (!type
|| !AGGREGATE_TYPE_P (type
))
2838 cum
->sse_words
+= words
;
2839 cum
->sse_nregs
-= 1;
2840 cum
->sse_regno
+= 1;
2841 if (cum
->sse_nregs
<= 0)
2855 if (!type
|| !AGGREGATE_TYPE_P (type
))
2857 cum
->mmx_words
+= words
;
2858 cum
->mmx_nregs
-= 1;
2859 cum
->mmx_regno
+= 1;
2860 if (cum
->mmx_nregs
<= 0)
2870 cum
->float_in_sse
= 0;
2871 error ("calling %qD with SSE calling convention without "
2872 "SSE/SSE2 enabled", cum
->decl
);
2873 sorry ("this is a GCC bug that can be worked around by adding "
2874 "attribute used to function called");
2881 function_arg_advance_64 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2882 const_tree type
, HOST_WIDE_INT words
, bool named
)
2884 int int_nregs
, sse_nregs
;
2886 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
2887 if (!named
&& (VALID_AVX512F_REG_MODE (mode
)
2888 || VALID_AVX256_REG_MODE (mode
)))
2891 if (!examine_argument (mode
, type
, 0, &int_nregs
, &sse_nregs
)
2892 && sse_nregs
<= cum
->sse_nregs
&& int_nregs
<= cum
->nregs
)
2894 cum
->nregs
-= int_nregs
;
2895 cum
->sse_nregs
-= sse_nregs
;
2896 cum
->regno
+= int_nregs
;
2897 cum
->sse_regno
+= sse_nregs
;
2902 int align
= ix86_function_arg_boundary (mode
, type
) / BITS_PER_WORD
;
2903 cum
->words
= ROUND_UP (cum
->words
, align
);
2904 cum
->words
+= words
;
2910 function_arg_advance_ms_64 (CUMULATIVE_ARGS
*cum
, HOST_WIDE_INT bytes
,
2911 HOST_WIDE_INT words
)
2913 /* Otherwise, this should be passed indirect. */
2914 gcc_assert (bytes
== 1 || bytes
== 2 || bytes
== 4 || bytes
== 8);
2916 cum
->words
+= words
;
2926 /* Update the data in CUM to advance over argument ARG. */
2929 ix86_function_arg_advance (cumulative_args_t cum_v
,
2930 const function_arg_info
&arg
)
2932 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
2933 machine_mode mode
= arg
.mode
;
2934 HOST_WIDE_INT bytes
, words
;
2937 /* The argument of interrupt handler is a special case and is
2938 handled in ix86_function_arg. */
2939 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
2942 bytes
= arg
.promoted_size_in_bytes ();
2943 words
= CEIL (bytes
, UNITS_PER_WORD
);
2946 mode
= type_natural_mode (arg
.type
, NULL
, false);
2950 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
2952 if (call_abi
== MS_ABI
)
2953 nregs
= function_arg_advance_ms_64 (cum
, bytes
, words
);
2955 nregs
= function_arg_advance_64 (cum
, mode
, arg
.type
, words
,
2959 nregs
= function_arg_advance_32 (cum
, mode
, arg
.type
, bytes
, words
);
2963 /* Track if there are outgoing arguments on stack. */
2965 cfun
->machine
->outgoing_args_on_stack
= true;
2969 /* Define where to put the arguments to a function.
2970 Value is zero to push the argument on the stack,
2971 or a hard register in which to store the argument.
2973 MODE is the argument's machine mode.
2974 TYPE is the data type of the argument (as a tree).
2975 This is null for libcalls where that information may
2977 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2978 the preceding args and about the function being called.
2979 NAMED is nonzero if this argument is a named parameter
2980 (otherwise it is an extra parameter matching an ellipsis). */
2983 function_arg_32 (CUMULATIVE_ARGS
*cum
, machine_mode mode
,
2984 machine_mode orig_mode
, const_tree type
,
2985 HOST_WIDE_INT bytes
, HOST_WIDE_INT words
)
2987 bool error_p
= false;
2989 /* Avoid the AL settings for the Unix64 ABI. */
2990 if (mode
== VOIDmode
)
2995 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2996 bytes in registers. */
2997 if (!VECTOR_MODE_P (mode
) && bytes
<= 8)
3016 if (words
<= cum
->nregs
)
3018 int regno
= cum
->regno
;
3020 /* Fastcall allocates the first two DWORD (SImode) or
3021 smaller arguments to ECX and EDX if it isn't an
3027 || (type
&& AGGREGATE_TYPE_P (type
)))
3030 /* ECX not EAX is the first allocated register. */
3031 if (regno
== AX_REG
)
3034 return gen_rtx_REG (mode
, regno
);
3039 if (cum
->float_in_sse
== -1)
3041 if (cum
->float_in_sse
< 2)
3045 if (cum
->float_in_sse
== -1)
3047 if (cum
->float_in_sse
< 1)
3051 /* In 32bit, we pass TImode in xmm registers. */
3058 if (!type
|| !AGGREGATE_TYPE_P (type
))
3061 return gen_reg_or_parallel (mode
, orig_mode
,
3062 cum
->sse_regno
+ FIRST_SSE_REG
);
3068 /* OImode and XImode shouldn't be used directly. */
3083 if (!type
|| !AGGREGATE_TYPE_P (type
))
3086 return gen_reg_or_parallel (mode
, orig_mode
,
3087 cum
->sse_regno
+ FIRST_SSE_REG
);
3097 if (!type
|| !AGGREGATE_TYPE_P (type
))
3100 return gen_reg_or_parallel (mode
, orig_mode
,
3101 cum
->mmx_regno
+ FIRST_MMX_REG
);
3107 cum
->float_in_sse
= 0;
3108 error ("calling %qD with SSE calling convention without "
3109 "SSE/SSE2 enabled", cum
->decl
);
3110 sorry ("this is a GCC bug that can be worked around by adding "
3111 "attribute used to function called");
3118 function_arg_64 (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3119 machine_mode orig_mode
, const_tree type
, bool named
)
3121 /* Handle a hidden AL argument containing number of registers
3122 for varargs x86-64 functions. */
3123 if (mode
== VOIDmode
)
3124 return GEN_INT (cum
->maybe_vaarg
3125 ? (cum
->sse_nregs
< 0
3126 ? X86_64_SSE_REGPARM_MAX
3147 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
3153 return construct_container (mode
, orig_mode
, type
, 0, cum
->nregs
,
3155 &x86_64_int_parameter_registers
[cum
->regno
],
3160 function_arg_ms_64 (const CUMULATIVE_ARGS
*cum
, machine_mode mode
,
3161 machine_mode orig_mode
, bool named
,
3162 HOST_WIDE_INT bytes
)
3166 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
3167 We use value of -2 to specify that current function call is MSABI. */
3168 if (mode
== VOIDmode
)
3169 return GEN_INT (-2);
3171 /* If we've run out of registers, it goes on the stack. */
3172 if (cum
->nregs
== 0)
3175 regno
= x86_64_ms_abi_int_parameter_registers
[cum
->regno
];
3177 /* Only floating point modes are passed in anything but integer regs. */
3178 if (TARGET_SSE
&& (mode
== SFmode
|| mode
== DFmode
))
3181 regno
= cum
->regno
+ FIRST_SSE_REG
;
3186 /* Unnamed floating parameters are passed in both the
3187 SSE and integer registers. */
3188 t1
= gen_rtx_REG (mode
, cum
->regno
+ FIRST_SSE_REG
);
3189 t2
= gen_rtx_REG (mode
, regno
);
3190 t1
= gen_rtx_EXPR_LIST (VOIDmode
, t1
, const0_rtx
);
3191 t2
= gen_rtx_EXPR_LIST (VOIDmode
, t2
, const0_rtx
);
3192 return gen_rtx_PARALLEL (mode
, gen_rtvec (2, t1
, t2
));
3195 /* Handle aggregated types passed in register. */
3196 if (orig_mode
== BLKmode
)
3198 if (bytes
> 0 && bytes
<= 8)
3199 mode
= (bytes
> 4 ? DImode
: SImode
);
3200 if (mode
== BLKmode
)
3204 return gen_reg_or_parallel (mode
, orig_mode
, regno
);
3207 /* Return where to put the arguments to a function.
3208 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3210 ARG describes the argument while CUM gives information about the
3211 preceding args and about the function being called. */
3214 ix86_function_arg (cumulative_args_t cum_v
, const function_arg_info
&arg
)
3216 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3217 machine_mode mode
= arg
.mode
;
3218 HOST_WIDE_INT bytes
, words
;
3221 if (!cum
->caller
&& cfun
->machine
->func_type
!= TYPE_NORMAL
)
3223 gcc_assert (arg
.type
!= NULL_TREE
);
3224 if (POINTER_TYPE_P (arg
.type
))
3226 /* This is the pointer argument. */
3227 gcc_assert (TYPE_MODE (arg
.type
) == Pmode
);
3228 /* It is at -WORD(AP) in the current frame in interrupt and
3229 exception handlers. */
3230 reg
= plus_constant (Pmode
, arg_pointer_rtx
, -UNITS_PER_WORD
);
3234 gcc_assert (cfun
->machine
->func_type
== TYPE_EXCEPTION
3235 && TREE_CODE (arg
.type
) == INTEGER_TYPE
3236 && TYPE_MODE (arg
.type
) == word_mode
);
3237 /* The error code is the word-mode integer argument at
3238 -2 * WORD(AP) in the current frame of the exception
3240 reg
= gen_rtx_MEM (word_mode
,
3241 plus_constant (Pmode
,
3243 -2 * UNITS_PER_WORD
));
3248 bytes
= arg
.promoted_size_in_bytes ();
3249 words
= CEIL (bytes
, UNITS_PER_WORD
);
3251 /* To simplify the code below, represent vector types with a vector mode
3252 even if MMX/SSE are not active. */
3253 if (arg
.type
&& TREE_CODE (arg
.type
) == VECTOR_TYPE
)
3254 mode
= type_natural_mode (arg
.type
, cum
, false);
3258 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
3260 if (call_abi
== MS_ABI
)
3261 reg
= function_arg_ms_64 (cum
, mode
, arg
.mode
, arg
.named
, bytes
);
3263 reg
= function_arg_64 (cum
, mode
, arg
.mode
, arg
.type
, arg
.named
);
3266 reg
= function_arg_32 (cum
, mode
, arg
.mode
, arg
.type
, bytes
, words
);
3268 /* Track if there are outgoing arguments on stack. */
3269 if (reg
== NULL_RTX
&& cum
->caller
)
3270 cfun
->machine
->outgoing_args_on_stack
= true;
3275 /* A C expression that indicates when an argument must be passed by
3276 reference. If nonzero for an argument, a copy of that argument is
3277 made in memory and a pointer to the argument is passed instead of
3278 the argument itself. The pointer is passed in whatever way is
3279 appropriate for passing a pointer to that type. */
3282 ix86_pass_by_reference (cumulative_args_t cum_v
, const function_arg_info
&arg
)
3284 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3288 enum calling_abi call_abi
= cum
? cum
->call_abi
: ix86_abi
;
3290 /* See Windows x64 Software Convention. */
3291 if (call_abi
== MS_ABI
)
3293 HOST_WIDE_INT msize
= GET_MODE_SIZE (arg
.mode
);
3295 if (tree type
= arg
.type
)
3297 /* Arrays are passed by reference. */
3298 if (TREE_CODE (type
) == ARRAY_TYPE
)
3301 if (RECORD_OR_UNION_TYPE_P (type
))
3303 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3304 are passed by reference. */
3305 msize
= int_size_in_bytes (type
);
3309 /* __m128 is passed by reference. */
3310 return msize
!= 1 && msize
!= 2 && msize
!= 4 && msize
!= 8;
3312 else if (arg
.type
&& int_size_in_bytes (arg
.type
) == -1)
3319 /* Return true when TYPE should be 128bit aligned for 32bit argument
3320 passing ABI. XXX: This function is obsolete and is only used for
3321 checking psABI compatibility with previous versions of GCC. */
3324 ix86_compat_aligned_value_p (const_tree type
)
3326 machine_mode mode
= TYPE_MODE (type
);
3327 if (((TARGET_SSE
&& SSE_REG_MODE_P (mode
))
3331 && (!TYPE_USER_ALIGN (type
) || TYPE_ALIGN (type
) > 128))
3333 if (TYPE_ALIGN (type
) < 128)
3336 if (AGGREGATE_TYPE_P (type
))
3338 /* Walk the aggregates recursively. */
3339 switch (TREE_CODE (type
))
3343 case QUAL_UNION_TYPE
:
3347 /* Walk all the structure fields. */
3348 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3350 if (TREE_CODE (field
) == FIELD_DECL
3351 && ix86_compat_aligned_value_p (TREE_TYPE (field
)))
3358 /* Just for use if some languages passes arrays by value. */
3359 if (ix86_compat_aligned_value_p (TREE_TYPE (type
)))
3370 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3371 XXX: This function is obsolete and is only used for checking psABI
3372 compatibility with previous versions of GCC. */
3375 ix86_compat_function_arg_boundary (machine_mode mode
,
3376 const_tree type
, unsigned int align
)
3378 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3379 natural boundaries. */
3380 if (!TARGET_64BIT
&& mode
!= TDmode
&& mode
!= TFmode
)
3382 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3383 make an exception for SSE modes since these require 128bit
3386 The handling here differs from field_alignment. ICC aligns MMX
3387 arguments to 4 byte boundaries, while structure fields are aligned
3388 to 8 byte boundaries. */
3391 if (!(TARGET_SSE
&& SSE_REG_MODE_P (mode
)))
3392 align
= PARM_BOUNDARY
;
3396 if (!ix86_compat_aligned_value_p (type
))
3397 align
= PARM_BOUNDARY
;
3400 if (align
> BIGGEST_ALIGNMENT
)
3401 align
= BIGGEST_ALIGNMENT
;
3405 /* Return true when TYPE should be 128bit aligned for 32bit argument
3409 ix86_contains_aligned_value_p (const_tree type
)
3411 machine_mode mode
= TYPE_MODE (type
);
3413 if (mode
== XFmode
|| mode
== XCmode
)
3416 if (TYPE_ALIGN (type
) < 128)
3419 if (AGGREGATE_TYPE_P (type
))
3421 /* Walk the aggregates recursively. */
3422 switch (TREE_CODE (type
))
3426 case QUAL_UNION_TYPE
:
3430 /* Walk all the structure fields. */
3431 for (field
= TYPE_FIELDS (type
);
3433 field
= DECL_CHAIN (field
))
3435 if (TREE_CODE (field
) == FIELD_DECL
3436 && ix86_contains_aligned_value_p (TREE_TYPE (field
)))
3443 /* Just for use if some languages passes arrays by value. */
3444 if (ix86_contains_aligned_value_p (TREE_TYPE (type
)))
3453 return TYPE_ALIGN (type
) >= 128;
3458 /* Gives the alignment boundary, in bits, of an argument with the
3459 specified mode and type. */
3462 ix86_function_arg_boundary (machine_mode mode
, const_tree type
)
3467 /* Since the main variant type is used for call, we convert it to
3468 the main variant type. */
3469 type
= TYPE_MAIN_VARIANT (type
);
3470 align
= TYPE_ALIGN (type
);
3471 if (TYPE_EMPTY_P (type
))
3472 return PARM_BOUNDARY
;
3475 align
= GET_MODE_ALIGNMENT (mode
);
3476 if (align
< PARM_BOUNDARY
)
3477 align
= PARM_BOUNDARY
;
3481 unsigned int saved_align
= align
;
3485 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3488 if (mode
== XFmode
|| mode
== XCmode
)
3489 align
= PARM_BOUNDARY
;
3491 else if (!ix86_contains_aligned_value_p (type
))
3492 align
= PARM_BOUNDARY
;
3495 align
= PARM_BOUNDARY
;
3500 && align
!= ix86_compat_function_arg_boundary (mode
, type
,
3504 inform (input_location
,
3505 "the ABI for passing parameters with %d-byte"
3506 " alignment has changed in GCC 4.6",
3507 align
/ BITS_PER_UNIT
);
3514 /* Return true if N is a possible register number of function value. */
3517 ix86_function_value_regno_p (const unsigned int regno
)
3524 return (!TARGET_64BIT
|| ix86_cfun_abi () != MS_ABI
);
3527 return TARGET_64BIT
&& ix86_cfun_abi () != MS_ABI
;
3529 /* Complex values are returned in %st(0)/%st(1) pair. */
3532 /* TODO: The function should depend on current function ABI but
3533 builtins.c would need updating then. Therefore we use the
3535 if (TARGET_64BIT
&& ix86_cfun_abi () == MS_ABI
)
3537 return TARGET_FLOAT_RETURNS_IN_80387
;
3539 /* Complex values are returned in %xmm0/%xmm1 pair. */
3545 if (TARGET_MACHO
|| TARGET_64BIT
)
3553 /* Define how to find the value returned by a function.
3554 VALTYPE is the data type of the value (as a tree).
3555 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3556 otherwise, FUNC is 0. */
3559 function_value_32 (machine_mode orig_mode
, machine_mode mode
,
3560 const_tree fntype
, const_tree fn
)
3564 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3565 we normally prevent this case when mmx is not available. However
3566 some ABIs may require the result to be returned like DImode. */
3567 if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 8)
3568 regno
= FIRST_MMX_REG
;
3570 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3571 we prevent this case when sse is not available. However some ABIs
3572 may require the result to be returned like integer TImode. */
3573 else if (mode
== TImode
3574 || (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 16))
3575 regno
= FIRST_SSE_REG
;
3577 /* 32-byte vector modes in %ymm0. */
3578 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 32)
3579 regno
= FIRST_SSE_REG
;
3581 /* 64-byte vector modes in %zmm0. */
3582 else if (VECTOR_MODE_P (mode
) && GET_MODE_SIZE (mode
) == 64)
3583 regno
= FIRST_SSE_REG
;
3585 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
3586 else if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
)
3587 regno
= FIRST_FLOAT_REG
;
3589 /* Most things go in %eax. */
3592 /* Override FP return register with %xmm0 for local functions when
3593 SSE math is enabled or for functions with sseregparm attribute. */
3594 if ((fn
|| fntype
) && (mode
== SFmode
|| mode
== DFmode
))
3596 int sse_level
= ix86_function_sseregparm (fntype
, fn
, false);
3597 if (sse_level
== -1)
3599 error ("calling %qD with SSE calling convention without "
3600 "SSE/SSE2 enabled", fn
);
3601 sorry ("this is a GCC bug that can be worked around by adding "
3602 "attribute used to function called");
3604 else if ((sse_level
>= 1 && mode
== SFmode
)
3605 || (sse_level
== 2 && mode
== DFmode
))
3606 regno
= FIRST_SSE_REG
;
3609 /* OImode shouldn't be used directly. */
3610 gcc_assert (mode
!= OImode
);
3612 return gen_rtx_REG (orig_mode
, regno
);
3616 function_value_64 (machine_mode orig_mode
, machine_mode mode
,
3621 /* Handle libcalls, which don't provide a type node. */
3622 if (valtype
== NULL
)
3636 regno
= FIRST_SSE_REG
;
3640 regno
= FIRST_FLOAT_REG
;
3648 return gen_rtx_REG (mode
, regno
);
3650 else if (POINTER_TYPE_P (valtype
))
3652 /* Pointers are always returned in word_mode. */
3656 ret
= construct_container (mode
, orig_mode
, valtype
, 1,
3657 X86_64_REGPARM_MAX
, X86_64_SSE_REGPARM_MAX
,
3658 x86_64_int_return_registers
, 0);
3660 /* For zero sized structures, construct_container returns NULL, but we
3661 need to keep rest of compiler happy by returning meaningful value. */
3663 ret
= gen_rtx_REG (orig_mode
, AX_REG
);
3669 function_value_ms_32 (machine_mode orig_mode
, machine_mode mode
,
3670 const_tree fntype
, const_tree fn
, const_tree valtype
)
3674 /* Floating point return values in %st(0)
3675 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
3676 if (X87_FLOAT_MODE_P (mode
) && TARGET_FLOAT_RETURNS_IN_80387
3677 && (GET_MODE_SIZE (mode
) > 8
3678 || valtype
== NULL_TREE
|| !AGGREGATE_TYPE_P (valtype
)))
3680 regno
= FIRST_FLOAT_REG
;
3681 return gen_rtx_REG (orig_mode
, regno
);
3684 return function_value_32(orig_mode
, mode
, fntype
,fn
);
3688 function_value_ms_64 (machine_mode orig_mode
, machine_mode mode
,
3691 unsigned int regno
= AX_REG
;
3695 switch (GET_MODE_SIZE (mode
))
3698 if (valtype
!= NULL_TREE
3699 && !VECTOR_INTEGER_TYPE_P (valtype
)
3700 && !VECTOR_INTEGER_TYPE_P (valtype
)
3701 && !INTEGRAL_TYPE_P (valtype
)
3702 && !VECTOR_FLOAT_TYPE_P (valtype
))
3704 if ((SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
3705 && !COMPLEX_MODE_P (mode
))
3706 regno
= FIRST_SSE_REG
;
3710 if (valtype
!= NULL_TREE
&& AGGREGATE_TYPE_P (valtype
))
3712 if (mode
== SFmode
|| mode
== DFmode
)
3713 regno
= FIRST_SSE_REG
;
3719 return gen_rtx_REG (orig_mode
, regno
);
3723 ix86_function_value_1 (const_tree valtype
, const_tree fntype_or_decl
,
3724 machine_mode orig_mode
, machine_mode mode
)
3726 const_tree fn
, fntype
;
3729 if (fntype_or_decl
&& DECL_P (fntype_or_decl
))
3730 fn
= fntype_or_decl
;
3731 fntype
= fn
? TREE_TYPE (fn
) : fntype_or_decl
;
3733 if (ix86_function_type_abi (fntype
) == MS_ABI
)
3736 return function_value_ms_64 (orig_mode
, mode
, valtype
);
3738 return function_value_ms_32 (orig_mode
, mode
, fntype
, fn
, valtype
);
3740 else if (TARGET_64BIT
)
3741 return function_value_64 (orig_mode
, mode
, valtype
);
3743 return function_value_32 (orig_mode
, mode
, fntype
, fn
);
3747 ix86_function_value (const_tree valtype
, const_tree fntype_or_decl
, bool)
3749 machine_mode mode
, orig_mode
;
3751 orig_mode
= TYPE_MODE (valtype
);
3752 mode
= type_natural_mode (valtype
, NULL
, true);
3753 return ix86_function_value_1 (valtype
, fntype_or_decl
, orig_mode
, mode
);
3756 /* Pointer function arguments and return values are promoted to
3757 word_mode for normal functions. */
3760 ix86_promote_function_mode (const_tree type
, machine_mode mode
,
3761 int *punsignedp
, const_tree fntype
,
3764 if (cfun
->machine
->func_type
== TYPE_NORMAL
3765 && type
!= NULL_TREE
3766 && POINTER_TYPE_P (type
))
3768 *punsignedp
= POINTERS_EXTEND_UNSIGNED
;
3771 return default_promote_function_mode (type
, mode
, punsignedp
, fntype
,
3775 /* Return true if a structure, union or array with MODE containing FIELD
3776 should be accessed using BLKmode. */
3779 ix86_member_type_forces_blk (const_tree field
, machine_mode mode
)
3781 /* Union with XFmode must be in BLKmode. */
3782 return (mode
== XFmode
3783 && (TREE_CODE (DECL_FIELD_CONTEXT (field
)) == UNION_TYPE
3784 || TREE_CODE (DECL_FIELD_CONTEXT (field
)) == QUAL_UNION_TYPE
));
3788 ix86_libcall_value (machine_mode mode
)
3790 return ix86_function_value_1 (NULL
, NULL
, mode
, mode
);
3793 /* Return true iff type is returned in memory. */
3796 ix86_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
3798 #ifdef SUBTARGET_RETURN_IN_MEMORY
3799 return SUBTARGET_RETURN_IN_MEMORY (type
, fntype
);
3801 const machine_mode mode
= type_natural_mode (type
, NULL
, true);
3806 if (ix86_function_type_abi (fntype
) == MS_ABI
)
3808 size
= int_size_in_bytes (type
);
3810 /* __m128 is returned in xmm0. */
3811 if ((!type
|| VECTOR_INTEGER_TYPE_P (type
)
3812 || INTEGRAL_TYPE_P (type
)
3813 || VECTOR_FLOAT_TYPE_P (type
))
3814 && (SCALAR_INT_MODE_P (mode
) || VECTOR_MODE_P (mode
))
3815 && !COMPLEX_MODE_P (mode
)
3816 && (GET_MODE_SIZE (mode
) == 16 || size
== 16))
3819 /* Otherwise, the size must be exactly in [1248]. */
3820 return size
!= 1 && size
!= 2 && size
!= 4 && size
!= 8;
3824 int needed_intregs
, needed_sseregs
;
3826 return examine_argument (mode
, type
, 1,
3827 &needed_intregs
, &needed_sseregs
);
3832 size
= int_size_in_bytes (type
);
3834 /* Intel MCU psABI returns scalars and aggregates no larger than 8
3835 bytes in registers. */
3837 return VECTOR_MODE_P (mode
) || size
< 0 || size
> 8;
3839 if (mode
== BLKmode
)
3842 if (MS_AGGREGATE_RETURN
&& AGGREGATE_TYPE_P (type
) && size
<= 8)
3845 if (VECTOR_MODE_P (mode
) || mode
== TImode
)
3847 /* User-created vectors small enough to fit in EAX. */
3851 /* Unless ABI prescibes otherwise,
3852 MMX/3dNow values are returned in MM0 if available. */
3855 return TARGET_VECT8_RETURNS
|| !TARGET_MMX
;
3857 /* SSE values are returned in XMM0 if available. */
3861 /* AVX values are returned in YMM0 if available. */
3865 /* AVX512F values are returned in ZMM0 if available. */
3867 return !TARGET_AVX512F
;
3876 /* OImode shouldn't be used directly. */
3877 gcc_assert (mode
!= OImode
);
3885 /* Create the va_list data type. */
3888 ix86_build_builtin_va_list_64 (void)
3890 tree f_gpr
, f_fpr
, f_ovf
, f_sav
, record
, type_decl
;
3892 record
= lang_hooks
.types
.make_type (RECORD_TYPE
);
3893 type_decl
= build_decl (BUILTINS_LOCATION
,
3894 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3896 f_gpr
= build_decl (BUILTINS_LOCATION
,
3897 FIELD_DECL
, get_identifier ("gp_offset"),
3898 unsigned_type_node
);
3899 f_fpr
= build_decl (BUILTINS_LOCATION
,
3900 FIELD_DECL
, get_identifier ("fp_offset"),
3901 unsigned_type_node
);
3902 f_ovf
= build_decl (BUILTINS_LOCATION
,
3903 FIELD_DECL
, get_identifier ("overflow_arg_area"),
3905 f_sav
= build_decl (BUILTINS_LOCATION
,
3906 FIELD_DECL
, get_identifier ("reg_save_area"),
3909 va_list_gpr_counter_field
= f_gpr
;
3910 va_list_fpr_counter_field
= f_fpr
;
3912 DECL_FIELD_CONTEXT (f_gpr
) = record
;
3913 DECL_FIELD_CONTEXT (f_fpr
) = record
;
3914 DECL_FIELD_CONTEXT (f_ovf
) = record
;
3915 DECL_FIELD_CONTEXT (f_sav
) = record
;
3917 TYPE_STUB_DECL (record
) = type_decl
;
3918 TYPE_NAME (record
) = type_decl
;
3919 TYPE_FIELDS (record
) = f_gpr
;
3920 DECL_CHAIN (f_gpr
) = f_fpr
;
3921 DECL_CHAIN (f_fpr
) = f_ovf
;
3922 DECL_CHAIN (f_ovf
) = f_sav
;
3924 layout_type (record
);
3926 TYPE_ATTRIBUTES (record
) = tree_cons (get_identifier ("sysv_abi va_list"),
3927 NULL_TREE
, TYPE_ATTRIBUTES (record
));
3929 /* The correct type is an array type of one element. */
3930 return build_array_type (record
, build_index_type (size_zero_node
));
3933 /* Setup the builtin va_list data type and for 64-bit the additional
3934 calling convention specific va_list data types. */
3937 ix86_build_builtin_va_list (void)
3941 /* Initialize ABI specific va_list builtin types.
3943 In lto1, we can encounter two va_list types:
3944 - one as a result of the type-merge across TUs, and
3945 - the one constructed here.
3946 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
3947 a type identity check in canonical_va_list_type based on
3948 TYPE_MAIN_VARIANT (which we used to have) will not work.
3949 Instead, we tag each va_list_type_node with its unique attribute, and
3950 look for the attribute in the type identity check in
3951 canonical_va_list_type.
3953 Tagging sysv_va_list_type_node directly with the attribute is
3954 problematic since it's a array of one record, which will degrade into a
3955 pointer to record when used as parameter (see build_va_arg comments for
3956 an example), dropping the attribute in the process. So we tag the
3959 /* For SYSV_ABI we use an array of one record. */
3960 sysv_va_list_type_node
= ix86_build_builtin_va_list_64 ();
3962 /* For MS_ABI we use plain pointer to argument area. */
3963 tree char_ptr_type
= build_pointer_type (char_type_node
);
3964 tree attr
= tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE
,
3965 TYPE_ATTRIBUTES (char_ptr_type
));
3966 ms_va_list_type_node
= build_type_attribute_variant (char_ptr_type
, attr
);
3968 return ((ix86_abi
== MS_ABI
)
3969 ? ms_va_list_type_node
3970 : sysv_va_list_type_node
);
3974 /* For i386 we use plain pointer to argument area. */
3975 return build_pointer_type (char_type_node
);
3979 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
3982 setup_incoming_varargs_64 (CUMULATIVE_ARGS
*cum
)
3988 /* GPR size of varargs save area. */
3989 if (cfun
->va_list_gpr_size
)
3990 ix86_varargs_gpr_size
= X86_64_REGPARM_MAX
* UNITS_PER_WORD
;
3992 ix86_varargs_gpr_size
= 0;
3994 /* FPR size of varargs save area. We don't need it if we don't pass
3995 anything in SSE registers. */
3996 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
3997 ix86_varargs_fpr_size
= X86_64_SSE_REGPARM_MAX
* 16;
3999 ix86_varargs_fpr_size
= 0;
4001 if (! ix86_varargs_gpr_size
&& ! ix86_varargs_fpr_size
)
4004 save_area
= frame_pointer_rtx
;
4005 set
= get_varargs_alias_set ();
4007 max
= cum
->regno
+ cfun
->va_list_gpr_size
/ UNITS_PER_WORD
;
4008 if (max
> X86_64_REGPARM_MAX
)
4009 max
= X86_64_REGPARM_MAX
;
4011 for (i
= cum
->regno
; i
< max
; i
++)
4013 mem
= gen_rtx_MEM (word_mode
,
4014 plus_constant (Pmode
, save_area
, i
* UNITS_PER_WORD
));
4015 MEM_NOTRAP_P (mem
) = 1;
4016 set_mem_alias_set (mem
, set
);
4017 emit_move_insn (mem
,
4018 gen_rtx_REG (word_mode
,
4019 x86_64_int_parameter_registers
[i
]));
4022 if (ix86_varargs_fpr_size
)
4025 rtx_code_label
*label
;
4028 /* Now emit code to save SSE registers. The AX parameter contains number
4029 of SSE parameter registers used to call this function, though all we
4030 actually check here is the zero/non-zero status. */
4032 label
= gen_label_rtx ();
4033 test
= gen_rtx_EQ (VOIDmode
, gen_rtx_REG (QImode
, AX_REG
), const0_rtx
);
4034 emit_jump_insn (gen_cbranchqi4 (test
, XEXP (test
, 0), XEXP (test
, 1),
4037 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4038 we used movdqa (i.e. TImode) instead? Perhaps even better would
4039 be if we could determine the real mode of the data, via a hook
4040 into pass_stdarg. Ignore all that for now. */
4042 if (crtl
->stack_alignment_needed
< GET_MODE_ALIGNMENT (smode
))
4043 crtl
->stack_alignment_needed
= GET_MODE_ALIGNMENT (smode
);
4045 max
= cum
->sse_regno
+ cfun
->va_list_fpr_size
/ 16;
4046 if (max
> X86_64_SSE_REGPARM_MAX
)
4047 max
= X86_64_SSE_REGPARM_MAX
;
4049 for (i
= cum
->sse_regno
; i
< max
; ++i
)
4051 mem
= plus_constant (Pmode
, save_area
,
4052 i
* 16 + ix86_varargs_gpr_size
);
4053 mem
= gen_rtx_MEM (smode
, mem
);
4054 MEM_NOTRAP_P (mem
) = 1;
4055 set_mem_alias_set (mem
, set
);
4056 set_mem_align (mem
, GET_MODE_ALIGNMENT (smode
));
4058 emit_move_insn (mem
, gen_rtx_REG (smode
, GET_SSE_REGNO (i
)));
4066 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS
*cum
)
4068 alias_set_type set
= get_varargs_alias_set ();
4071 /* Reset to zero, as there might be a sysv vaarg used
4073 ix86_varargs_gpr_size
= 0;
4074 ix86_varargs_fpr_size
= 0;
4076 for (i
= cum
->regno
; i
< X86_64_MS_REGPARM_MAX
; i
++)
4080 mem
= gen_rtx_MEM (Pmode
,
4081 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4082 i
* UNITS_PER_WORD
));
4083 MEM_NOTRAP_P (mem
) = 1;
4084 set_mem_alias_set (mem
, set
);
4086 reg
= gen_rtx_REG (Pmode
, x86_64_ms_abi_int_parameter_registers
[i
]);
4087 emit_move_insn (mem
, reg
);
4092 ix86_setup_incoming_varargs (cumulative_args_t cum_v
,
4093 const function_arg_info
&arg
,
4096 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
4097 CUMULATIVE_ARGS next_cum
;
4100 /* This argument doesn't appear to be used anymore. Which is good,
4101 because the old code here didn't suppress rtl generation. */
4102 gcc_assert (!no_rtl
);
4107 fntype
= TREE_TYPE (current_function_decl
);
4109 /* For varargs, we do not want to skip the dummy va_dcl argument.
4110 For stdargs, we do want to skip the last named argument. */
4112 if (stdarg_p (fntype
))
4113 ix86_function_arg_advance (pack_cumulative_args (&next_cum
), arg
);
4115 if (cum
->call_abi
== MS_ABI
)
4116 setup_incoming_varargs_ms_64 (&next_cum
);
4118 setup_incoming_varargs_64 (&next_cum
);
4121 /* Checks if TYPE is of kind va_list char *. */
4124 is_va_list_char_pointer (tree type
)
4128 /* For 32-bit it is always true. */
4131 canonic
= ix86_canonical_va_list_type (type
);
4132 return (canonic
== ms_va_list_type_node
4133 || (ix86_abi
== MS_ABI
&& canonic
== va_list_type_node
));
4136 /* Implement va_start. */
4139 ix86_va_start (tree valist
, rtx nextarg
)
4141 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4142 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4143 tree gpr
, fpr
, ovf
, sav
, t
;
4147 if (flag_split_stack
4148 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4150 unsigned int scratch_regno
;
4152 /* When we are splitting the stack, we can't refer to the stack
4153 arguments using internal_arg_pointer, because they may be on
4154 the old stack. The split stack prologue will arrange to
4155 leave a pointer to the old stack arguments in a scratch
4156 register, which we here copy to a pseudo-register. The split
4157 stack prologue can't set the pseudo-register directly because
4158 it (the prologue) runs before any registers have been saved. */
4160 scratch_regno
= split_stack_prologue_scratch_regno ();
4161 if (scratch_regno
!= INVALID_REGNUM
)
4166 reg
= gen_reg_rtx (Pmode
);
4167 cfun
->machine
->split_stack_varargs_pointer
= reg
;
4170 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
4174 push_topmost_sequence ();
4175 emit_insn_after (seq
, entry_of_function ());
4176 pop_topmost_sequence ();
4180 /* Only 64bit target needs something special. */
4181 if (is_va_list_char_pointer (TREE_TYPE (valist
)))
4183 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4184 std_expand_builtin_va_start (valist
, nextarg
);
4189 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
4190 next
= expand_binop (ptr_mode
, add_optab
,
4191 cfun
->machine
->split_stack_varargs_pointer
,
4192 crtl
->args
.arg_offset_rtx
,
4193 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
4194 convert_move (va_r
, next
, 0);
4199 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
4200 f_fpr
= DECL_CHAIN (f_gpr
);
4201 f_ovf
= DECL_CHAIN (f_fpr
);
4202 f_sav
= DECL_CHAIN (f_ovf
);
4204 valist
= build_simple_mem_ref (valist
);
4205 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
4206 /* The following should be folded into the MEM_REF offset. */
4207 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
4209 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
4211 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
4213 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
4216 /* Count number of gp and fp argument registers used. */
4217 words
= crtl
->args
.info
.words
;
4218 n_gpr
= crtl
->args
.info
.regno
;
4219 n_fpr
= crtl
->args
.info
.sse_regno
;
4221 if (cfun
->va_list_gpr_size
)
4223 type
= TREE_TYPE (gpr
);
4224 t
= build2 (MODIFY_EXPR
, type
,
4225 gpr
, build_int_cst (type
, n_gpr
* 8));
4226 TREE_SIDE_EFFECTS (t
) = 1;
4227 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4230 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
4232 type
= TREE_TYPE (fpr
);
4233 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4234 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
4235 TREE_SIDE_EFFECTS (t
) = 1;
4236 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4239 /* Find the overflow area. */
4240 type
= TREE_TYPE (ovf
);
4241 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4242 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
4244 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
4245 t
= make_tree (type
, ovf_rtx
);
4247 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
4249 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4250 TREE_SIDE_EFFECTS (t
) = 1;
4251 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4253 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
4255 /* Find the register save area.
4256 Prologue of the function save it right above stack frame. */
4257 type
= TREE_TYPE (sav
);
4258 t
= make_tree (type
, frame_pointer_rtx
);
4259 if (!ix86_varargs_gpr_size
)
4260 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
4262 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4263 TREE_SIDE_EFFECTS (t
) = 1;
4264 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4268 /* Implement va_arg. */
4271 ix86_gimplify_va_arg (tree valist
, tree type
, gimple_seq
*pre_p
,
4274 static const int intreg
[6] = { 0, 1, 2, 3, 4, 5 };
4275 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4276 tree gpr
, fpr
, ovf
, sav
, t
;
4278 tree lab_false
, lab_over
= NULL_TREE
;
4283 machine_mode nat_mode
;
4284 unsigned int arg_boundary
;
4286 /* Only 64bit target needs something special. */
4287 if (is_va_list_char_pointer (TREE_TYPE (valist
)))
4288 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
4290 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
4291 f_fpr
= DECL_CHAIN (f_gpr
);
4292 f_ovf
= DECL_CHAIN (f_fpr
);
4293 f_sav
= DECL_CHAIN (f_ovf
);
4295 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
),
4296 valist
, f_gpr
, NULL_TREE
);
4298 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), valist
, f_fpr
, NULL_TREE
);
4299 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), valist
, f_ovf
, NULL_TREE
);
4300 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), valist
, f_sav
, NULL_TREE
);
4302 indirect_p
= pass_va_arg_by_reference (type
);
4304 type
= build_pointer_type (type
);
4305 size
= arg_int_size_in_bytes (type
);
4306 rsize
= CEIL (size
, UNITS_PER_WORD
);
4308 nat_mode
= type_natural_mode (type
, NULL
, false);
4323 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
4324 if (!TARGET_64BIT_MS_ABI
)
4332 container
= construct_container (nat_mode
, TYPE_MODE (type
),
4333 type
, 0, X86_64_REGPARM_MAX
,
4334 X86_64_SSE_REGPARM_MAX
, intreg
,
4339 /* Pull the value out of the saved registers. */
4341 addr
= create_tmp_var (ptr_type_node
, "addr");
4345 int needed_intregs
, needed_sseregs
;
4347 tree int_addr
, sse_addr
;
4349 lab_false
= create_artificial_label (UNKNOWN_LOCATION
);
4350 lab_over
= create_artificial_label (UNKNOWN_LOCATION
);
4352 examine_argument (nat_mode
, type
, 0, &needed_intregs
, &needed_sseregs
);
4354 need_temp
= (!REG_P (container
)
4355 && ((needed_intregs
&& TYPE_ALIGN (type
) > 64)
4356 || TYPE_ALIGN (type
) > 128));
4358 /* In case we are passing structure, verify that it is consecutive block
4359 on the register save area. If not we need to do moves. */
4360 if (!need_temp
&& !REG_P (container
))
4362 /* Verify that all registers are strictly consecutive */
4363 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container
, 0, 0), 0))))
4367 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4369 rtx slot
= XVECEXP (container
, 0, i
);
4370 if (REGNO (XEXP (slot
, 0)) != FIRST_SSE_REG
+ (unsigned int) i
4371 || INTVAL (XEXP (slot
, 1)) != i
* 16)
4379 for (i
= 0; i
< XVECLEN (container
, 0) && !need_temp
; i
++)
4381 rtx slot
= XVECEXP (container
, 0, i
);
4382 if (REGNO (XEXP (slot
, 0)) != (unsigned int) i
4383 || INTVAL (XEXP (slot
, 1)) != i
* 8)
4395 int_addr
= create_tmp_var (ptr_type_node
, "int_addr");
4396 sse_addr
= create_tmp_var (ptr_type_node
, "sse_addr");
4399 /* First ensure that we fit completely in registers. */
4402 t
= build_int_cst (TREE_TYPE (gpr
),
4403 (X86_64_REGPARM_MAX
- needed_intregs
+ 1) * 8);
4404 t
= build2 (GE_EXPR
, boolean_type_node
, gpr
, t
);
4405 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4406 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4407 gimplify_and_add (t
, pre_p
);
4411 t
= build_int_cst (TREE_TYPE (fpr
),
4412 (X86_64_SSE_REGPARM_MAX
- needed_sseregs
+ 1) * 16
4413 + X86_64_REGPARM_MAX
* 8);
4414 t
= build2 (GE_EXPR
, boolean_type_node
, fpr
, t
);
4415 t2
= build1 (GOTO_EXPR
, void_type_node
, lab_false
);
4416 t
= build3 (COND_EXPR
, void_type_node
, t
, t2
, NULL_TREE
);
4417 gimplify_and_add (t
, pre_p
);
4420 /* Compute index to start of area used for integer regs. */
4423 /* int_addr = gpr + sav; */
4424 t
= fold_build_pointer_plus (sav
, gpr
);
4425 gimplify_assign (int_addr
, t
, pre_p
);
4429 /* sse_addr = fpr + sav; */
4430 t
= fold_build_pointer_plus (sav
, fpr
);
4431 gimplify_assign (sse_addr
, t
, pre_p
);
4435 int i
, prev_size
= 0;
4436 tree temp
= create_tmp_var (type
, "va_arg_tmp");
4439 t
= build1 (ADDR_EXPR
, build_pointer_type (type
), temp
);
4440 gimplify_assign (addr
, t
, pre_p
);
4442 for (i
= 0; i
< XVECLEN (container
, 0); i
++)
4444 rtx slot
= XVECEXP (container
, 0, i
);
4445 rtx reg
= XEXP (slot
, 0);
4446 machine_mode mode
= GET_MODE (reg
);
4452 tree dest_addr
, dest
;
4453 int cur_size
= GET_MODE_SIZE (mode
);
4455 gcc_assert (prev_size
<= INTVAL (XEXP (slot
, 1)));
4456 prev_size
= INTVAL (XEXP (slot
, 1));
4457 if (prev_size
+ cur_size
> size
)
4459 cur_size
= size
- prev_size
;
4460 unsigned int nbits
= cur_size
* BITS_PER_UNIT
;
4461 if (!int_mode_for_size (nbits
, 1).exists (&mode
))
4464 piece_type
= lang_hooks
.types
.type_for_mode (mode
, 1);
4465 if (mode
== GET_MODE (reg
))
4466 addr_type
= build_pointer_type (piece_type
);
4468 addr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
4470 daddr_type
= build_pointer_type_for_mode (piece_type
, ptr_mode
,
4473 if (SSE_REGNO_P (REGNO (reg
)))
4475 src_addr
= sse_addr
;
4476 src_offset
= (REGNO (reg
) - FIRST_SSE_REG
) * 16;
4480 src_addr
= int_addr
;
4481 src_offset
= REGNO (reg
) * 8;
4483 src_addr
= fold_convert (addr_type
, src_addr
);
4484 src_addr
= fold_build_pointer_plus_hwi (src_addr
, src_offset
);
4486 dest_addr
= fold_convert (daddr_type
, addr
);
4487 dest_addr
= fold_build_pointer_plus_hwi (dest_addr
, prev_size
);
4488 if (cur_size
== GET_MODE_SIZE (mode
))
4490 src
= build_va_arg_indirect_ref (src_addr
);
4491 dest
= build_va_arg_indirect_ref (dest_addr
);
4493 gimplify_assign (dest
, src
, pre_p
);
4498 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY
),
4499 3, dest_addr
, src_addr
,
4500 size_int (cur_size
));
4501 gimplify_and_add (copy
, pre_p
);
4503 prev_size
+= cur_size
;
4509 t
= build2 (PLUS_EXPR
, TREE_TYPE (gpr
), gpr
,
4510 build_int_cst (TREE_TYPE (gpr
), needed_intregs
* 8));
4511 gimplify_assign (gpr
, t
, pre_p
);
4516 t
= build2 (PLUS_EXPR
, TREE_TYPE (fpr
), fpr
,
4517 build_int_cst (TREE_TYPE (fpr
), needed_sseregs
* 16));
4518 gimplify_assign (unshare_expr (fpr
), t
, pre_p
);
4521 gimple_seq_add_stmt (pre_p
, gimple_build_goto (lab_over
));
4523 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_false
));
4526 /* ... otherwise out of the overflow area. */
4528 /* When we align parameter on stack for caller, if the parameter
4529 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
4530 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
4531 here with caller. */
4532 arg_boundary
= ix86_function_arg_boundary (VOIDmode
, type
);
4533 if ((unsigned int) arg_boundary
> MAX_SUPPORTED_STACK_ALIGNMENT
)
4534 arg_boundary
= MAX_SUPPORTED_STACK_ALIGNMENT
;
4536 /* Care for on-stack alignment if needed. */
4537 if (arg_boundary
<= 64 || size
== 0)
4541 HOST_WIDE_INT align
= arg_boundary
/ 8;
4542 t
= fold_build_pointer_plus_hwi (ovf
, align
- 1);
4543 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
4544 build_int_cst (TREE_TYPE (t
), -align
));
4547 gimplify_expr (&t
, pre_p
, NULL
, is_gimple_val
, fb_rvalue
);
4548 gimplify_assign (addr
, t
, pre_p
);
4550 t
= fold_build_pointer_plus_hwi (t
, rsize
* UNITS_PER_WORD
);
4551 gimplify_assign (unshare_expr (ovf
), t
, pre_p
);
4554 gimple_seq_add_stmt (pre_p
, gimple_build_label (lab_over
));
4556 ptrtype
= build_pointer_type_for_mode (type
, ptr_mode
, true);
4557 addr
= fold_convert (ptrtype
, addr
);
4560 addr
= build_va_arg_indirect_ref (addr
);
4561 return build_va_arg_indirect_ref (addr
);
4564 /* Return true if OPNUM's MEM should be matched
4565 in movabs* patterns. */
4568 ix86_check_movabs (rtx insn
, int opnum
)
4572 set
= PATTERN (insn
);
4573 if (GET_CODE (set
) == PARALLEL
)
4574 set
= XVECEXP (set
, 0, 0);
4575 gcc_assert (GET_CODE (set
) == SET
);
4576 mem
= XEXP (set
, opnum
);
4577 while (SUBREG_P (mem
))
4578 mem
= SUBREG_REG (mem
);
4579 gcc_assert (MEM_P (mem
));
4580 return volatile_ok
|| !MEM_VOLATILE_P (mem
);
4583 /* Return false if INSN contains a MEM with a non-default address space. */
4585 ix86_check_no_addr_space (rtx insn
)
4587 subrtx_var_iterator::array_type array
;
4588 FOR_EACH_SUBRTX_VAR (iter
, array
, PATTERN (insn
), ALL
)
4591 if (MEM_P (x
) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x
)))
4597 /* Initialize the table of extra 80387 mathematical constants. */
4600 init_ext_80387_constants (void)
4602 static const char * cst
[5] =
4604 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4605 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4606 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4607 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4608 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4612 for (i
= 0; i
< 5; i
++)
4614 real_from_string (&ext_80387_constants_table
[i
], cst
[i
]);
4615 /* Ensure each constant is rounded to XFmode precision. */
4616 real_convert (&ext_80387_constants_table
[i
],
4617 XFmode
, &ext_80387_constants_table
[i
]);
4620 ext_80387_constants_init
= 1;
4623 /* Return non-zero if the constant is something that
4624 can be loaded with a special instruction. */
4627 standard_80387_constant_p (rtx x
)
4629 machine_mode mode
= GET_MODE (x
);
4631 const REAL_VALUE_TYPE
*r
;
4633 if (!(CONST_DOUBLE_P (x
) && X87_FLOAT_MODE_P (mode
)))
4636 if (x
== CONST0_RTX (mode
))
4638 if (x
== CONST1_RTX (mode
))
4641 r
= CONST_DOUBLE_REAL_VALUE (x
);
4643 /* For XFmode constants, try to find a special 80387 instruction when
4644 optimizing for size or on those CPUs that benefit from them. */
4646 && (optimize_function_for_size_p (cfun
) || TARGET_EXT_80387_CONSTANTS
))
4650 if (! ext_80387_constants_init
)
4651 init_ext_80387_constants ();
4653 for (i
= 0; i
< 5; i
++)
4654 if (real_identical (r
, &ext_80387_constants_table
[i
]))
4658 /* Load of the constant -0.0 or -1.0 will be split as
4659 fldz;fchs or fld1;fchs sequence. */
4660 if (real_isnegzero (r
))
4662 if (real_identical (r
, &dconstm1
))
4668 /* Return the opcode of the special instruction to be used to load
4672 standard_80387_constant_opcode (rtx x
)
4674 switch (standard_80387_constant_p (x
))
4698 /* Return the CONST_DOUBLE representing the 80387 constant that is
4699 loaded by the specified special instruction. The argument IDX
4700 matches the return value from standard_80387_constant_p. */
4703 standard_80387_constant_rtx (int idx
)
4707 if (! ext_80387_constants_init
)
4708 init_ext_80387_constants ();
4724 return const_double_from_real_value (ext_80387_constants_table
[i
],
4728 /* Return 1 if X is all bits 0 and 2 if X is all bits 1
4729 in supported SSE/AVX vector mode. */
4732 standard_sse_constant_p (rtx x
, machine_mode pred_mode
)
4739 mode
= GET_MODE (x
);
4741 if (x
== const0_rtx
|| const0_operand (x
, mode
))
4744 if (x
== constm1_rtx
|| vector_all_ones_operand (x
, mode
))
4746 /* VOIDmode integer constant, get mode from the predicate. */
4747 if (mode
== VOIDmode
)
4750 switch (GET_MODE_SIZE (mode
))
4775 /* Return the opcode of the special instruction to be used to load
4776 the constant operands[1] into operands[0]. */
4779 standard_sse_constant_opcode (rtx_insn
*insn
, rtx
*operands
)
4782 rtx x
= operands
[1];
4784 gcc_assert (TARGET_SSE
);
4786 mode
= GET_MODE (x
);
4788 if (x
== const0_rtx
|| const0_operand (x
, mode
))
4790 switch (get_attr_mode (insn
))
4793 if (!EXT_REX_SSE_REG_P (operands
[0]))
4794 return "%vpxor\t%0, %d0";
4798 if (EXT_REX_SSE_REG_P (operands
[0]))
4799 return (TARGET_AVX512VL
4800 ? "vpxord\t%x0, %x0, %x0"
4801 : "vpxord\t%g0, %g0, %g0");
4802 return "vpxor\t%x0, %x0, %x0";
4805 if (!EXT_REX_SSE_REG_P (operands
[0]))
4806 return "%vxorpd\t%0, %d0";
4810 if (!EXT_REX_SSE_REG_P (operands
[0]))
4811 return "vxorpd\t%x0, %x0, %x0";
4812 else if (TARGET_AVX512DQ
)
4813 return (TARGET_AVX512VL
4814 ? "vxorpd\t%x0, %x0, %x0"
4815 : "vxorpd\t%g0, %g0, %g0");
4817 return (TARGET_AVX512VL
4818 ? "vpxorq\t%x0, %x0, %x0"
4819 : "vpxorq\t%g0, %g0, %g0");
4822 if (!EXT_REX_SSE_REG_P (operands
[0]))
4823 return "%vxorps\t%0, %d0";
4827 if (!EXT_REX_SSE_REG_P (operands
[0]))
4828 return "vxorps\t%x0, %x0, %x0";
4829 else if (TARGET_AVX512DQ
)
4830 return (TARGET_AVX512VL
4831 ? "vxorps\t%x0, %x0, %x0"
4832 : "vxorps\t%g0, %g0, %g0");
4834 return (TARGET_AVX512VL
4835 ? "vpxord\t%x0, %x0, %x0"
4836 : "vpxord\t%g0, %g0, %g0");
4842 else if (x
== constm1_rtx
|| vector_all_ones_operand (x
, mode
))
4844 enum attr_mode insn_mode
= get_attr_mode (insn
);
4851 gcc_assert (TARGET_AVX512F
);
4852 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4857 gcc_assert (TARGET_AVX2
);
4862 gcc_assert (TARGET_SSE2
);
4863 if (!EXT_REX_SSE_REG_P (operands
[0]))
4865 ? "vpcmpeqd\t%0, %0, %0"
4866 : "pcmpeqd\t%0, %0");
4867 else if (TARGET_AVX512VL
)
4868 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
4870 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
4880 /* Returns true if INSN can be transformed from a memory load
4881 to a supported FP constant load. */
4884 ix86_standard_x87sse_constant_load_p (const rtx_insn
*insn
, rtx dst
)
4886 rtx src
= find_constant_src (insn
);
4888 gcc_assert (REG_P (dst
));
4891 || (SSE_REGNO_P (REGNO (dst
))
4892 && standard_sse_constant_p (src
, GET_MODE (dst
)) != 1)
4893 || (STACK_REGNO_P (REGNO (dst
))
4894 && standard_80387_constant_p (src
) < 1))
4900 /* Returns true if OP contains a symbol reference */
4903 symbolic_reference_mentioned_p (rtx op
)
4908 if (GET_CODE (op
) == SYMBOL_REF
|| GET_CODE (op
) == LABEL_REF
)
4911 fmt
= GET_RTX_FORMAT (GET_CODE (op
));
4912 for (i
= GET_RTX_LENGTH (GET_CODE (op
)) - 1; i
>= 0; i
--)
4918 for (j
= XVECLEN (op
, i
) - 1; j
>= 0; j
--)
4919 if (symbolic_reference_mentioned_p (XVECEXP (op
, i
, j
)))
4923 else if (fmt
[i
] == 'e' && symbolic_reference_mentioned_p (XEXP (op
, i
)))
4930 /* Return true if it is appropriate to emit `ret' instructions in the
4931 body of a function. Do this only if the epilogue is simple, needing a
4932 couple of insns. Prior to reloading, we can't tell how many registers
4933 must be saved, so return false then. Return false if there is no frame
4934 marker to de-allocate. */
4937 ix86_can_use_return_insn_p (void)
4939 if (ix86_function_naked (current_function_decl
))
4942 /* Don't use `ret' instruction in interrupt handler. */
4943 if (! reload_completed
4944 || frame_pointer_needed
4945 || cfun
->machine
->func_type
!= TYPE_NORMAL
)
4948 /* Don't allow more than 32k pop, since that's all we can do
4949 with one instruction. */
4950 if (crtl
->args
.pops_args
&& crtl
->args
.size
>= 32768)
4953 struct ix86_frame
&frame
= cfun
->machine
->frame
;
4954 return (frame
.stack_pointer_offset
== UNITS_PER_WORD
4955 && (frame
.nregs
+ frame
.nsseregs
) == 0);
4958 /* Return stack frame size. get_frame_size () returns used stack slots
4959 during compilation, which may be optimized out later. If stack frame
4960 is needed, stack_frame_required should be true. */
4962 static HOST_WIDE_INT
4963 ix86_get_frame_size (void)
4965 if (cfun
->machine
->stack_frame_required
)
4966 return get_frame_size ();
4971 /* Value should be nonzero if functions must have frame pointers.
4972 Zero means the frame pointer need not be set up (and parms may
4973 be accessed via the stack pointer) in functions that seem suitable. */
4976 ix86_frame_pointer_required (void)
4978 /* If we accessed previous frames, then the generated code expects
4979 to be able to access the saved ebp value in our frame. */
4980 if (cfun
->machine
->accesses_prev_frame
)
4983 /* Several x86 os'es need a frame pointer for other reasons,
4984 usually pertaining to setjmp. */
4985 if (SUBTARGET_FRAME_POINTER_REQUIRED
)
4988 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
4989 if (TARGET_32BIT_MS_ABI
&& cfun
->calls_setjmp
)
4992 /* Win64 SEH, very large frames need a frame-pointer as maximum stack
4993 allocation is 4GB. */
4994 if (TARGET_64BIT_MS_ABI
&& ix86_get_frame_size () > SEH_MAX_FRAME_SIZE
)
4997 /* SSE saves require frame-pointer when stack is misaligned. */
4998 if (TARGET_64BIT_MS_ABI
&& ix86_incoming_stack_boundary
< 128)
5001 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5002 turns off the frame pointer by default. Turn it back on now if
5003 we've not got a leaf function. */
5004 if (TARGET_OMIT_LEAF_FRAME_POINTER
5006 || ix86_current_function_calls_tls_descriptor
))
5009 if (crtl
->profile
&& !flag_fentry
)
5015 /* Record that the current function accesses previous call frames. */
5018 ix86_setup_frame_addresses (void)
5020 cfun
->machine
->accesses_prev_frame
= 1;
#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

/* Label count for call and return thunks.  It is used to make unique
   labels in call and return thunks.  */
static int indirectlabelno;

/* True if call thunk function is needed.  */
static bool indirect_thunk_needed = false;

/* Bit masks of integer registers, which contain branch target, used
   by call thunk functions.  */
static int indirect_thunks_used;

/* True if return thunk function is needed.  */
static bool indirect_return_needed = false;

/* True if return thunk function via CX is needed.  */
static bool indirect_return_via_cx;

#ifndef INDIRECT_LABEL
# define INDIRECT_LABEL "LIND"
#endif

/* Indicate what prefix is needed for an indirect branch.  */
enum indirect_thunk_prefix
{
  indirect_thunk_prefix_none,
  indirect_thunk_prefix_nt
};
5059 /* Return the prefix needed for an indirect branch INSN. */
5061 enum indirect_thunk_prefix
5062 indirect_thunk_need_prefix (rtx_insn
*insn
)
5064 enum indirect_thunk_prefix need_prefix
;
5065 if ((cfun
->machine
->indirect_branch_type
5066 == indirect_branch_thunk_extern
)
5067 && ix86_notrack_prefixed_insn_p (insn
))
5069 /* NOTRACK prefix is only used with external thunk so that it
5070 can be properly updated to support CET at run-time. */
5071 need_prefix
= indirect_thunk_prefix_nt
;
5074 need_prefix
= indirect_thunk_prefix_none
;
5078 /* Fills in the label name that should be used for the indirect thunk. */
5081 indirect_thunk_name (char name
[32], unsigned int regno
,
5082 enum indirect_thunk_prefix need_prefix
,
5085 if (regno
!= INVALID_REGNUM
&& regno
!= CX_REG
&& ret_p
)
5088 if (USE_HIDDEN_LINKONCE
)
5092 if (need_prefix
== indirect_thunk_prefix_nt
5093 && regno
!= INVALID_REGNUM
)
5095 /* NOTRACK prefix is only used with external thunk via
5096 register so that NOTRACK prefix can be added to indirect
5097 branch via register to support CET at run-time. */
5103 const char *ret
= ret_p
? "return" : "indirect";
5105 if (regno
!= INVALID_REGNUM
)
5107 const char *reg_prefix
;
5108 if (LEGACY_INT_REGNO_P (regno
))
5109 reg_prefix
= TARGET_64BIT
? "r" : "e";
5112 sprintf (name
, "__x86_%s_thunk%s_%s%s",
5113 ret
, prefix
, reg_prefix
, reg_names
[regno
]);
5116 sprintf (name
, "__x86_%s_thunk%s", ret
, prefix
);
5120 if (regno
!= INVALID_REGNUM
)
5121 ASM_GENERATE_INTERNAL_LABEL (name
, "LITR", regno
);
5125 ASM_GENERATE_INTERNAL_LABEL (name
, "LRT", 0);
5127 ASM_GENERATE_INTERNAL_LABEL (name
, "LIT", 0);
5132 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5133 the function address is in REGNO and the call and return thunk looks like:
5144 Otherwise, the function address is on the top of stack and the
5145 call and return thunk looks like:
5153 lea WORD_SIZE(%sp), %sp
5158 output_indirect_thunk (unsigned int regno
)
5160 char indirectlabel1
[32];
5161 char indirectlabel2
[32];
5163 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
, INDIRECT_LABEL
,
5165 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
, INDIRECT_LABEL
,
5169 fputs ("\tcall\t", asm_out_file
);
5170 assemble_name_raw (asm_out_file
, indirectlabel2
);
5171 fputc ('\n', asm_out_file
);
5173 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
5175 /* AMD and Intel CPUs prefer each a different instruction as loop filler.
5176 Usage of both pause + lfence is compromise solution. */
5177 fprintf (asm_out_file
, "\tpause\n\tlfence\n");
5180 fputs ("\tjmp\t", asm_out_file
);
5181 assemble_name_raw (asm_out_file
, indirectlabel1
);
5182 fputc ('\n', asm_out_file
);
5184 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
5186 /* The above call insn pushed a word to stack. Adjust CFI info. */
5187 if (flag_asynchronous_unwind_tables
&& dwarf2out_do_frame ())
5189 if (! dwarf2out_do_cfi_asm ())
5191 dw_cfi_ref xcfi
= ggc_cleared_alloc
<dw_cfi_node
> ();
5192 xcfi
->dw_cfi_opc
= DW_CFA_advance_loc4
;
5193 xcfi
->dw_cfi_oprnd1
.dw_cfi_addr
= ggc_strdup (indirectlabel2
);
5194 vec_safe_push (cfun
->fde
->dw_fde_cfi
, xcfi
);
5196 dw_cfi_ref xcfi
= ggc_cleared_alloc
<dw_cfi_node
> ();
5197 xcfi
->dw_cfi_opc
= DW_CFA_def_cfa_offset
;
5198 xcfi
->dw_cfi_oprnd1
.dw_cfi_offset
= 2 * UNITS_PER_WORD
;
5199 vec_safe_push (cfun
->fde
->dw_fde_cfi
, xcfi
);
5200 dwarf2out_emit_cfi (xcfi
);
5203 if (regno
!= INVALID_REGNUM
)
5207 xops
[0] = gen_rtx_MEM (word_mode
, stack_pointer_rtx
);
5208 xops
[1] = gen_rtx_REG (word_mode
, regno
);
5209 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops
);
5215 xops
[0] = stack_pointer_rtx
;
5216 xops
[1] = plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
5217 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops
);
5220 fputs ("\tret\n", asm_out_file
);
5223 /* Output a funtion with a call and return thunk for indirect branch.
5224 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5225 Otherwise, the function address is on the top of stack. Thunk is
5226 used for function return if RET_P is true. */
5229 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix
,
5230 unsigned int regno
, bool ret_p
)
5235 /* Create __x86_indirect_thunk. */
5236 indirect_thunk_name (name
, regno
, need_prefix
, ret_p
);
5237 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
5238 get_identifier (name
),
5239 build_function_type_list (void_type_node
, NULL_TREE
));
5240 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
5241 NULL_TREE
, void_type_node
);
5242 TREE_PUBLIC (decl
) = 1;
5243 TREE_STATIC (decl
) = 1;
5244 DECL_IGNORED_P (decl
) = 1;
5249 switch_to_section (darwin_sections
[picbase_thunk_section
]);
5250 fputs ("\t.weak_definition\t", asm_out_file
);
5251 assemble_name (asm_out_file
, name
);
5252 fputs ("\n\t.private_extern\t", asm_out_file
);
5253 assemble_name (asm_out_file
, name
);
5254 putc ('\n', asm_out_file
);
5255 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5256 DECL_WEAK (decl
) = 1;
5260 if (USE_HIDDEN_LINKONCE
)
5262 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
5264 targetm
.asm_out
.unique_section (decl
, 0);
5265 switch_to_section (get_named_section (decl
, NULL
, 0));
5267 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
5268 fputs ("\t.hidden\t", asm_out_file
);
5269 assemble_name (asm_out_file
, name
);
5270 putc ('\n', asm_out_file
);
5271 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5275 switch_to_section (text_section
);
5276 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5279 DECL_INITIAL (decl
) = make_node (BLOCK
);
5280 current_function_decl
= decl
;
5281 allocate_struct_function (decl
, false);
5282 init_function_start (decl
);
5283 /* We're about to hide the function body from callees of final_* by
5284 emitting it directly; tell them we're a thunk, if they care. */
5285 cfun
->is_thunk
= true;
5286 first_function_block_is_cold
= false;
5287 /* Make sure unwind info is emitted for the thunk if needed. */
5288 final_start_function (emit_barrier (), asm_out_file
, 1);
5290 output_indirect_thunk (regno
);
5292 final_end_function ();
5293 init_insn_lengths ();
5294 free_after_compilation (cfun
);
5296 current_function_decl
= NULL
;
/* Bitmask with one bit per integer register number; a set bit means a
   PC-thunk label for that register has been referenced (set in
   output_set_got, consumed in ix86_code_end).  */
5299 static int pic_labels_used
;
5301 /* Fills in the label name that should be used for a pc thunk for
5302 the given register.  32-bit only (asserted below).  */
5305 get_pc_thunk_name (char name
[32], unsigned int regno
)
5307 gcc_assert (!TARGET_64BIT
);
/* Prefer a descriptive hidden-linkonce name; otherwise a compiler-local
   label ("LPR" + regno).  */
5309 if (USE_HIDDEN_LINKONCE
)
5310 sprintf (name
, "__x86.get_pc_thunk.%s", reg_names
[regno
]);
5312 ASM_GENERATE_INTERNAL_LABEL (name
, "LPR", regno
);
5316 /* This function generates code for -fpic that loads %ebx with
5317 the return address of the caller and then returns.  Also emits, at
   end of compilation, any indirect-branch (retpoline-style) thunks
   that were requested during code generation.

   NOTE(review): line-mangled extraction; several original lines (e.g.
   the CX_REG/regno argument lines of some output_indirect_thunk_function
   calls, braces, local declarations) are elided here.  */
5320 ix86_code_end (void)
/* Emit the return thunk(s) if any return needed one.  */
5325 if (indirect_return_needed
)
5326 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5327 INVALID_REGNUM
, true);
5328 if (indirect_return_via_cx
)
5329 output_indirect_thunk_function (indirect_thunk_prefix_none
,
/* Emit the generic indirect-call thunk if used.  */
5331 if (indirect_thunk_needed
)
5332 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5333 INVALID_REGNUM
, false);
/* Per-register thunks for the REX registers (r8..r15); their bit index
   in indirect_thunks_used is offset past the legacy registers.  */
5335 for (regno
= FIRST_REX_INT_REG
; regno
<= LAST_REX_INT_REG
; regno
++)
5337 unsigned int i
= regno
- FIRST_REX_INT_REG
+ LAST_INT_REG
+ 1;
5338 if ((indirect_thunks_used
& (1 << i
)))
5339 output_indirect_thunk_function (indirect_thunk_prefix_none
,
/* Per-register thunks for the legacy integer registers, and also the
   32-bit PC-load thunks recorded in pic_labels_used.  */
5343 for (regno
= FIRST_INT_REG
; regno
<= LAST_INT_REG
; regno
++)
5348 if ((indirect_thunks_used
& (1 << regno
)))
5349 output_indirect_thunk_function (indirect_thunk_prefix_none
,
5352 if (!(pic_labels_used
& (1 << regno
)))
5355 get_pc_thunk_name (name
, regno
);
/* Build a synthetic FUNCTION_DECL for the PC thunk, mirroring
   output_indirect_thunk_function above.  */
5357 decl
= build_decl (BUILTINS_LOCATION
, FUNCTION_DECL
,
5358 get_identifier (name
),
5359 build_function_type_list (void_type_node
, NULL_TREE
));
5360 DECL_RESULT (decl
) = build_decl (BUILTINS_LOCATION
, RESULT_DECL
,
5361 NULL_TREE
, void_type_node
);
5362 TREE_PUBLIC (decl
) = 1;
5363 TREE_STATIC (decl
) = 1;
5364 DECL_IGNORED_P (decl
) = 1;
/* Darwin: weak private-extern definition in the picbase thunk section.  */
5369 switch_to_section (darwin_sections
[picbase_thunk_section
]);
5370 fputs ("\t.weak_definition\t", asm_out_file
);
5371 assemble_name (asm_out_file
, name
);
5372 fputs ("\n\t.private_extern\t", asm_out_file
);
5373 assemble_name (asm_out_file
, name
);
5374 putc ('\n', asm_out_file
);
5375 ASM_OUTPUT_LABEL (asm_out_file
, name
);
5376 DECL_WEAK (decl
) = 1;
/* ELF: hidden COMDAT definition when supported.  */
5380 if (USE_HIDDEN_LINKONCE
)
5382 cgraph_node::create (decl
)->set_comdat_group (DECL_ASSEMBLER_NAME (decl
));
5384 targetm
.asm_out
.unique_section (decl
, 0);
5385 switch_to_section (get_named_section (decl
, NULL
, 0));
5387 targetm
.asm_out
.globalize_label (asm_out_file
, name
);
5388 fputs ("\t.hidden\t", asm_out_file
);
5389 assemble_name (asm_out_file
, name
);
5390 putc ('\n', asm_out_file
);
5391 ASM_DECLARE_FUNCTION_NAME (asm_out_file
, name
, decl
);
5395 switch_to_section (text_section
);
5396 ASM_OUTPUT_LABEL (asm_out_file
, name
);
/* Temporary cfun for emitting the thunk body via final_*.  */
5399 DECL_INITIAL (decl
) = make_node (BLOCK
);
5400 current_function_decl
= decl
;
5401 allocate_struct_function (decl
, false);
5402 init_function_start (decl
);
5403 /* We're about to hide the function body from callees of final_* by
5404 emitting it directly; tell them we're a thunk, if they care. */
5405 cfun
->is_thunk
= true;
5406 first_function_block_is_cold
= false;
5407 /* Make sure unwind info is emitted for the thunk if needed. */
5408 final_start_function (emit_barrier (), asm_out_file
, 1);
5410 /* Pad stack IP move with 4 instructions (two NOPs count
5411 as one instruction). */
5412 if (TARGET_PAD_SHORT_FUNCTION
)
5417 fputs ("\tnop\n", asm_out_file
);
/* The thunk body: load the return address (at the top of stack) into
   the destination register, then return.  */
5420 xops
[0] = gen_rtx_REG (Pmode
, regno
);
5421 xops
[1] = gen_rtx_MEM (Pmode
, stack_pointer_rtx
);
5422 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops
);
5423 output_asm_insn ("%!ret", NULL
);
5424 final_end_function ();
5425 init_insn_lengths ();
5426 free_after_compilation (cfun
);
5428 current_function_decl
= NULL
;
/* With -fsplit-stack, note in the object file that split stack is in
   use so the linker can act on it.  */
5431 if (flag_split_stack
)
5432 file_end_indicate_split_stack ();
5435 /* Emit code for the SET_GOT patterns: load the PC (and on PIC targets
   the GOT base) into DEST.  LABEL, when non-null, is the label at the
   point of use.

   NOTE(review): line-mangled extraction; some original lines (local
   declarations, braces, the non-fPIC branches) are elided here.  */
5438 output_set_got (rtx dest
, rtx label
)
/* VxWorks RTP: the GOT base comes from the fixed GOTT_BASE/GOTT_INDEX
   pair rather than a PC-relative thunk.  */
5444 if (TARGET_VXWORKS_RTP
&& flag_pic
)
5446 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5447 xops
[2] = gen_rtx_MEM (Pmode
,
5448 gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
));
5449 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops
);
5451 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5452 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5453 an unadorned address. */
5454 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5455 SYMBOL_REF_FLAGS (xops
[2]) |= SYMBOL_FLAG_LOCAL
;
5456 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops
);
5460 xops
[1] = gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
/* Call the per-register PC thunk; record that its label is needed so
   ix86_code_end emits the thunk body.  */
5465 get_pc_thunk_name (name
, REGNO (dest
));
5466 pic_labels_used
|= 1 << REGNO (dest
);
5468 xops
[2] = gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (name
));
5469 xops
[2] = gen_rtx_MEM (QImode
, xops
[2]);
5470 output_asm_insn ("%!call\t%X2", xops
);
5473 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
5474 This is what will be referenced by the Mach-O PIC subsystem. */
5475 if (machopic_should_output_picbase_label () || !label
)
5476 ASM_OUTPUT_LABEL (asm_out_file
, MACHOPIC_FUNCTION_BASE_NAME
);
5478 /* When we are restoring the pic base at the site of a nonlocal label,
5479 and we decided to emit the pic base above, we will still output a
5480 local label used for calculating the correction offset (even though
5481 the offset will be 0 in that case). */
5483 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5484 CODE_LABEL_NUMBER (label
));
5490 /* We don't need a pic base, we're not producing pic.  Load the label
   address directly (non-PIC fallback path).  */
5493 xops
[2] = gen_rtx_LABEL_REF (Pmode
, label
? label
: gen_label_rtx ());
5494 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops
);
5495 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5496 CODE_LABEL_NUMBER (XEXP (xops
[2], 0)));
/* Add the GOT symbol offset to form the final GOT base.  */
5500 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops
);
5505 /* Generate a "push" pattern for input ARG and update the tracked CFA
   and stack-pointer offsets to account for the pushed word.  Returns a
   SET of a PRE_DEC stack memory from ARG (final operand elided in this
   extraction).  */
5510 struct machine_function
*m
= cfun
->machine
;
/* Pushing a word moves the stack pointer; mirror that in the frame
   state, including the CFA when it is tracked via the stack pointer.  */
5512 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
5513 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
5514 m
->fs
.sp_offset
+= UNITS_PER_WORD
;
/* Pushes always operate on full words; widen a narrower hard register
   to word_mode first.  */
5516 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
5517 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
5519 return gen_rtx_SET (gen_rtx_MEM (word_mode
,
5520 gen_rtx_PRE_DEC (Pmode
,
5521 stack_pointer_rtx
)),
5525 /* Generate a "pop" pattern for input ARG: a SET of ARG from a
   POST_INC stack memory.  (Signature line elided in this extraction;
   presumably `static rtx gen_pop (rtx arg)` — TODO confirm.)  */
/* Pops operate on full words; widen a narrower hard register first.  */
5530 if (REG_P (arg
) && GET_MODE (arg
) != word_mode
)
5531 arg
= gen_rtx_REG (word_mode
, REGNO (arg
));
5533 return gen_rtx_SET (arg
,
5534 gen_rtx_MEM (word_mode
,
5535 gen_rtx_POST_INC (Pmode
,
5536 stack_pointer_rtx
)));
5539 /* Return >= 0 if there is an unused call-clobbered register available
5540 for the entire function; INVALID_REGNUM otherwise.

   NOTE(review): some original lines (leafness test, the successful
   `return i;`, the drap initialization when there is no DRAP) are
   elided in this extraction.  */
5543 ix86_select_alt_pic_regnum (void)
/* With a pseudo PIC register there is no need for a hard register.  */
5545 if (ix86_use_pseudo_pic_reg ())
5546 return INVALID_REGNUM
;
5550 && !ix86_current_function_calls_tls_descriptor
)
5553 /* Can't use the same register for both PIC and DRAP. */
5555 drap
= REGNO (crtl
->drap_reg
);
/* Scan the first few call-clobbered integer registers (2..0) for one
   that is never live in this function.  */
5558 for (i
= 2; i
>= 0; --i
)
5559 if (i
!= drap
&& !df_regs_ever_live_p (i
))
5563 return INVALID_REGNUM
;
5566 /* Return true if REGNO is used by the epilogue.  Only relevant after
   the epilogue has been emitted, and only for functions compiled with
   no caller-saved registers (e.g. interrupt handlers).  */
5569 ix86_epilogue_uses (int regno
)
5571 /* If there are no caller-saved registers, we preserve all registers,
5572 except for MMX and x87 registers which aren't supported when saving
5573 and restoring registers. Don't explicitly save SP register since
5574 it is always preserved. */
5575 return (epilogue_completed
5576 && cfun
->machine
->no_caller_saved_registers
5577 && !fixed_regs
[regno
]
5578 && !STACK_REGNO_P (regno
)
5579 && !MMX_REGNO_P (regno
));
5582 /* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */
5586 ix86_hard_regno_scratch_ok (unsigned int regno
)
5588 /* If there are no caller-saved registers, we can't use any register
5589 as a scratch register after epilogue and use REGNO as scratch
5590 register only if it has been used before to avoid saving and
   restoring it.  */
5592 return (!cfun
->machine
->no_caller_saved_registers
5593 || (!epilogue_completed
5594 && df_regs_ever_live_p (regno
)));
5597 /* Return TRUE if we need to save REGNO in the current function's
   prologue.  MAYBE_EH_RETURN selects whether EH-return data registers
   count; IGNORE_OUTLINED skips registers managed by the ms2sysv
   out-of-line stub.

   NOTE(review): line-mangled extraction; several original lines
   (braces, loop headers, some early returns) are elided below.  */
5600 ix86_save_reg (unsigned int regno
, bool maybe_eh_return
, bool ignore_outlined
)
5602 /* If there are no caller-saved registers, we preserve all registers,
5603 except for MMX and x87 registers which aren't supported when saving
5604 and restoring registers. Don't explicitly save SP register since
5605 it is always preserved. */
5606 if (cfun
->machine
->no_caller_saved_registers
)
5608 /* Don't preserve registers used for function return value. */
5609 rtx reg
= crtl
->return_rtx
;
5612 unsigned int i
= REGNO (reg
);
5613 unsigned int nregs
= REG_NREGS (reg
);
5615 if ((i
+ nregs
) == regno
)
/* In the no-caller-saved case, save every live, saveable register
   (excluding x87/MMX stacks and, when the frame pointer is in use,
   the hard frame pointer which is handled separately).  */
5619 return (df_regs_ever_live_p (regno
)
5620 && !fixed_regs
[regno
]
5621 && !STACK_REGNO_P (regno
)
5622 && !MMX_REGNO_P (regno
)
5623 && (regno
!= HARD_FRAME_POINTER_REGNUM
5624 || !frame_pointer_needed
));
/* PIC register: needed when something may still reference the GOT.  */
5627 if (regno
== REAL_PIC_OFFSET_TABLE_REGNUM
5628 && pic_offset_table_rtx
)
5630 if (ix86_use_pseudo_pic_reg ())
5632 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
5633 _mcount in prologue. */
5634 if (!TARGET_64BIT
&& flag_pic
&& crtl
->profile
)
5637 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM
)
5639 || crtl
->calls_eh_return
5640 || crtl
->uses_const_pool
5641 || cfun
->has_nonlocal_label
)
5642 return ix86_select_alt_pic_regnum () == INVALID_REGNUM
;
/* EH return: the EH data registers must be preserved.  */
5645 if (crtl
->calls_eh_return
&& maybe_eh_return
)
5650 unsigned test
= EH_RETURN_DATA_REGNO (i
);
5651 if (test
== INVALID_REGNUM
)
/* Registers saved/restored by the ms2sysv out-of-line stubs are not
   saved inline when IGNORE_OUTLINED is set.  */
5658 if (ignore_outlined
&& cfun
->machine
->call_ms2sysv
)
5660 unsigned count
= cfun
->machine
->call_ms2sysv_extra_regs
5661 + xlogue_layout::MIN_REGS
;
5662 if (xlogue_layout::is_stub_managed_reg (regno
, count
))
/* DRAP register is saved explicitly unless suppressed.  */
5667 && regno
== REGNO (crtl
->drap_reg
)
5668 && !cfun
->machine
->no_drap_save_restore
)
/* Default rule: save live, call-saved, non-fixed registers (the hard
   frame pointer only when no frame pointer is in use).  */
5671 return (df_regs_ever_live_p (regno
)
5672 && !call_used_regs
[regno
]
5673 && !fixed_regs
[regno
]
5674 && (regno
!= HARD_FRAME_POINTER_REGNUM
|| !frame_pointer_needed
));
5677 /* Return number of saved general purpose registers.  (Counter
   increment and return elided in this extraction.)  */
5680 ix86_nsaved_regs (void)
5685 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5686 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
5691 /* Return number of saved SSE registers.  Only the 64-bit MS ABI saves
   SSE registers; zero otherwise.  (Counter increment and return elided
   in this extraction.)  */
5694 ix86_nsaved_sseregs (void)
5699 if (!TARGET_64BIT_MS_ABI
)
5701 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
5702 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
5707 /* Given FROM and TO register numbers, say whether this elimination is
5708 allowed. If stack alignment is needed, we can only replace argument
5709 pointer with hard frame pointer, or replace frame pointer with stack
5710 pointer. Otherwise, frame pointer elimination is automatically
5711 handled and all other eliminations are valid. */
5714 ix86_can_eliminate (const int from
, const int to
)
5716 if (stack_realign_fp
)
5717 return ((from
== ARG_POINTER_REGNUM
5718 && to
== HARD_FRAME_POINTER_REGNUM
)
5719 || (from
== FRAME_POINTER_REGNUM
5720 && to
== STACK_POINTER_REGNUM
));
/* Eliminating to the stack pointer is only possible when no frame
   pointer is required; everything else is always fine.  */
5722 return to
== STACK_POINTER_REGNUM
? !frame_pointer_needed
: true;
5725 /* Return the offset between two registers, one to be eliminated, and the other
5726 its replacement, at the start of a routine.  Offsets are taken from
   the precomputed frame layout in cfun->machine->frame.  */
5729 ix86_initial_elimination_offset (int from
, int to
)
5731 struct ix86_frame
&frame
= cfun
->machine
->frame
;
5733 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
5734 return frame
.hard_frame_pointer_offset
;
5735 else if (from
== FRAME_POINTER_REGNUM
5736 && to
== HARD_FRAME_POINTER_REGNUM
)
5737 return frame
.hard_frame_pointer_offset
- frame
.frame_pointer_offset
;
/* Remaining cases must target the stack pointer.  */
5740 gcc_assert (to
== STACK_POINTER_REGNUM
);
5742 if (from
== ARG_POINTER_REGNUM
)
5743 return frame
.stack_pointer_offset
;
5745 gcc_assert (from
== FRAME_POINTER_REGNUM
);
5746 return frame
.stack_pointer_offset
- frame
.frame_pointer_offset
;
5750 /* Emits a warning for unsupported msabi to sysv pro/epilogues.
   FEATURE names the incompatible feature; the warning is emitted at
   most once per compilation (guarded by WARNED_ONCE — the guard check
   and set are elided in this extraction).  */
5751 void warn_once_call_ms2sysv_xlogues (const char *feature
)
5753 static bool warned_once
= false;
5756 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
5762 /* Return the probing interval for -fstack-clash-protection, falling
   back to the static stack-checking interval otherwise.  Both are
   powers of two expressed as exponents.  */
5764 static HOST_WIDE_INT
5765 get_probe_interval (void)
5767 if (flag_stack_clash_protection
)
5768 return (HOST_WIDE_INT_1U
5769 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL
));
5771 return (HOST_WIDE_INT_1U
<< STACK_CHECK_PROBE_INTERVAL_EXP
);
5774 /* When using -fsplit-stack, the allocation routines set a field in
5775 the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */
5778 #define SPLIT_STACK_AVAILABLE 256
5780 /* Fill structure ix86_frame about frame of currently computed function.
   Computes, top to bottom: register save areas (GPR then SSE), the
   realignment boundary, local frame, outgoing argument area and red
   zone, recording each boundary's offset from the CFA.

   NOTE(review): this chunk is a line-mangled extraction; numerous
   original lines (braces, conditions such as the SEH checks, some
   assignments) are elided, so only comments were added.  */
5783 ix86_compute_frame_layout (void)
5785 struct ix86_frame
*frame
= &cfun
->machine
->frame
;
5786 struct machine_function
*m
= cfun
->machine
;
5787 unsigned HOST_WIDE_INT stack_alignment_needed
;
5788 HOST_WIDE_INT offset
;
5789 unsigned HOST_WIDE_INT preferred_alignment
;
5790 HOST_WIDE_INT size
= ix86_get_frame_size ();
5791 HOST_WIDE_INT to_allocate
;
5793 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
5794 * ms_abi functions that call a sysv function. We now need to prune away
5795 * cases where it should be disabled. */
5796 if (TARGET_64BIT
&& m
->call_ms2sysv
)
5798 gcc_assert (TARGET_64BIT_MS_ABI
);
5799 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES
);
5800 gcc_assert (!TARGET_SEH
);
5801 gcc_assert (TARGET_SSE
);
5802 gcc_assert (!ix86_using_red_zone ());
/* Stubs cannot cope with __builtin_eh_return or a static chain that
   was pushed on the stack; disable and warn once in those cases.  */
5804 if (crtl
->calls_eh_return
)
5806 gcc_assert (!reload_completed
);
5807 m
->call_ms2sysv
= false;
5808 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
5811 else if (ix86_static_chain_on_stack
)
5813 gcc_assert (!reload_completed
);
5814 m
->call_ms2sysv
= false;
5815 warn_once_call_ms2sysv_xlogues ("static call chains");
5818 /* Finally, compute which registers the stub will manage. */
5821 unsigned count
= xlogue_layout::count_stub_managed_regs ();
5822 m
->call_ms2sysv_extra_regs
= count
- xlogue_layout::MIN_REGS
;
5823 m
->call_ms2sysv_pad_in
= 0;
5827 frame
->nregs
= ix86_nsaved_regs ();
5828 frame
->nsseregs
= ix86_nsaved_sseregs ();
5830 /* 64-bit MS ABI seem to require stack alignment to be always 16,
5831 except for function prologues, leaf functions and when the default
5832 incoming stack boundary is overridden at command line or via
5833 force_align_arg_pointer attribute.
5835 Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
5836 at call sites, including profile function calls.  */
5838 if (((TARGET_64BIT_MS_ABI
|| TARGET_MACHO
)
5839 && crtl
->preferred_stack_boundary
< 128)
5840 && (!crtl
->is_leaf
|| cfun
->calls_alloca
!= 0
5841 || ix86_current_function_calls_tls_descriptor
5842 || (TARGET_MACHO
&& crtl
->profile
)
5843 || ix86_incoming_stack_boundary
< 128))
5845 crtl
->preferred_stack_boundary
= 128;
5846 crtl
->stack_alignment_needed
= 128;
/* Convert alignment requirements from bits to bytes.  */
5849 stack_alignment_needed
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
5850 preferred_alignment
= crtl
->preferred_stack_boundary
/ BITS_PER_UNIT
;
5852 gcc_assert (!size
|| stack_alignment_needed
);
5853 gcc_assert (preferred_alignment
>= STACK_BOUNDARY
/ BITS_PER_UNIT
);
5854 gcc_assert (preferred_alignment
<= stack_alignment_needed
);
5856 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
5857 gcc_assert (TARGET_64BIT
|| !frame
->nsseregs
);
5858 if (TARGET_64BIT
&& m
->call_ms2sysv
)
5860 gcc_assert (stack_alignment_needed
>= 16);
5861 gcc_assert (!frame
->nsseregs
);
5864 /* For SEH we have to limit the amount of code movement into the prologue.
5865 At present we do this via a BLOCKAGE, at which point there's very little
5866 scheduling that can be done, which means that there's very little point
5867 in doing anything except PUSHs. */
5869 m
->use_fast_prologue_epilogue
= false;
5870 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun
)))
5872 int count
= frame
->nregs
;
5873 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
5875 /* The fast prologue uses move instead of push to save registers. This
5876 is significantly longer, but also executes faster as modern hardware
5877 can execute the moves in parallel, but can't do that for push/pop.
5879 Be careful about choosing what prologue to emit: When function takes
5880 many instructions to execute we may use slow version as well as in
5881 case function is known to be outside hot spot (this is known with
5882 feedback only). Weight the size of function by number of registers
5883 to save as it is cheap to use one or two push instructions but very
5884 slow to use many of them. */
5886 count
= (count
- 1) * FAST_PROLOGUE_INSN_COUNT
;
5887 if (node
->frequency
< NODE_FREQUENCY_NORMAL
5888 || (flag_branch_probabilities
5889 && node
->frequency
< NODE_FREQUENCY_HOT
))
5890 m
->use_fast_prologue_epilogue
= false;
5892 m
->use_fast_prologue_epilogue
5893 = !expensive_function_p (count
);
5896 frame
->save_regs_using_mov
5897 = (TARGET_PROLOGUE_USING_MOVE
&& m
->use_fast_prologue_epilogue
5898 /* If static stack checking is enabled and done with probes,
5899 the registers need to be saved before allocating the frame. */
5900 && flag_stack_check
!= STATIC_BUILTIN_STACK_CHECK
);
5902 /* Skip return address and error code in exception handler. */
5903 offset
= INCOMING_FRAME_SP_OFFSET
;
5905 /* Skip pushed static chain. */
5906 if (ix86_static_chain_on_stack
)
5907 offset
+= UNITS_PER_WORD
;
5909 /* Skip saved base pointer. */
5910 if (frame_pointer_needed
)
5911 offset
+= UNITS_PER_WORD
;
5912 frame
->hfp_save_offset
= offset
;
5914 /* The traditional frame pointer location is at the top of the frame. */
5915 frame
->hard_frame_pointer_offset
= offset
;
5917 /* Register save area */
5918 offset
+= frame
->nregs
* UNITS_PER_WORD
;
5919 frame
->reg_save_offset
= offset
;
5921 /* On SEH target, registers are pushed just before the frame pointer
   location.  */
5924 frame
->hard_frame_pointer_offset
= offset
;
5926 /* Calculate the size of the va-arg area (not including padding, if any). */
5927 frame
->va_arg_size
= ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
;
5929 /* Also adjust stack_realign_offset for the largest alignment of
5930 stack slot actually used. */
5931 if (stack_realign_fp
5932 || (cfun
->machine
->max_used_stack_alignment
!= 0
5933 && (offset
% cfun
->machine
->max_used_stack_alignment
) != 0))
5935 /* We may need a 16-byte aligned stack for the remainder of the
5936 register save area, but the stack frame for the local function
5937 may require a greater alignment if using AVX/2/512. In order
5938 to avoid wasting space, we first calculate the space needed for
5939 the rest of the register saves, add that to the stack pointer,
5940 and then realign the stack to the boundary of the start of the
5941 frame for the local function. */
5942 HOST_WIDE_INT space_needed
= 0;
5943 HOST_WIDE_INT sse_reg_space_needed
= 0;
5947 if (m
->call_ms2sysv
)
5949 m
->call_ms2sysv_pad_in
= 0;
5950 space_needed
= xlogue_layout::get_instance ().get_stack_space_used ();
5953 else if (frame
->nsseregs
)
5954 /* The only ABI that has saved SSE registers (Win64) also has a
5955 16-byte aligned default stack. However, many programs violate
5956 the ABI, and Wine64 forces stack realignment to compensate. */
5957 space_needed
= frame
->nsseregs
* 16;
5959 sse_reg_space_needed
= space_needed
= ROUND_UP (space_needed
, 16);
5961 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
5962 rounding to be pedantic. */
5963 space_needed
= ROUND_UP (space_needed
+ frame
->va_arg_size
, 16);
5966 space_needed
= frame
->va_arg_size
;
5968 /* Record the allocation size required prior to the realignment AND. */
5969 frame
->stack_realign_allocate
= space_needed
;
5971 /* The re-aligned stack starts at frame->stack_realign_offset. Values
5972 before this point are not directly comparable with values below
5973 this point. Use sp_valid_at to determine if the stack pointer is
5974 valid for a given offset, fp_valid_at for the frame pointer, or
5975 choose_baseaddr to have a base register chosen for you.
5977 Note that the result of (frame->stack_realign_offset
5978 & (stack_alignment_needed - 1)) may not equal zero. */
5979 offset
= ROUND_UP (offset
+ space_needed
, stack_alignment_needed
);
5980 frame
->stack_realign_offset
= offset
- space_needed
;
5981 frame
->sse_reg_save_offset
= frame
->stack_realign_offset
5982 + sse_reg_space_needed
;
/* No realignment required: the realign offset degenerates to the
   current offset.  */
5986 frame
->stack_realign_offset
= offset
;
5988 if (TARGET_64BIT
&& m
->call_ms2sysv
)
5990 m
->call_ms2sysv_pad_in
= !!(offset
& UNITS_PER_WORD
);
5991 offset
+= xlogue_layout::get_instance ().get_stack_space_used ();
5994 /* Align and set SSE register save area. */
5995 else if (frame
->nsseregs
)
5997 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
5998 required and the DRAP re-alignment boundary is at least 16 bytes,
5999 then we want the SSE register save area properly aligned. */
6000 if (ix86_incoming_stack_boundary
>= 128
6001 || (stack_realign_drap
&& stack_alignment_needed
>= 16))
6002 offset
= ROUND_UP (offset
, 16);
6003 offset
+= frame
->nsseregs
* 16;
6005 frame
->sse_reg_save_offset
= offset
;
6006 offset
+= frame
->va_arg_size
;
6009 /* Align start of frame for local function. When a function call
6010 is removed, it may become a leaf function. But if argument may
6011 be passed on stack, we need to align the stack when there is no
   tail call.  */
6014 || frame
->va_arg_size
!= 0
6017 || (!crtl
->tail_call_emit
6018 && cfun
->machine
->outgoing_args_on_stack
)
6019 || cfun
->calls_alloca
6020 || ix86_current_function_calls_tls_descriptor
)
6021 offset
= ROUND_UP (offset
, stack_alignment_needed
);
6023 /* Frame pointer points here. */
6024 frame
->frame_pointer_offset
= offset
;
6028 /* Add outgoing arguments area. Can be skipped if we eliminated
6029 all the function calls as dead code.
6030 Skipping is however impossible when function calls alloca. Alloca
6031 expander assumes that last crtl->outgoing_args_size
6032 of stack frame are unused. */
6033 if (ACCUMULATE_OUTGOING_ARGS
6034 && (!crtl
->is_leaf
|| cfun
->calls_alloca
6035 || ix86_current_function_calls_tls_descriptor
))
6037 offset
+= crtl
->outgoing_args_size
;
6038 frame
->outgoing_arguments_size
= crtl
->outgoing_args_size
;
6041 frame
->outgoing_arguments_size
= 0;
6043 /* Align stack boundary. Only needed if we're calling another function
   or using alloca.  */
6045 if (!crtl
->is_leaf
|| cfun
->calls_alloca
6046 || ix86_current_function_calls_tls_descriptor
)
6047 offset
= ROUND_UP (offset
, preferred_alignment
);
6049 /* We've reached end of stack frame. */
6050 frame
->stack_pointer_offset
= offset
;
6052 /* Size prologue needs to allocate. */
6053 to_allocate
= offset
- frame
->sse_reg_save_offset
;
6055 if ((!to_allocate
&& frame
->nregs
<= 1)
6056 || (TARGET_64BIT
&& to_allocate
>= HOST_WIDE_INT_C (0x80000000))
6057 /* If stack clash probing needs a loop, then it needs a
6058 scratch register. But the returned register is only guaranteed
6059 to be safe to use after register saves are complete. So if
6060 stack clash protections are enabled and the allocated frame is
6061 larger than the probe interval, then use pushes to save
6062 callee saved registers. */
6063 || (flag_stack_clash_protection
&& to_allocate
> get_probe_interval ()))
6064 frame
->save_regs_using_mov
= false;
6066 if (ix86_using_red_zone ()
6067 && crtl
->sp_is_unchanging
6069 && !ix86_pc_thunk_call_expanded
6070 && !ix86_current_function_calls_tls_descriptor
)
6072 frame
->red_zone_size
= to_allocate
;
6073 if (frame
->save_regs_using_mov
)
6074 frame
->red_zone_size
+= frame
->nregs
* UNITS_PER_WORD
;
6075 if (frame
->red_zone_size
> RED_ZONE_SIZE
- RED_ZONE_RESERVE
)
6076 frame
->red_zone_size
= RED_ZONE_SIZE
- RED_ZONE_RESERVE
;
6079 frame
->red_zone_size
= 0;
6080 frame
->stack_pointer_offset
-= frame
->red_zone_size
;
6082 /* The SEH frame pointer location is near the bottom of the frame.
6083 This is enforced by the fact that the difference between the
6084 stack pointer and the frame pointer is limited to 240 bytes in
6085 the unwind data structure. */
6090 /* If we can leave the frame pointer where it is, do so. Also, returns
6091 the establisher frame for __builtin_frame_address (0). */
6092 diff
= frame
->stack_pointer_offset
- frame
->hard_frame_pointer_offset
;
6093 if (diff
<= SEH_MAX_FRAME_SIZE
6094 && (diff
> 240 || (diff
& 15) != 0)
6095 && !crtl
->accesses_prior_frames
)
6097 /* Ideally we'd determine what portion of the local stack frame
6098 (within the constraint of the lowest 240) is most heavily used.
6099 But without that complication, simply bias the frame pointer
6100 by 128 bytes so as to maximize the amount of the local stack
6101 frame that is addressable with 8-bit offsets. */
6102 frame
->hard_frame_pointer_offset
= frame
->stack_pointer_offset
- 128;
6107 /* This is semi-inlined memory_address_length, but simplified
6108 since we know that we're always dealing with reg+offset, and
6109 to avoid having to create and discard all that rtl.  Returns the
   encoded length in bytes of a REGNO+OFFSET address (disp8 vs disp32,
   SIB requirements).  Final return elided in this extraction.  */
6112 choose_baseaddr_len (unsigned int regno
, HOST_WIDE_INT offset
)
6118 /* EBP and R13 cannot be encoded without an offset. */
6119 len
= (regno
== BP_REG
|| regno
== R13_REG
);
/* Offsets in [-128, 127] fit in a single disp8 byte.  */
6121 else if (IN_RANGE (offset
, -128, 127))
6124 /* ESP and R12 must be encoded with a SIB byte. */
6125 if (regno
== SP_REG
|| regno
== R12_REG
)
6131 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6132 the frame save area. The register is saved at CFA - CFA_OFFSET.
   Returns false for offsets above a realigned stack pointer (the true
   branch / final return are elided in this extraction).  */
6135 sp_valid_at (HOST_WIDE_INT cfa_offset
)
6137 const struct machine_frame_state
&fs
= cfun
->machine
->fs
;
6138 if (fs
.sp_realigned
&& cfa_offset
<= fs
.sp_realigned_offset
)
6140 /* Validate that the cfa_offset isn't in a "no-man's land". */
6141 gcc_assert (cfa_offset
<= fs
.sp_realigned_fp_last
);
6147 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6148 the frame save area. The register is saved at CFA - CFA_OFFSET.
   Mirror of sp_valid_at: false for offsets beyond the last FP-valid
   offset when the stack has been realigned (true branch / final return
   elided in this extraction).  */
6151 fp_valid_at (HOST_WIDE_INT cfa_offset
)
6153 const struct machine_frame_state
&fs
= cfun
->machine
->fs
;
6154 if (fs
.sp_realigned
&& cfa_offset
> fs
.sp_realigned_fp_last
)
6156 /* Validate that the cfa_offset isn't in a "no-man's land". */
6157 gcc_assert (cfa_offset
>= fs
.sp_realigned_offset
);
6163 /* Choose a base register based upon alignment requested, speed and/or
   size.  On return BASE_REG/BASE_OFFSET describe an address equal to
   CFA - CFA_OFFSET; *ALIGN (if set by the caller's wrapper) receives
   the chosen register's known alignment.

   NOTE(review): line-mangled extraction; some condition lines and
   braces are elided below.  */
6167 choose_basereg (HOST_WIDE_INT cfa_offset
, rtx
&base_reg
,
6168 HOST_WIDE_INT
&base_offset
,
6169 unsigned int align_reqested
, unsigned int *align
)
6171 const struct machine_function
*m
= cfun
->machine
;
6172 unsigned int hfp_align
;
6173 unsigned int drap_align
;
6174 unsigned int sp_align
;
6175 bool hfp_ok
= fp_valid_at (cfa_offset
);
6176 bool drap_ok
= m
->fs
.drap_valid
;
6177 bool sp_ok
= sp_valid_at (cfa_offset
);
6179 hfp_align
= drap_align
= sp_align
= INCOMING_STACK_BOUNDARY
;
6181 /* Filter out any registers that don't meet the requested alignment
   requirement.  */
6185 if (m
->fs
.realigned
)
6186 hfp_align
= drap_align
= sp_align
= crtl
->stack_alignment_needed
;
6187 /* SEH unwind code does do not currently support REG_CFA_EXPRESSION
6188 notes (which we would need to use a realigned stack pointer),
6189 so disable on SEH targets. */
6190 else if (m
->fs
.sp_realigned
)
6191 sp_align
= crtl
->stack_alignment_needed
;
6193 hfp_ok
= hfp_ok
&& hfp_align
>= align_reqested
;
6194 drap_ok
= drap_ok
&& drap_align
>= align_reqested
;
6195 sp_ok
= sp_ok
&& sp_align
>= align_reqested
;
/* Fast path (optimizing for speed): prefer FP, then DRAP, then SP.  */
6198 if (m
->use_fast_prologue_epilogue
)
6200 /* Choose the base register most likely to allow the most scheduling
6201 opportunities. Generally FP is valid throughout the function,
6202 while DRAP must be reloaded within the epilogue. But choose either
6203 over the SP due to increased encoding size. */
6207 base_reg
= hard_frame_pointer_rtx
;
6208 base_offset
= m
->fs
.fp_offset
- cfa_offset
;
6212 base_reg
= crtl
->drap_reg
;
6213 base_offset
= 0 - cfa_offset
;
6217 base_reg
= stack_pointer_rtx
;
6218 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
/* Size path: compare encoded address lengths and pick the shortest.  */
6223 HOST_WIDE_INT toffset
;
6226 /* Choose the base register with the smallest address encoding.
6227 With a tie, choose FP > DRAP > SP. */
6230 base_reg
= stack_pointer_rtx
;
6231 base_offset
= m
->fs
.sp_offset
- cfa_offset
;
6232 len
= choose_baseaddr_len (STACK_POINTER_REGNUM
, base_offset
);
6236 toffset
= 0 - cfa_offset
;
6237 tlen
= choose_baseaddr_len (REGNO (crtl
->drap_reg
), toffset
);
6240 base_reg
= crtl
->drap_reg
;
6241 base_offset
= toffset
;
6247 toffset
= m
->fs
.fp_offset
- cfa_offset
;
6248 tlen
= choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM
, toffset
);
6251 base_reg
= hard_frame_pointer_rtx
;
6252 base_offset
= toffset
;
6257 /* Set the align return value to the alignment of whichever register
   was chosen.  */
6260 if (base_reg
== stack_pointer_rtx
)
6262 else if (base_reg
== crtl
->drap_reg
)
6263 *align
= drap_align
;
6264 else if (base_reg
== hard_frame_pointer_rtx
)
6269 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6270 the alignment of address. If ALIGN is non-null, it should point to
6271 an alignment value (in bits) that is preferred or zero and will
6272 receive the alignment of the base register that was selected,
6273 irrespective of whether or not CFA_OFFSET is a multiple of that
6274 alignment value. If it is possible for the base register offset to be
6275 non-immediate then SCRATCH_REGNO should specify a scratch register to
   materialize the offset into.
6278 The valid base registers are taken from CFUN->MACHINE->FS. */
6281 choose_baseaddr (HOST_WIDE_INT cfa_offset
, unsigned int *align
,
6282 unsigned int scratch_regno
= INVALID_REGNUM
)
6284 rtx base_reg
= NULL
;
6285 HOST_WIDE_INT base_offset
= 0;
6287 /* If a specific alignment is requested, try to get a base register
6288 with that alignment first. */
6289 if (align
&& *align
)
6290 choose_basereg (cfa_offset
, base_reg
, base_offset
, *align
, align
);
6293 choose_basereg (cfa_offset
, base_reg
, base_offset
, 0, align
);
6295 gcc_assert (base_reg
!= NULL
);
6297 rtx base_offset_rtx
= GEN_INT (base_offset
);
/* An offset too large for an x86-64 immediate must go through the
   caller-provided scratch register.  */
6299 if (!x86_64_immediate_operand (base_offset_rtx
, Pmode
))
6301 gcc_assert (scratch_regno
!= INVALID_REGNUM
);
6303 rtx scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
6304 emit_move_insn (scratch_reg
, base_offset_rtx
);
6306 return gen_rtx_PLUS (Pmode
, base_reg
, scratch_reg
);
6309 return plus_constant (Pmode
, base_reg
, base_offset
);
6312 /* Emit code to save registers in the prologue, using push instructions.
   Walks hard registers from high to low and pushes every general
   register that ix86_save_reg says must be saved; each push is marked
   frame-related for CFI.  */
6315 ix86_emit_save_regs (void)
6320 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
-- > 0; )
6321 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6323 insn
= emit_insn (gen_push (gen_rtx_REG (word_mode
, regno
)));
6324 RTX_FRAME_RELATED_P (insn
) = 1;
6328 /* Emit a single register save at CFA - CFA_OFFSET. */
6331 ix86_emit_save_reg_using_mov (machine_mode mode
, unsigned int regno
,
6332 HOST_WIDE_INT cfa_offset
)
6334 struct machine_function
*m
= cfun
->machine
;
6335 rtx reg
= gen_rtx_REG (mode
, regno
);
6336 rtx mem
, addr
, base
, insn
;
6337 unsigned int align
= GET_MODE_ALIGNMENT (mode
);
6339 addr
= choose_baseaddr (cfa_offset
, &align
);
6340 mem
= gen_frame_mem (mode
, addr
);
6342 /* The location aligment depends upon the base register. */
6343 align
= MIN (GET_MODE_ALIGNMENT (mode
), align
);
6344 gcc_assert (! (cfa_offset
& (align
/ BITS_PER_UNIT
- 1)));
6345 set_mem_align (mem
, align
);
6347 insn
= emit_insn (gen_rtx_SET (mem
, reg
));
6348 RTX_FRAME_RELATED_P (insn
) = 1;
6351 if (GET_CODE (base
) == PLUS
)
6352 base
= XEXP (base
, 0);
6353 gcc_checking_assert (REG_P (base
));
6355 /* When saving registers into a re-aligned local stack frame, avoid
6356 any tricky guessing by dwarf2out. */
6357 if (m
->fs
.realigned
)
6359 gcc_checking_assert (stack_realign_drap
);
6361 if (regno
== REGNO (crtl
->drap_reg
))
6363 /* A bit of a hack. We force the DRAP register to be saved in
6364 the re-aligned stack frame, which provides us with a copy
6365 of the CFA that will last past the prologue. Install it. */
6366 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
6367 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
6368 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
6369 mem
= gen_rtx_MEM (mode
, addr
);
6370 add_reg_note (insn
, REG_CFA_DEF_CFA
, mem
);
6374 /* The frame pointer is a stable reference within the
6375 aligned frame. Use it. */
6376 gcc_checking_assert (cfun
->machine
->fs
.fp_valid
);
6377 addr
= plus_constant (Pmode
, hard_frame_pointer_rtx
,
6378 cfun
->machine
->fs
.fp_offset
- cfa_offset
);
6379 mem
= gen_rtx_MEM (mode
, addr
);
6380 add_reg_note (insn
, REG_CFA_EXPRESSION
, gen_rtx_SET (mem
, reg
));
6384 else if (base
== stack_pointer_rtx
&& m
->fs
.sp_realigned
6385 && cfa_offset
>= m
->fs
.sp_realigned_offset
)
6387 gcc_checking_assert (stack_realign_fp
);
6388 add_reg_note (insn
, REG_CFA_EXPRESSION
, gen_rtx_SET (mem
, reg
));
6391 /* The memory may not be relative to the current CFA register,
6392 which means that we may need to generate a new pattern for
6393 use by the unwind info. */
6394 else if (base
!= m
->fs
.cfa_reg
)
6396 addr
= plus_constant (Pmode
, m
->fs
.cfa_reg
,
6397 m
->fs
.cfa_offset
- cfa_offset
);
6398 mem
= gen_rtx_MEM (mode
, addr
);
6399 add_reg_note (insn
, REG_CFA_OFFSET
, gen_rtx_SET (mem
, reg
));
6403 /* Emit code to save registers using MOV insns.
6404 First register is stored at CFA - CFA_OFFSET. */
6406 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset
)
6410 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6411 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6413 ix86_emit_save_reg_using_mov (word_mode
, regno
, cfa_offset
);
6414 cfa_offset
-= UNITS_PER_WORD
;
6418 /* Emit code to save SSE registers using MOV insns.
6419 First register is stored at CFA - CFA_OFFSET. */
6421 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
)
6425 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
6426 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, true, true))
6428 ix86_emit_save_reg_using_mov (V4SFmode
, regno
, cfa_offset
);
6429 cfa_offset
-= GET_MODE_SIZE (V4SFmode
);
6433 static GTY(()) rtx queued_cfa_restores
;
6435 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
6436 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
6437 Don't add the note if the previously saved value will be left untouched
6438 within stack red-zone till return, as unwinders can find the same value
6439 in the register and on the stack. */
6442 ix86_add_cfa_restore_note (rtx_insn
*insn
, rtx reg
, HOST_WIDE_INT cfa_offset
)
6444 if (!crtl
->shrink_wrapped
6445 && cfa_offset
<= cfun
->machine
->fs
.red_zone_offset
)
6450 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
6451 RTX_FRAME_RELATED_P (insn
) = 1;
6455 = alloc_reg_note (REG_CFA_RESTORE
, reg
, queued_cfa_restores
);
6458 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
6461 ix86_add_queued_cfa_restore_notes (rtx insn
)
6464 if (!queued_cfa_restores
)
6466 for (last
= queued_cfa_restores
; XEXP (last
, 1); last
= XEXP (last
, 1))
6468 XEXP (last
, 1) = REG_NOTES (insn
);
6469 REG_NOTES (insn
) = queued_cfa_restores
;
6470 queued_cfa_restores
= NULL_RTX
;
6471 RTX_FRAME_RELATED_P (insn
) = 1;
6474 /* Expand prologue or epilogue stack adjustment.
6475 The pattern exist to put a dependency on all ebp-based memory accesses.
6476 STYLE should be negative if instructions should be marked as frame related,
6477 zero if %r11 register is live and cannot be freely used and positive
6481 pro_epilogue_adjust_stack (rtx dest
, rtx src
, rtx offset
,
6482 int style
, bool set_cfa
)
6484 struct machine_function
*m
= cfun
->machine
;
6485 rtx addend
= offset
;
6487 bool add_frame_related_expr
= false;
6489 if (!x86_64_immediate_operand (offset
, Pmode
))
6491 /* r11 is used by indirect sibcall return as well, set before the
6492 epilogue and used after the epilogue. */
6494 addend
= gen_rtx_REG (Pmode
, R11_REG
);
6497 gcc_assert (src
!= hard_frame_pointer_rtx
6498 && dest
!= hard_frame_pointer_rtx
);
6499 addend
= hard_frame_pointer_rtx
;
6501 emit_insn (gen_rtx_SET (addend
, offset
));
6503 add_frame_related_expr
= true;
6506 insn
= emit_insn (gen_pro_epilogue_adjust_stack_add
6507 (Pmode
, dest
, src
, addend
));
6509 ix86_add_queued_cfa_restore_notes (insn
);
6515 gcc_assert (m
->fs
.cfa_reg
== src
);
6516 m
->fs
.cfa_offset
+= INTVAL (offset
);
6517 m
->fs
.cfa_reg
= dest
;
6519 r
= gen_rtx_PLUS (Pmode
, src
, offset
);
6520 r
= gen_rtx_SET (dest
, r
);
6521 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, r
);
6522 RTX_FRAME_RELATED_P (insn
) = 1;
6526 RTX_FRAME_RELATED_P (insn
) = 1;
6527 if (add_frame_related_expr
)
6529 rtx r
= gen_rtx_PLUS (Pmode
, src
, offset
);
6530 r
= gen_rtx_SET (dest
, r
);
6531 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, r
);
6535 if (dest
== stack_pointer_rtx
)
6537 HOST_WIDE_INT ooffset
= m
->fs
.sp_offset
;
6538 bool valid
= m
->fs
.sp_valid
;
6539 bool realigned
= m
->fs
.sp_realigned
;
6541 if (src
== hard_frame_pointer_rtx
)
6543 valid
= m
->fs
.fp_valid
;
6545 ooffset
= m
->fs
.fp_offset
;
6547 else if (src
== crtl
->drap_reg
)
6549 valid
= m
->fs
.drap_valid
;
6555 /* Else there are two possibilities: SP itself, which we set
6556 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
6557 taken care of this by hand along the eh_return path. */
6558 gcc_checking_assert (src
== stack_pointer_rtx
6559 || offset
== const0_rtx
);
6562 m
->fs
.sp_offset
= ooffset
- INTVAL (offset
);
6563 m
->fs
.sp_valid
= valid
;
6564 m
->fs
.sp_realigned
= realigned
;
6569 /* Find an available register to be used as dynamic realign argument
6570 pointer regsiter. Such a register will be written in prologue and
6571 used in begin of body, so it must not be
6572 1. parameter passing register.
6574 We reuse static-chain register if it is available. Otherwise, we
6575 use DI for i386 and R13 for x86-64. We chose R13 since it has
6578 Return: the regno of chosen register. */
6581 find_drap_reg (void)
6583 tree decl
= cfun
->decl
;
6585 /* Always use callee-saved register if there are no caller-saved
6589 /* Use R13 for nested function or function need static chain.
6590 Since function with tail call may use any caller-saved
6591 registers in epilogue, DRAP must not use caller-saved
6592 register in such case. */
6593 if (DECL_STATIC_CHAIN (decl
)
6594 || cfun
->machine
->no_caller_saved_registers
6595 || crtl
->tail_call_emit
)
6602 /* Use DI for nested function or function need static chain.
6603 Since function with tail call may use any caller-saved
6604 registers in epilogue, DRAP must not use caller-saved
6605 register in such case. */
6606 if (DECL_STATIC_CHAIN (decl
)
6607 || cfun
->machine
->no_caller_saved_registers
6608 || crtl
->tail_call_emit
)
6611 /* Reuse static chain register if it isn't used for parameter
6613 if (ix86_function_regparm (TREE_TYPE (decl
), decl
) <= 2)
6615 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (decl
));
6616 if ((ccvt
& (IX86_CALLCVT_FASTCALL
| IX86_CALLCVT_THISCALL
)) == 0)
6623 /* Return minimum incoming stack alignment. */
6626 ix86_minimum_incoming_stack_boundary (bool sibcall
)
6628 unsigned int incoming_stack_boundary
;
6630 /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */
6631 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
6632 incoming_stack_boundary
= TARGET_64BIT
? 128 : MIN_STACK_BOUNDARY
;
6633 /* Prefer the one specified at command line. */
6634 else if (ix86_user_incoming_stack_boundary
)
6635 incoming_stack_boundary
= ix86_user_incoming_stack_boundary
;
6636 /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
6637 if -mstackrealign is used, it isn't used for sibcall check and
6638 estimated stack alignment is 128bit. */
6640 && ix86_force_align_arg_pointer
6641 && crtl
->stack_alignment_estimated
== 128)
6642 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
6644 incoming_stack_boundary
= ix86_default_incoming_stack_boundary
;
6646 /* Incoming stack alignment can be changed on individual functions
6647 via force_align_arg_pointer attribute. We use the smallest
6648 incoming stack boundary. */
6649 if (incoming_stack_boundary
> MIN_STACK_BOUNDARY
6650 && lookup_attribute ("force_align_arg_pointer",
6651 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl
))))
6652 incoming_stack_boundary
= MIN_STACK_BOUNDARY
;
6654 /* The incoming stack frame has to be aligned at least at
6655 parm_stack_boundary. */
6656 if (incoming_stack_boundary
< crtl
->parm_stack_boundary
)
6657 incoming_stack_boundary
= crtl
->parm_stack_boundary
;
6659 /* Stack at entrance of main is aligned by runtime. We use the
6660 smallest incoming stack boundary. */
6661 if (incoming_stack_boundary
> MAIN_STACK_BOUNDARY
6662 && DECL_NAME (current_function_decl
)
6663 && MAIN_NAME_P (DECL_NAME (current_function_decl
))
6664 && DECL_FILE_SCOPE_P (current_function_decl
))
6665 incoming_stack_boundary
= MAIN_STACK_BOUNDARY
;
6667 return incoming_stack_boundary
;
6670 /* Update incoming stack boundary and estimated stack alignment. */
6673 ix86_update_stack_boundary (void)
6675 ix86_incoming_stack_boundary
6676 = ix86_minimum_incoming_stack_boundary (false);
6678 /* x86_64 vararg needs 16byte stack alignment for register save area. */
6681 && crtl
->stack_alignment_estimated
< 128)
6682 crtl
->stack_alignment_estimated
= 128;
6684 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
6685 if (ix86_tls_descriptor_calls_expanded_in_cfun
6686 && crtl
->preferred_stack_boundary
< 128)
6687 crtl
->preferred_stack_boundary
= 128;
6690 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
6691 needed or an rtx for DRAP otherwise. */
6694 ix86_get_drap_rtx (void)
6696 /* We must use DRAP if there are outgoing arguments on stack and
6697 ACCUMULATE_OUTGOING_ARGS is false. */
6699 || (cfun
->machine
->outgoing_args_on_stack
6700 && !ACCUMULATE_OUTGOING_ARGS
))
6701 crtl
->need_drap
= true;
6703 if (stack_realign_drap
)
6705 /* Assign DRAP to vDRAP and returns vDRAP */
6706 unsigned int regno
= find_drap_reg ();
6709 rtx_insn
*seq
, *insn
;
6711 arg_ptr
= gen_rtx_REG (Pmode
, regno
);
6712 crtl
->drap_reg
= arg_ptr
;
6715 drap_vreg
= copy_to_reg (arg_ptr
);
6719 insn
= emit_insn_before (seq
, NEXT_INSN (entry_of_function ()));
6722 add_reg_note (insn
, REG_CFA_SET_VDRAP
, drap_vreg
);
6723 RTX_FRAME_RELATED_P (insn
) = 1;
6731 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6734 ix86_internal_arg_pointer (void)
6736 return virtual_incoming_args_rtx
;
6739 struct scratch_reg
{
6744 /* Return a short-lived scratch register for use on function entry.
6745 In 32-bit mode, it is valid only after the registers are saved
6746 in the prologue. This register must be released by means of
6747 release_scratch_register_on_entry once it is dead. */
6750 get_scratch_register_on_entry (struct scratch_reg
*sr
)
6758 /* We always use R11 in 64-bit mode. */
6763 tree decl
= current_function_decl
, fntype
= TREE_TYPE (decl
);
6765 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
6767 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype
)) != NULL_TREE
;
6768 bool static_chain_p
= DECL_STATIC_CHAIN (decl
);
6769 int regparm
= ix86_function_regparm (fntype
, decl
);
6771 = crtl
->drap_reg
? REGNO (crtl
->drap_reg
) : INVALID_REGNUM
;
6773 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
6774 for the static chain register. */
6775 if ((regparm
< 1 || (fastcall_p
&& !static_chain_p
))
6776 && drap_regno
!= AX_REG
)
6778 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
6779 for the static chain register. */
6780 else if (thiscall_p
&& !static_chain_p
&& drap_regno
!= AX_REG
)
6782 else if (regparm
< 2 && !thiscall_p
&& drap_regno
!= DX_REG
)
6784 /* ecx is the static chain register. */
6785 else if (regparm
< 3 && !fastcall_p
&& !thiscall_p
6787 && drap_regno
!= CX_REG
)
6789 else if (ix86_save_reg (BX_REG
, true, false))
6791 /* esi is the static chain register. */
6792 else if (!(regparm
== 3 && static_chain_p
)
6793 && ix86_save_reg (SI_REG
, true, false))
6795 else if (ix86_save_reg (DI_REG
, true, false))
6799 regno
= (drap_regno
== AX_REG
? DX_REG
: AX_REG
);
6804 sr
->reg
= gen_rtx_REG (Pmode
, regno
);
6807 rtx_insn
*insn
= emit_insn (gen_push (sr
->reg
));
6808 RTX_FRAME_RELATED_P (insn
) = 1;
6812 /* Release a scratch register obtained from the preceding function.
6814 If RELEASE_VIA_POP is true, we just pop the register off the stack
6815 to release it. This is what non-Linux systems use with -fstack-check.
6817 Otherwise we use OFFSET to locate the saved register and the
6818 allocated stack space becomes part of the local frame and is
6819 deallocated by the epilogue. */
6822 release_scratch_register_on_entry (struct scratch_reg
*sr
, HOST_WIDE_INT offset
,
6823 bool release_via_pop
)
6827 if (release_via_pop
)
6829 struct machine_function
*m
= cfun
->machine
;
6830 rtx x
, insn
= emit_insn (gen_pop (sr
->reg
));
6832 /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */
6833 RTX_FRAME_RELATED_P (insn
) = 1;
6834 x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (UNITS_PER_WORD
));
6835 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
6836 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, x
);
6837 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
6841 rtx x
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, GEN_INT (offset
));
6842 x
= gen_rtx_SET (sr
->reg
, gen_rtx_MEM (word_mode
, x
));
6848 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
6850 This differs from the next routine in that it tries hard to prevent
6851 attacks that jump the stack guard. Thus it is never allowed to allocate
6852 more than PROBE_INTERVAL bytes of stack space without a suitable
6855 INT_REGISTERS_SAVED is true if integer registers have already been
6856 pushed on the stack. */
6859 ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size
,
6860 const bool int_registers_saved
)
6862 struct machine_function
*m
= cfun
->machine
;
6864 /* If this function does not statically allocate stack space, then
6865 no probes are needed. */
6868 /* However, the allocation of space via pushes for register
6869 saves could be viewed as allocating space, but without the
6871 if (m
->frame
.nregs
|| m
->frame
.nsseregs
|| frame_pointer_needed
)
6872 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
6874 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME
, false);
6878 /* If we are a noreturn function, then we have to consider the
6879 possibility that we're called via a jump rather than a call.
6881 Thus we don't have the implicit probe generated by saving the
6882 return address into the stack at the call. Thus, the stack
6883 pointer could be anywhere in the guard page. The safe thing
6884 to do is emit a probe now.
6886 The probe can be avoided if we have already emitted any callee
6887 register saves into the stack or have a frame pointer (which will
6888 have been saved as well). Those saves will function as implicit
6891 ?!? This should be revamped to work like aarch64 and s390 where
6892 we track the offset from the most recent probe. Normally that
6893 offset would be zero. For a noreturn function we would reset
6894 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
6895 we just probe when we cross PROBE_INTERVAL. */
6896 if (TREE_THIS_VOLATILE (cfun
->decl
)
6897 && !(m
->frame
.nregs
|| m
->frame
.nsseregs
|| frame_pointer_needed
))
6899 /* We can safely use any register here since we're just going to push
6900 its value and immediately pop it back. But we do try and avoid
6901 argument passing registers so as not to introduce dependencies in
6902 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
6903 rtx dummy_reg
= gen_rtx_REG (word_mode
, TARGET_64BIT
? AX_REG
: SI_REG
);
6904 rtx_insn
*insn_push
= emit_insn (gen_push (dummy_reg
));
6905 rtx_insn
*insn_pop
= emit_insn (gen_pop (dummy_reg
));
6906 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
6907 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
6909 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
6910 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
6911 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
6912 add_reg_note (insn_push
, REG_CFA_ADJUST_CFA
, x
);
6913 RTX_FRAME_RELATED_P (insn_push
) = 1;
6914 x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
6915 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
6916 add_reg_note (insn_pop
, REG_CFA_ADJUST_CFA
, x
);
6917 RTX_FRAME_RELATED_P (insn_pop
) = 1;
6919 emit_insn (gen_blockage ());
6922 /* If we allocate less than the size of the guard statically,
6923 then no probing is necessary, but we do need to allocate
6925 if (size
< (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE
)))
6927 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6928 GEN_INT (-size
), -1,
6929 m
->fs
.cfa_reg
== stack_pointer_rtx
);
6930 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME
, true);
6934 /* We're allocating a large enough stack frame that we need to
6935 emit probes. Either emit them inline or in a loop depending
6937 HOST_WIDE_INT probe_interval
= get_probe_interval ();
6938 if (size
<= 4 * probe_interval
)
6941 for (i
= probe_interval
; i
<= size
; i
+= probe_interval
)
6943 /* Allocate PROBE_INTERVAL bytes. */
6945 = pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6946 GEN_INT (-probe_interval
), -1,
6947 m
->fs
.cfa_reg
== stack_pointer_rtx
);
6948 add_reg_note (insn
, REG_STACK_CHECK
, const0_rtx
);
6950 /* And probe at *sp. */
6951 emit_stack_probe (stack_pointer_rtx
);
6952 emit_insn (gen_blockage ());
6955 /* We need to allocate space for the residual, but we do not need
6956 to probe the residual. */
6957 HOST_WIDE_INT residual
= (i
- probe_interval
- size
);
6959 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
6960 GEN_INT (residual
), -1,
6961 m
->fs
.cfa_reg
== stack_pointer_rtx
);
6962 dump_stack_clash_frame_info (PROBE_INLINE
, residual
!= 0);
6966 /* We expect the GP registers to be saved when probes are used
6967 as the probing sequences might need a scratch register and
6968 the routine to allocate one assumes the integer registers
6969 have already been saved. */
6970 gcc_assert (int_registers_saved
);
6972 struct scratch_reg sr
;
6973 get_scratch_register_on_entry (&sr
);
6975 /* If we needed to save a register, then account for any space
6976 that was pushed (we are not going to pop the register when
6977 we do the restore). */
6979 size
-= UNITS_PER_WORD
;
6981 /* Step 1: round SIZE down to a multiple of the interval. */
6982 HOST_WIDE_INT rounded_size
= size
& -probe_interval
;
6984 /* Step 2: compute final value of the loop counter. Use lea if
6986 rtx addr
= plus_constant (Pmode
, stack_pointer_rtx
, -rounded_size
);
6988 if (address_no_seg_operand (addr
, Pmode
))
6989 insn
= emit_insn (gen_rtx_SET (sr
.reg
, addr
));
6992 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
6993 insn
= emit_insn (gen_rtx_SET (sr
.reg
,
6994 gen_rtx_PLUS (Pmode
, sr
.reg
,
6995 stack_pointer_rtx
)));
6997 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
6999 add_reg_note (insn
, REG_CFA_DEF_CFA
,
7000 plus_constant (Pmode
, sr
.reg
,
7001 m
->fs
.cfa_offset
+ rounded_size
));
7002 RTX_FRAME_RELATED_P (insn
) = 1;
7005 /* Step 3: the loop. */
7006 rtx size_rtx
= GEN_INT (rounded_size
);
7007 insn
= emit_insn (gen_adjust_stack_and_probe (Pmode
, sr
.reg
, sr
.reg
,
7009 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7011 m
->fs
.cfa_offset
+= rounded_size
;
7012 add_reg_note (insn
, REG_CFA_DEF_CFA
,
7013 plus_constant (Pmode
, stack_pointer_rtx
,
7015 RTX_FRAME_RELATED_P (insn
) = 1;
7017 m
->fs
.sp_offset
+= rounded_size
;
7018 emit_insn (gen_blockage ());
7020 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7021 is equal to ROUNDED_SIZE. */
7023 if (size
!= rounded_size
)
7024 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7025 GEN_INT (rounded_size
- size
), -1,
7026 m
->fs
.cfa_reg
== stack_pointer_rtx
);
7027 dump_stack_clash_frame_info (PROBE_LOOP
, size
!= rounded_size
);
7029 /* This does not deallocate the space reserved for the scratch
7030 register. That will be deallocated in the epilogue. */
7031 release_scratch_register_on_entry (&sr
, size
, false);
7034 /* Make sure nothing is scheduled before we are done. */
7035 emit_insn (gen_blockage ());
7038 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7040 INT_REGISTERS_SAVED is true if integer registers have already been
7041 pushed on the stack. */
7044 ix86_adjust_stack_and_probe (HOST_WIDE_INT size
,
7045 const bool int_registers_saved
)
7047 /* We skip the probe for the first interval + a small dope of 4 words and
7048 probe that many bytes past the specified size to maintain a protection
7049 area at the botton of the stack. */
7050 const int dope
= 4 * UNITS_PER_WORD
;
7051 rtx size_rtx
= GEN_INT (size
), last
;
7053 /* See if we have a constant small number of probes to generate. If so,
7054 that's the easy case. The run-time loop is made up of 9 insns in the
7055 generic case while the compile-time loop is made up of 3+2*(n-1) insns
7056 for n # of intervals. */
7057 if (size
<= 4 * get_probe_interval ())
7059 HOST_WIDE_INT i
, adjust
;
7060 bool first_probe
= true;
7062 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
7063 values of N from 1 until it exceeds SIZE. If only one probe is
7064 needed, this will not generate any code. Then adjust and probe
7065 to PROBE_INTERVAL + SIZE. */
7066 for (i
= get_probe_interval (); i
< size
; i
+= get_probe_interval ())
7070 adjust
= 2 * get_probe_interval () + dope
;
7071 first_probe
= false;
7074 adjust
= get_probe_interval ();
7076 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7077 plus_constant (Pmode
, stack_pointer_rtx
,
7079 emit_stack_probe (stack_pointer_rtx
);
7083 adjust
= size
+ get_probe_interval () + dope
;
7085 adjust
= size
+ get_probe_interval () - i
;
7087 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7088 plus_constant (Pmode
, stack_pointer_rtx
,
7090 emit_stack_probe (stack_pointer_rtx
);
7092 /* Adjust back to account for the additional first interval. */
7093 last
= emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7094 plus_constant (Pmode
, stack_pointer_rtx
,
7095 (get_probe_interval ()
7099 /* Otherwise, do the same as above, but in a loop. Note that we must be
7100 extra careful with variables wrapping around because we might be at
7101 the very top (or the very bottom) of the address space and we have
7102 to be able to handle this case properly; in particular, we use an
7103 equality test for the loop condition. */
7106 /* We expect the GP registers to be saved when probes are used
7107 as the probing sequences might need a scratch register and
7108 the routine to allocate one assumes the integer registers
7109 have already been saved. */
7110 gcc_assert (int_registers_saved
);
7112 HOST_WIDE_INT rounded_size
;
7113 struct scratch_reg sr
;
7115 get_scratch_register_on_entry (&sr
);
7117 /* If we needed to save a register, then account for any space
7118 that was pushed (we are not going to pop the register when
7119 we do the restore). */
7121 size
-= UNITS_PER_WORD
;
7123 /* Step 1: round SIZE to the previous multiple of the interval. */
7125 rounded_size
= ROUND_DOWN (size
, get_probe_interval ());
7128 /* Step 2: compute initial and final value of the loop counter. */
7130 /* SP = SP_0 + PROBE_INTERVAL. */
7131 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7132 plus_constant (Pmode
, stack_pointer_rtx
,
7133 - (get_probe_interval () + dope
))));
7135 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
7136 if (rounded_size
<= (HOST_WIDE_INT_1
<< 31))
7137 emit_insn (gen_rtx_SET (sr
.reg
,
7138 plus_constant (Pmode
, stack_pointer_rtx
,
7142 emit_move_insn (sr
.reg
, GEN_INT (-rounded_size
));
7143 emit_insn (gen_rtx_SET (sr
.reg
,
7144 gen_rtx_PLUS (Pmode
, sr
.reg
,
7145 stack_pointer_rtx
)));
7153 SP = SP + PROBE_INTERVAL
7156 while (SP != LAST_ADDR)
7158 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
7159 values of N from 1 until it is equal to ROUNDED_SIZE. */
7161 emit_insn (gen_adjust_stack_and_probe (Pmode
, sr
.reg
, sr
.reg
, size_rtx
));
7164 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
7165 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
7167 if (size
!= rounded_size
)
7169 emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7170 plus_constant (Pmode
, stack_pointer_rtx
,
7171 rounded_size
- size
)));
7172 emit_stack_probe (stack_pointer_rtx
);
7175 /* Adjust back to account for the additional first interval. */
7176 last
= emit_insn (gen_rtx_SET (stack_pointer_rtx
,
7177 plus_constant (Pmode
, stack_pointer_rtx
,
7178 (get_probe_interval ()
7181 /* This does not deallocate the space reserved for the scratch
7182 register. That will be deallocated in the epilogue. */
7183 release_scratch_register_on_entry (&sr
, size
, false);
7186 /* Even if the stack pointer isn't the CFA register, we need to correctly
7187 describe the adjustments made to it, in particular differentiate the
7188 frame-related ones from the frame-unrelated ones. */
7191 rtx expr
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (2));
7192 XVECEXP (expr
, 0, 0)
7193 = gen_rtx_SET (stack_pointer_rtx
,
7194 plus_constant (Pmode
, stack_pointer_rtx
, -size
));
7195 XVECEXP (expr
, 0, 1)
7196 = gen_rtx_SET (stack_pointer_rtx
,
7197 plus_constant (Pmode
, stack_pointer_rtx
,
7198 get_probe_interval () + dope
+ size
));
7199 add_reg_note (last
, REG_FRAME_RELATED_EXPR
, expr
);
7200 RTX_FRAME_RELATED_P (last
) = 1;
7202 cfun
->machine
->fs
.sp_offset
+= size
;
7205 /* Make sure nothing is scheduled before we are done. */
7206 emit_insn (gen_blockage ());
7209 /* Adjust the stack pointer up to REG while probing it. */
7212 output_adjust_stack_and_probe (rtx reg
)
7214 static int labelno
= 0;
7218 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
7221 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
7223 /* SP = SP + PROBE_INTERVAL. */
7224 xops
[0] = stack_pointer_rtx
;
7225 xops
[1] = GEN_INT (get_probe_interval ());
7226 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
7229 xops
[1] = const0_rtx
;
7230 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops
);
7232 /* Test if SP == LAST_ADDR. */
7233 xops
[0] = stack_pointer_rtx
;
7235 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
7238 fputs ("\tjne\t", asm_out_file
);
7239 assemble_name_raw (asm_out_file
, loop_lab
);
7240 fputc ('\n', asm_out_file
);
7245 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7246 inclusive. These are offsets from the current stack pointer.
7248 INT_REGISTERS_SAVED is true if integer registers have already been
7249 pushed on the stack. */
7252 ix86_emit_probe_stack_range (HOST_WIDE_INT first
, HOST_WIDE_INT size
,
7253 const bool int_registers_saved
)
7255 /* See if we have a constant small number of probes to generate. If so,
7256 that's the easy case. The run-time loop is made up of 6 insns in the
7257 generic case while the compile-time loop is made up of n insns for n #
7259 if (size
<= 6 * get_probe_interval ())
7263 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7264 it exceeds SIZE. If only one probe is needed, this will not
7265 generate any code. Then probe at FIRST + SIZE. */
7266 for (i
= get_probe_interval (); i
< size
; i
+= get_probe_interval ())
7267 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
7270 emit_stack_probe (plus_constant (Pmode
, stack_pointer_rtx
,
7274 /* Otherwise, do the same as above, but in a loop. Note that we must be
7275 extra careful with variables wrapping around because we might be at
7276 the very top (or the very bottom) of the address space and we have
7277 to be able to handle this case properly; in particular, we use an
7278 equality test for the loop condition. */
7281 /* We expect the GP registers to be saved when probes are used
7282 as the probing sequences might need a scratch register and
7283 the routine to allocate one assumes the integer registers
7284 have already been saved. */
7285 gcc_assert (int_registers_saved
);
7287 HOST_WIDE_INT rounded_size
, last
;
7288 struct scratch_reg sr
;
7290 get_scratch_register_on_entry (&sr
);
7293 /* Step 1: round SIZE to the previous multiple of the interval. */
7295 rounded_size
= ROUND_DOWN (size
, get_probe_interval ());
7298 /* Step 2: compute initial and final value of the loop counter. */
7300 /* TEST_OFFSET = FIRST. */
7301 emit_move_insn (sr
.reg
, GEN_INT (-first
));
7303 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7304 last
= first
+ rounded_size
;
7311 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7314 while (TEST_ADDR != LAST_ADDR)
7316 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7317 until it is equal to ROUNDED_SIZE. */
7320 (gen_probe_stack_range (Pmode
, sr
.reg
, sr
.reg
, GEN_INT (-last
)));
7323 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7324 that SIZE is equal to ROUNDED_SIZE. */
7326 if (size
!= rounded_size
)
7327 emit_stack_probe (plus_constant (Pmode
,
7328 gen_rtx_PLUS (Pmode
,
7331 rounded_size
- size
));
7333 release_scratch_register_on_entry (&sr
, size
, true);
7336 /* Make sure nothing is scheduled before we are done. */
7337 emit_insn (gen_blockage ());
7340 /* Probe a range of stack addresses from REG to END, inclusive. These are
7341 offsets from the current stack pointer. */
7344 output_probe_stack_range (rtx reg
, rtx end
)
7346 static int labelno
= 0;
7350 ASM_GENERATE_INTERNAL_LABEL (loop_lab
, "LPSRL", labelno
++);
7353 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, loop_lab
);
7355 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
7357 xops
[1] = GEN_INT (get_probe_interval ());
7358 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops
);
7360 /* Probe at TEST_ADDR. */
7361 xops
[0] = stack_pointer_rtx
;
7363 xops
[2] = const0_rtx
;
7364 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops
);
7366 /* Test if TEST_ADDR == LAST_ADDR. */
7369 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops
);
7372 fputs ("\tjne\t", asm_out_file
);
7373 assemble_name_raw (asm_out_file
, loop_lab
);
7374 fputc ('\n', asm_out_file
);
7379 /* Set stack_frame_required to false if stack frame isn't required.
7380 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
7381 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
7384 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment
,
7385 bool check_stack_slot
)
7387 HARD_REG_SET set_up_by_prologue
, prologue_used
;
7390 CLEAR_HARD_REG_SET (prologue_used
);
7391 CLEAR_HARD_REG_SET (set_up_by_prologue
);
7392 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, STACK_POINTER_REGNUM
);
7393 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
, ARG_POINTER_REGNUM
);
7394 add_to_hard_reg_set (&set_up_by_prologue
, Pmode
,
7395 HARD_FRAME_POINTER_REGNUM
);
7397 /* The preferred stack alignment is the minimum stack alignment. */
7398 if (stack_alignment
> crtl
->preferred_stack_boundary
)
7399 stack_alignment
= crtl
->preferred_stack_boundary
;
7401 bool require_stack_frame
= false;
7403 FOR_EACH_BB_FN (bb
, cfun
)
7406 FOR_BB_INSNS (bb
, insn
)
7407 if (NONDEBUG_INSN_P (insn
)
7408 && requires_stack_frame_p (insn
, prologue_used
,
7409 set_up_by_prologue
))
7411 require_stack_frame
= true;
7413 if (check_stack_slot
)
7415 /* Find the maximum stack alignment. */
7416 subrtx_iterator::array_type array
;
7417 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), ALL
)
7419 && (reg_mentioned_p (stack_pointer_rtx
,
7421 || reg_mentioned_p (frame_pointer_rtx
,
7424 unsigned int alignment
= MEM_ALIGN (*iter
);
7425 if (alignment
> stack_alignment
)
7426 stack_alignment
= alignment
;
7432 cfun
->machine
->stack_frame_required
= require_stack_frame
;
7435 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7436 will guide prologue/epilogue to be generated in correct form. */
7439 ix86_finalize_stack_frame_flags (void)
7441 /* Check if stack realign is really needed after reload, and
7442 stores result in cfun */
7443 unsigned int incoming_stack_boundary
7444 = (crtl
->parm_stack_boundary
> ix86_incoming_stack_boundary
7445 ? crtl
->parm_stack_boundary
: ix86_incoming_stack_boundary
);
7446 unsigned int stack_alignment
7447 = (crtl
->is_leaf
&& !ix86_current_function_calls_tls_descriptor
7448 ? crtl
->max_used_stack_slot_alignment
7449 : crtl
->stack_alignment_needed
);
7450 unsigned int stack_realign
7451 = (incoming_stack_boundary
< stack_alignment
);
7452 bool recompute_frame_layout_p
= false;
7454 if (crtl
->stack_realign_finalized
)
7456 /* After stack_realign_needed is finalized, we can't no longer
7458 gcc_assert (crtl
->stack_realign_needed
== stack_realign
);
7462 /* It is always safe to compute max_used_stack_alignment. We
7463 compute it only if 128-bit aligned load/store may be generated
7464 on misaligned stack slot which will lead to segfault. */
7465 bool check_stack_slot
7466 = (stack_realign
|| crtl
->max_used_stack_slot_alignment
>= 128);
7467 ix86_find_max_used_stack_alignment (stack_alignment
,
7470 /* If the only reason for frame_pointer_needed is that we conservatively
7471 assumed stack realignment might be needed or -fno-omit-frame-pointer
7472 is used, but in the end nothing that needed the stack alignment had
7473 been spilled nor stack access, clear frame_pointer_needed and say we
7474 don't need stack realignment. */
7475 if ((stack_realign
|| (!flag_omit_frame_pointer
&& optimize
))
7476 && frame_pointer_needed
7478 && crtl
->sp_is_unchanging
7479 && !ix86_current_function_calls_tls_descriptor
7480 && !crtl
->accesses_prior_frames
7481 && !cfun
->calls_alloca
7482 && !crtl
->calls_eh_return
7483 /* See ira_setup_eliminable_regset for the rationale. */
7484 && !(STACK_CHECK_MOVING_SP
7487 && cfun
->can_throw_non_call_exceptions
)
7488 && !ix86_frame_pointer_required ()
7489 && ix86_get_frame_size () == 0
7490 && ix86_nsaved_sseregs () == 0
7491 && ix86_varargs_gpr_size
+ ix86_varargs_fpr_size
== 0)
7493 if (cfun
->machine
->stack_frame_required
)
7495 /* Stack frame is required. If stack alignment needed is less
7496 than incoming stack boundary, don't realign stack. */
7497 stack_realign
= incoming_stack_boundary
< stack_alignment
;
7500 crtl
->max_used_stack_slot_alignment
7501 = incoming_stack_boundary
;
7502 crtl
->stack_alignment_needed
7503 = incoming_stack_boundary
;
7504 /* Also update preferred_stack_boundary for leaf
7506 crtl
->preferred_stack_boundary
7507 = incoming_stack_boundary
;
7512 /* If drap has been set, but it actually isn't live at the
7513 start of the function, there is no reason to set it up. */
7516 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
7517 if (! REGNO_REG_SET_P (DF_LR_IN (bb
),
7518 REGNO (crtl
->drap_reg
)))
7520 crtl
->drap_reg
= NULL_RTX
;
7521 crtl
->need_drap
= false;
7525 cfun
->machine
->no_drap_save_restore
= true;
7527 frame_pointer_needed
= false;
7528 stack_realign
= false;
7529 crtl
->max_used_stack_slot_alignment
= incoming_stack_boundary
;
7530 crtl
->stack_alignment_needed
= incoming_stack_boundary
;
7531 crtl
->stack_alignment_estimated
= incoming_stack_boundary
;
7532 if (crtl
->preferred_stack_boundary
> incoming_stack_boundary
)
7533 crtl
->preferred_stack_boundary
= incoming_stack_boundary
;
7534 df_finish_pass (true);
7535 df_scan_alloc (NULL
);
7537 df_compute_regs_ever_live (true);
7540 if (flag_var_tracking
)
7542 /* Since frame pointer is no longer available, replace it with
7543 stack pointer - UNITS_PER_WORD in debug insns. */
7545 for (ref
= DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM
);
7548 next
= DF_REF_NEXT_REG (ref
);
7549 if (!DF_REF_INSN_INFO (ref
))
7552 /* Make sure the next ref is for a different instruction,
7553 so that we're not affected by the rescan. */
7554 rtx_insn
*insn
= DF_REF_INSN (ref
);
7555 while (next
&& DF_REF_INSN (next
) == insn
)
7556 next
= DF_REF_NEXT_REG (next
);
7558 if (DEBUG_INSN_P (insn
))
7560 bool changed
= false;
7561 for (; ref
!= next
; ref
= DF_REF_NEXT_REG (ref
))
7563 rtx
*loc
= DF_REF_LOC (ref
);
7564 if (*loc
== hard_frame_pointer_rtx
)
7566 *loc
= plus_constant (Pmode
,
7573 df_insn_rescan (insn
);
7578 recompute_frame_layout_p
= true;
7581 else if (crtl
->max_used_stack_slot_alignment
>= 128
7582 && cfun
->machine
->stack_frame_required
)
7584 /* We don't need to realign stack. max_used_stack_alignment is
7585 used to decide how stack frame should be aligned. This is
7586 independent of any psABIs nor 32-bit vs 64-bit. */
7587 cfun
->machine
->max_used_stack_alignment
7588 = stack_alignment
/ BITS_PER_UNIT
;
7591 if (crtl
->stack_realign_needed
!= stack_realign
)
7592 recompute_frame_layout_p
= true;
7593 crtl
->stack_realign_needed
= stack_realign
;
7594 crtl
->stack_realign_finalized
= true;
7595 if (recompute_frame_layout_p
)
7596 ix86_compute_frame_layout ();
7599 /* Delete SET_GOT right after entry block if it is allocated to reg. */
7602 ix86_elim_entry_set_got (rtx reg
)
7604 basic_block bb
= ENTRY_BLOCK_PTR_FOR_FN (cfun
)->next_bb
;
7605 rtx_insn
*c_insn
= BB_HEAD (bb
);
7606 if (!NONDEBUG_INSN_P (c_insn
))
7607 c_insn
= next_nonnote_nondebug_insn (c_insn
);
7608 if (c_insn
&& NONJUMP_INSN_P (c_insn
))
7610 rtx pat
= PATTERN (c_insn
);
7611 if (GET_CODE (pat
) == PARALLEL
)
7613 rtx vec
= XVECEXP (pat
, 0, 0);
7614 if (GET_CODE (vec
) == SET
7615 && XINT (XEXP (vec
, 1), 1) == UNSPEC_SET_GOT
7616 && REGNO (XEXP (vec
, 0)) == REGNO (reg
))
7617 delete_insn (c_insn
);
7623 gen_frame_set (rtx reg
, rtx frame_reg
, int offset
, bool store
)
7628 addr
= gen_rtx_PLUS (Pmode
, frame_reg
, GEN_INT (offset
));
7629 mem
= gen_frame_mem (GET_MODE (reg
), offset
? addr
: frame_reg
);
7630 return gen_rtx_SET (store
? mem
: reg
, store
? reg
: mem
);
7634 gen_frame_load (rtx reg
, rtx frame_reg
, int offset
)
7636 return gen_frame_set (reg
, frame_reg
, offset
, false);
7640 gen_frame_store (rtx reg
, rtx frame_reg
, int offset
)
7642 return gen_frame_set (reg
, frame_reg
, offset
, true);
7646 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame
&frame
)
7648 struct machine_function
*m
= cfun
->machine
;
7649 const unsigned ncregs
= NUM_X86_64_MS_CLOBBERED_REGS
7650 + m
->call_ms2sysv_extra_regs
;
7651 rtvec v
= rtvec_alloc (ncregs
+ 1);
7652 unsigned int align
, i
, vi
= 0;
7655 rtx rax
= gen_rtx_REG (word_mode
, AX_REG
);
7656 const class xlogue_layout
&xlogue
= xlogue_layout::get_instance ();
7658 /* AL should only be live with sysv_abi. */
7659 gcc_assert (!ix86_eax_live_at_start_p ());
7660 gcc_assert (m
->fs
.sp_offset
>= frame
.sse_reg_save_offset
);
7662 /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather
7663 we've actually realigned the stack or not. */
7664 align
= GET_MODE_ALIGNMENT (V4SFmode
);
7665 addr
= choose_baseaddr (frame
.stack_realign_offset
7666 + xlogue
.get_stub_ptr_offset (), &align
, AX_REG
);
7667 gcc_assert (align
>= GET_MODE_ALIGNMENT (V4SFmode
));
7669 emit_insn (gen_rtx_SET (rax
, addr
));
7671 /* Get the stub symbol. */
7672 sym
= xlogue
.get_stub_rtx (frame_pointer_needed
? XLOGUE_STUB_SAVE_HFP
7673 : XLOGUE_STUB_SAVE
);
7674 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
7676 for (i
= 0; i
< ncregs
; ++i
)
7678 const xlogue_layout::reginfo
&r
= xlogue
.get_reginfo (i
);
7679 rtx reg
= gen_rtx_REG ((SSE_REGNO_P (r
.regno
) ? V4SFmode
: word_mode
),
7681 RTVEC_ELT (v
, vi
++) = gen_frame_store (reg
, rax
, -r
.offset
);
7684 gcc_assert (vi
== (unsigned)GET_NUM_ELEM (v
));
7686 insn
= emit_insn (gen_rtx_PARALLEL (VOIDmode
, v
));
7687 RTX_FRAME_RELATED_P (insn
) = true;
7690 /* Generate and return an insn body to AND X with Y. */
7693 gen_and2_insn (rtx x
, rtx y
)
7695 enum insn_code icode
= optab_handler (and_optab
, GET_MODE (x
));
7697 gcc_assert (insn_operand_matches (icode
, 0, x
));
7698 gcc_assert (insn_operand_matches (icode
, 1, x
));
7699 gcc_assert (insn_operand_matches (icode
, 2, y
));
7701 return GEN_FCN (icode
) (x
, x
, y
);
7704 /* Expand the prologue into a bunch of separate insns. */
7707 ix86_expand_prologue (void)
7709 struct machine_function
*m
= cfun
->machine
;
7711 HOST_WIDE_INT allocate
;
7712 bool int_registers_saved
;
7713 bool sse_registers_saved
;
7714 bool save_stub_call_needed
;
7715 rtx static_chain
= NULL_RTX
;
7717 if (ix86_function_naked (current_function_decl
))
7720 ix86_finalize_stack_frame_flags ();
7722 /* DRAP should not coexist with stack_realign_fp */
7723 gcc_assert (!(crtl
->drap_reg
&& stack_realign_fp
));
7725 memset (&m
->fs
, 0, sizeof (m
->fs
));
7727 /* Initialize CFA state for before the prologue. */
7728 m
->fs
.cfa_reg
= stack_pointer_rtx
;
7729 m
->fs
.cfa_offset
= INCOMING_FRAME_SP_OFFSET
;
7731 /* Track SP offset to the CFA. We continue tracking this after we've
7732 swapped the CFA register away from SP. In the case of re-alignment
7733 this is fudged; we're interested to offsets within the local frame. */
7734 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
7735 m
->fs
.sp_valid
= true;
7736 m
->fs
.sp_realigned
= false;
7738 const struct ix86_frame
&frame
= cfun
->machine
->frame
;
7740 if (!TARGET_64BIT
&& ix86_function_ms_hook_prologue (current_function_decl
))
7742 /* We should have already generated an error for any use of
7743 ms_hook on a nested function. */
7744 gcc_checking_assert (!ix86_static_chain_on_stack
);
7746 /* Check if profiling is active and we shall use profiling before
7747 prologue variant. If so sorry. */
7748 if (crtl
->profile
&& flag_fentry
!= 0)
7749 sorry ("%<ms_hook_prologue%> attribute is not compatible "
7750 "with %<-mfentry%> for 32-bit");
7752 /* In ix86_asm_output_function_label we emitted:
7753 8b ff movl.s %edi,%edi
7755 8b ec movl.s %esp,%ebp
7757 This matches the hookable function prologue in Win32 API
7758 functions in Microsoft Windows XP Service Pack 2 and newer.
7759 Wine uses this to enable Windows apps to hook the Win32 API
7760 functions provided by Wine.
7762 What that means is that we've already set up the frame pointer. */
7764 if (frame_pointer_needed
7765 && !(crtl
->drap_reg
&& crtl
->stack_realign_needed
))
7769 /* We've decided to use the frame pointer already set up.
7770 Describe this to the unwinder by pretending that both
7771 push and mov insns happen right here.
7773 Putting the unwind info here at the end of the ms_hook
7774 is done so that we can make absolutely certain we get
7775 the required byte sequence at the start of the function,
7776 rather than relying on an assembler that can produce
7777 the exact encoding required.
7779 However it does mean (in the unpatched case) that we have
7780 a 1 insn window where the asynchronous unwind info is
7781 incorrect. However, if we placed the unwind info at
7782 its correct location we would have incorrect unwind info
7783 in the patched case. Which is probably all moot since
7784 I don't expect Wine generates dwarf2 unwind info for the
7785 system libraries that use this feature. */
7787 insn
= emit_insn (gen_blockage ());
7789 push
= gen_push (hard_frame_pointer_rtx
);
7790 mov
= gen_rtx_SET (hard_frame_pointer_rtx
,
7792 RTX_FRAME_RELATED_P (push
) = 1;
7793 RTX_FRAME_RELATED_P (mov
) = 1;
7795 RTX_FRAME_RELATED_P (insn
) = 1;
7796 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
7797 gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, push
, mov
)));
7799 /* Note that gen_push incremented m->fs.cfa_offset, even
7800 though we didn't emit the push insn here. */
7801 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
7802 m
->fs
.fp_offset
= m
->fs
.cfa_offset
;
7803 m
->fs
.fp_valid
= true;
7807 /* The frame pointer is not needed so pop %ebp again.
7808 This leaves us with a pristine state. */
7809 emit_insn (gen_pop (hard_frame_pointer_rtx
));
7813 /* The first insn of a function that accepts its static chain on the
7814 stack is to push the register that would be filled in by a direct
7815 call. This insn will be skipped by the trampoline. */
7816 else if (ix86_static_chain_on_stack
)
7818 static_chain
= ix86_static_chain (cfun
->decl
, false);
7819 insn
= emit_insn (gen_push (static_chain
));
7820 emit_insn (gen_blockage ());
7822 /* We don't want to interpret this push insn as a register save,
7823 only as a stack adjustment. The real copy of the register as
7824 a save will be done later, if needed. */
7825 t
= plus_constant (Pmode
, stack_pointer_rtx
, -UNITS_PER_WORD
);
7826 t
= gen_rtx_SET (stack_pointer_rtx
, t
);
7827 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, t
);
7828 RTX_FRAME_RELATED_P (insn
) = 1;
7831 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
7832 of DRAP is needed and stack realignment is really needed after reload */
7833 if (stack_realign_drap
)
7835 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
7837 /* Can't use DRAP in interrupt function. */
7838 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
7839 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
7840 "in interrupt service routine. This may be worked "
7841 "around by avoiding functions with aggregate return.");
7843 /* Only need to push parameter pointer reg if it is caller saved. */
7844 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
7846 /* Push arg pointer reg */
7847 insn
= emit_insn (gen_push (crtl
->drap_reg
));
7848 RTX_FRAME_RELATED_P (insn
) = 1;
7851 /* Grab the argument pointer. */
7852 t
= plus_constant (Pmode
, stack_pointer_rtx
, m
->fs
.sp_offset
);
7853 insn
= emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
7854 RTX_FRAME_RELATED_P (insn
) = 1;
7855 m
->fs
.cfa_reg
= crtl
->drap_reg
;
7856 m
->fs
.cfa_offset
= 0;
7858 /* Align the stack. */
7859 insn
= emit_insn (gen_and2_insn (stack_pointer_rtx
,
7860 GEN_INT (-align_bytes
)));
7861 RTX_FRAME_RELATED_P (insn
) = 1;
7863 /* Replicate the return address on the stack so that return
7864 address can be reached via (argp - 1) slot. This is needed
7865 to implement macro RETURN_ADDR_RTX and intrinsic function
7866 expand_builtin_return_addr etc. */
7867 t
= plus_constant (Pmode
, crtl
->drap_reg
, -UNITS_PER_WORD
);
7868 t
= gen_frame_mem (word_mode
, t
);
7869 insn
= emit_insn (gen_push (t
));
7870 RTX_FRAME_RELATED_P (insn
) = 1;
7872 /* For the purposes of frame and register save area addressing,
7873 we've started over with a new frame. */
7874 m
->fs
.sp_offset
= INCOMING_FRAME_SP_OFFSET
;
7875 m
->fs
.realigned
= true;
7879 /* Replicate static chain on the stack so that static chain
7880 can be reached via (argp - 2) slot. This is needed for
7881 nested function with stack realignment. */
7882 insn
= emit_insn (gen_push (static_chain
));
7883 RTX_FRAME_RELATED_P (insn
) = 1;
7887 int_registers_saved
= (frame
.nregs
== 0);
7888 sse_registers_saved
= (frame
.nsseregs
== 0);
7889 save_stub_call_needed
= (m
->call_ms2sysv
);
7890 gcc_assert (sse_registers_saved
|| !save_stub_call_needed
);
7892 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
7894 /* Note: AT&T enter does NOT have reversed args. Enter is probably
7895 slower on all targets. Also sdb didn't like it. */
7896 insn
= emit_insn (gen_push (hard_frame_pointer_rtx
));
7897 RTX_FRAME_RELATED_P (insn
) = 1;
7899 /* Push registers now, before setting the frame pointer
7901 if (!int_registers_saved
7903 && !frame
.save_regs_using_mov
)
7905 ix86_emit_save_regs ();
7906 int_registers_saved
= true;
7907 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
7910 if (m
->fs
.sp_offset
== frame
.hard_frame_pointer_offset
)
7912 insn
= emit_move_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
);
7913 RTX_FRAME_RELATED_P (insn
) = 1;
7915 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
7916 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
7917 m
->fs
.fp_offset
= m
->fs
.sp_offset
;
7918 m
->fs
.fp_valid
= true;
7922 if (!int_registers_saved
)
7924 /* If saving registers via PUSH, do so now. */
7925 if (!frame
.save_regs_using_mov
)
7927 ix86_emit_save_regs ();
7928 int_registers_saved
= true;
7929 gcc_assert (m
->fs
.sp_offset
== frame
.reg_save_offset
);
7932 /* When using red zone we may start register saving before allocating
7933 the stack frame saving one cycle of the prologue. However, avoid
7934 doing this if we have to probe the stack; at least on x86_64 the
7935 stack probe can turn into a call that clobbers a red zone location. */
7936 else if (ix86_using_red_zone ()
7937 && (! TARGET_STACK_PROBE
7938 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
))
7940 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
7941 int_registers_saved
= true;
7945 if (stack_realign_fp
)
7947 int align_bytes
= crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
7948 gcc_assert (align_bytes
> MIN_STACK_BOUNDARY
/ BITS_PER_UNIT
);
7950 /* Record last valid frame pointer offset. */
7951 m
->fs
.sp_realigned_fp_last
= frame
.reg_save_offset
;
7953 /* The computation of the size of the re-aligned stack frame means
7954 that we must allocate the size of the register save area before
7955 performing the actual alignment. Otherwise we cannot guarantee
7956 that there's enough storage above the realignment point. */
7957 allocate
= frame
.reg_save_offset
- m
->fs
.sp_offset
7958 + frame
.stack_realign_allocate
;
7960 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
7961 GEN_INT (-allocate
), -1, false);
7963 /* Align the stack. */
7964 emit_insn (gen_and2_insn (stack_pointer_rtx
, GEN_INT (-align_bytes
)));
7965 m
->fs
.sp_offset
= ROUND_UP (m
->fs
.sp_offset
, align_bytes
);
7966 m
->fs
.sp_realigned_offset
= m
->fs
.sp_offset
7967 - frame
.stack_realign_allocate
;
7968 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
7969 Beyond this point, stack access should be done via choose_baseaddr or
7970 by using sp_valid_at and fp_valid_at to determine the correct base
7971 register. Henceforth, any CFA offset should be thought of as logical
7972 and not physical. */
7973 gcc_assert (m
->fs
.sp_realigned_offset
>= m
->fs
.sp_realigned_fp_last
);
7974 gcc_assert (m
->fs
.sp_realigned_offset
== frame
.stack_realign_offset
);
7975 m
->fs
.sp_realigned
= true;
7977 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
7978 is needed to describe where a register is saved using a realigned
7979 stack pointer, so we need to invalidate the stack pointer for that
7982 m
->fs
.sp_valid
= false;
7984 /* If SP offset is non-immediate after allocation of the stack frame,
7985 then emit SSE saves or stub call prior to allocating the rest of the
7986 stack frame. This is less efficient for the out-of-line stub because
7987 we can't combine allocations across the call barrier, but it's better
7988 than using a scratch register. */
7989 else if (!x86_64_immediate_operand (GEN_INT (frame
.stack_pointer_offset
7990 - m
->fs
.sp_realigned_offset
),
7993 if (!sse_registers_saved
)
7995 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
7996 sse_registers_saved
= true;
7998 else if (save_stub_call_needed
)
8000 ix86_emit_outlined_ms2sysv_save (frame
);
8001 save_stub_call_needed
= false;
8006 allocate
= frame
.stack_pointer_offset
- m
->fs
.sp_offset
;
8008 if (flag_stack_usage_info
)
8010 /* We start to count from ARG_POINTER. */
8011 HOST_WIDE_INT stack_size
= frame
.stack_pointer_offset
;
8013 /* If it was realigned, take into account the fake frame. */
8014 if (stack_realign_drap
)
8016 if (ix86_static_chain_on_stack
)
8017 stack_size
+= UNITS_PER_WORD
;
8019 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
8020 stack_size
+= UNITS_PER_WORD
;
8022 /* This over-estimates by 1 minimal-stack-alignment-unit but
8023 mitigates that by counting in the new return address slot. */
8024 current_function_dynamic_stack_size
8025 += crtl
->stack_alignment_needed
/ BITS_PER_UNIT
;
8028 current_function_static_stack_size
= stack_size
;
8031 /* On SEH target with very large frame size, allocate an area to save
8032 SSE registers (as the very large allocation won't be described). */
8034 && frame
.stack_pointer_offset
> SEH_MAX_FRAME_SIZE
8035 && !sse_registers_saved
)
8037 HOST_WIDE_INT sse_size
8038 = frame
.sse_reg_save_offset
- frame
.reg_save_offset
;
8040 gcc_assert (int_registers_saved
);
8042 /* No need to do stack checking as the area will be immediately
8044 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8045 GEN_INT (-sse_size
), -1,
8046 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8047 allocate
-= sse_size
;
8048 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8049 sse_registers_saved
= true;
8052 /* The stack has already been decremented by the instruction calling us
8053 so probe if the size is non-negative to preserve the protection area. */
8055 && (flag_stack_check
== STATIC_BUILTIN_STACK_CHECK
8056 || flag_stack_clash_protection
))
8058 if (flag_stack_clash_protection
)
8060 ix86_adjust_stack_and_probe_stack_clash (allocate
,
8061 int_registers_saved
);
8064 else if (STACK_CHECK_MOVING_SP
)
8066 if (!(crtl
->is_leaf
&& !cfun
->calls_alloca
8067 && allocate
<= get_probe_interval ()))
8069 ix86_adjust_stack_and_probe (allocate
, int_registers_saved
);
8075 HOST_WIDE_INT size
= allocate
;
8077 if (TARGET_64BIT
&& size
>= HOST_WIDE_INT_C (0x80000000))
8078 size
= 0x80000000 - get_stack_check_protect () - 1;
8080 if (TARGET_STACK_PROBE
)
8082 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
8084 if (size
> get_probe_interval ())
8085 ix86_emit_probe_stack_range (0, size
, int_registers_saved
);
8088 ix86_emit_probe_stack_range (0,
8089 size
+ get_stack_check_protect (),
8090 int_registers_saved
);
8094 if (crtl
->is_leaf
&& !cfun
->calls_alloca
)
8096 if (size
> get_probe_interval ()
8097 && size
> get_stack_check_protect ())
8098 ix86_emit_probe_stack_range (get_stack_check_protect (),
8100 - get_stack_check_protect ()),
8101 int_registers_saved
);
8104 ix86_emit_probe_stack_range (get_stack_check_protect (), size
,
8105 int_registers_saved
);
8112 else if (!ix86_target_stack_probe ()
8113 || frame
.stack_pointer_offset
< CHECK_STACK_LIMIT
)
8115 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8116 GEN_INT (-allocate
), -1,
8117 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8121 rtx eax
= gen_rtx_REG (Pmode
, AX_REG
);
8123 const bool sp_is_cfa_reg
= (m
->fs
.cfa_reg
== stack_pointer_rtx
);
8124 bool eax_live
= ix86_eax_live_at_start_p ();
8125 bool r10_live
= false;
8128 r10_live
= (DECL_STATIC_CHAIN (current_function_decl
) != 0);
8132 insn
= emit_insn (gen_push (eax
));
8133 allocate
-= UNITS_PER_WORD
;
8134 /* Note that SEH directives need to continue tracking the stack
8135 pointer even after the frame pointer has been set up. */
8136 if (sp_is_cfa_reg
|| TARGET_SEH
)
8139 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8140 RTX_FRAME_RELATED_P (insn
) = 1;
8141 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8142 gen_rtx_SET (stack_pointer_rtx
,
8143 plus_constant (Pmode
,
8151 r10
= gen_rtx_REG (Pmode
, R10_REG
);
8152 insn
= emit_insn (gen_push (r10
));
8153 allocate
-= UNITS_PER_WORD
;
8154 if (sp_is_cfa_reg
|| TARGET_SEH
)
8157 m
->fs
.cfa_offset
+= UNITS_PER_WORD
;
8158 RTX_FRAME_RELATED_P (insn
) = 1;
8159 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8160 gen_rtx_SET (stack_pointer_rtx
,
8161 plus_constant (Pmode
,
8167 emit_move_insn (eax
, GEN_INT (allocate
));
8168 emit_insn (gen_allocate_stack_worker_probe (Pmode
, eax
, eax
));
8170 /* Use the fact that AX still contains ALLOCATE. */
8171 insn
= emit_insn (gen_pro_epilogue_adjust_stack_sub
8172 (Pmode
, stack_pointer_rtx
, stack_pointer_rtx
, eax
));
8174 if (sp_is_cfa_reg
|| TARGET_SEH
)
8177 m
->fs
.cfa_offset
+= allocate
;
8178 RTX_FRAME_RELATED_P (insn
) = 1;
8179 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
8180 gen_rtx_SET (stack_pointer_rtx
,
8181 plus_constant (Pmode
, stack_pointer_rtx
,
8184 m
->fs
.sp_offset
+= allocate
;
8186 /* Use stack_pointer_rtx for relative addressing so that code works for
8187 realigned stack. But this means that we need a blockage to prevent
8188 stores based on the frame pointer from being scheduled before. */
8189 if (r10_live
&& eax_live
)
8191 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
8192 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
8193 gen_frame_mem (word_mode
, t
));
8194 t
= plus_constant (Pmode
, t
, UNITS_PER_WORD
);
8195 emit_move_insn (gen_rtx_REG (word_mode
, AX_REG
),
8196 gen_frame_mem (word_mode
, t
));
8197 emit_insn (gen_memory_blockage ());
8199 else if (eax_live
|| r10_live
)
8201 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, eax
);
8202 emit_move_insn (gen_rtx_REG (word_mode
,
8203 (eax_live
? AX_REG
: R10_REG
)),
8204 gen_frame_mem (word_mode
, t
));
8205 emit_insn (gen_memory_blockage ());
8208 gcc_assert (m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
8210 /* If we havn't already set up the frame pointer, do so now. */
8211 if (frame_pointer_needed
&& !m
->fs
.fp_valid
)
8213 insn
= gen_add3_insn (hard_frame_pointer_rtx
, stack_pointer_rtx
,
8214 GEN_INT (frame
.stack_pointer_offset
8215 - frame
.hard_frame_pointer_offset
));
8216 insn
= emit_insn (insn
);
8217 RTX_FRAME_RELATED_P (insn
) = 1;
8218 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, NULL
);
8220 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8221 m
->fs
.cfa_reg
= hard_frame_pointer_rtx
;
8222 m
->fs
.fp_offset
= frame
.hard_frame_pointer_offset
;
8223 m
->fs
.fp_valid
= true;
8226 if (!int_registers_saved
)
8227 ix86_emit_save_regs_using_mov (frame
.reg_save_offset
);
8228 if (!sse_registers_saved
)
8229 ix86_emit_save_sse_regs_using_mov (frame
.sse_reg_save_offset
);
8230 else if (save_stub_call_needed
)
8231 ix86_emit_outlined_ms2sysv_save (frame
);
8233 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
8235 if (!TARGET_64BIT
&& pic_offset_table_rtx
&& crtl
->profile
&& !flag_fentry
)
8237 rtx pic
= gen_rtx_REG (Pmode
, REAL_PIC_OFFSET_TABLE_REGNUM
);
8238 insn
= emit_insn (gen_set_got (pic
));
8239 RTX_FRAME_RELATED_P (insn
) = 1;
8240 add_reg_note (insn
, REG_CFA_FLUSH_QUEUE
, NULL_RTX
);
8241 emit_insn (gen_prologue_use (pic
));
8242 /* Deleting already emmitted SET_GOT if exist and allocated to
8243 REAL_PIC_OFFSET_TABLE_REGNUM. */
8244 ix86_elim_entry_set_got (pic
);
8247 if (crtl
->drap_reg
&& !crtl
->stack_realign_needed
)
8249 /* vDRAP is setup but after reload it turns out stack realign
8250 isn't necessary, here we will emit prologue to setup DRAP
8251 without stack realign adjustment */
8252 t
= choose_baseaddr (0, NULL
);
8253 emit_insn (gen_rtx_SET (crtl
->drap_reg
, t
));
8256 /* Prevent instructions from being scheduled into register save push
8257 sequence when access to the redzone area is done through frame pointer.
8258 The offset between the frame pointer and the stack pointer is calculated
8259 relative to the value of the stack pointer at the end of the function
8260 prologue, and moving instructions that access redzone area via frame
8261 pointer inside push sequence violates this assumption. */
8262 if (frame_pointer_needed
&& frame
.red_zone_size
)
8263 emit_insn (gen_memory_blockage ());
8265 /* SEH requires that the prologue end within 256 bytes of the start of
8266 the function. Prevent instruction schedules that would extend that.
8267 Further, prevent alloca modifications to the stack pointer from being
8268 combined with prologue modifications. */
8270 emit_insn (gen_prologue_use (stack_pointer_rtx
));
8273 /* Emit code to restore REG using a POP insn. */
8276 ix86_emit_restore_reg_using_pop (rtx reg
)
8278 struct machine_function
*m
= cfun
->machine
;
8279 rtx_insn
*insn
= emit_insn (gen_pop (reg
));
8281 ix86_add_cfa_restore_note (insn
, reg
, m
->fs
.sp_offset
);
8282 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
8284 if (m
->fs
.cfa_reg
== crtl
->drap_reg
8285 && REGNO (reg
) == REGNO (crtl
->drap_reg
))
8287 /* Previously we'd represented the CFA as an expression
8288 like *(%ebp - 8). We've just popped that value from
8289 the stack, which means we need to reset the CFA to
8290 the drap register. This will remain until we restore
8291 the stack pointer. */
8292 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
8293 RTX_FRAME_RELATED_P (insn
) = 1;
8295 /* This means that the DRAP register is valid for addressing too. */
8296 m
->fs
.drap_valid
= true;
8300 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8302 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
8303 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
8304 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
8305 RTX_FRAME_RELATED_P (insn
) = 1;
8307 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
8310 /* When the frame pointer is the CFA, and we pop it, we are
8311 swapping back to the stack pointer as the CFA. This happens
8312 for stack frames that don't allocate other data, so we assume
8313 the stack pointer is now pointing at the return address, i.e.
8314 the function entry state, which makes the offset be 1 word. */
8315 if (reg
== hard_frame_pointer_rtx
)
8317 m
->fs
.fp_valid
= false;
8318 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
8320 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8321 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
8323 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8324 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
8325 GEN_INT (m
->fs
.cfa_offset
)));
8326 RTX_FRAME_RELATED_P (insn
) = 1;
8331 /* Emit code to restore saved registers using POP insns. */
8334 ix86_emit_restore_regs_using_pop (void)
8338 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8339 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, false, true))
8340 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode
, regno
));
8343 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
8344 omits the emit and only attaches the notes. */
8347 ix86_emit_leave (rtx_insn
*insn
)
8349 struct machine_function
*m
= cfun
->machine
;
8352 insn
= emit_insn (gen_leave (word_mode
));
8354 ix86_add_queued_cfa_restore_notes (insn
);
8356 gcc_assert (m
->fs
.fp_valid
);
8357 m
->fs
.sp_valid
= true;
8358 m
->fs
.sp_realigned
= false;
8359 m
->fs
.sp_offset
= m
->fs
.fp_offset
- UNITS_PER_WORD
;
8360 m
->fs
.fp_valid
= false;
8362 if (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
)
8364 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8365 m
->fs
.cfa_offset
= m
->fs
.sp_offset
;
8367 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8368 plus_constant (Pmode
, stack_pointer_rtx
,
8370 RTX_FRAME_RELATED_P (insn
) = 1;
8372 ix86_add_cfa_restore_note (insn
, hard_frame_pointer_rtx
,
8376 /* Emit code to restore saved registers using MOV insns.
8377 First register is restored from CFA - CFA_OFFSET. */
8379 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset
,
8380 bool maybe_eh_return
)
8382 struct machine_function
*m
= cfun
->machine
;
8385 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8386 if (GENERAL_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
, true))
8388 rtx reg
= gen_rtx_REG (word_mode
, regno
);
8392 mem
= choose_baseaddr (cfa_offset
, NULL
);
8393 mem
= gen_frame_mem (word_mode
, mem
);
8394 insn
= emit_move_insn (reg
, mem
);
8396 if (m
->fs
.cfa_reg
== crtl
->drap_reg
&& regno
== REGNO (crtl
->drap_reg
))
8398 /* Previously we'd represented the CFA as an expression
8399 like *(%ebp - 8). We've just popped that value from
8400 the stack, which means we need to reset the CFA to
8401 the drap register. This will remain until we restore
8402 the stack pointer. */
8403 add_reg_note (insn
, REG_CFA_DEF_CFA
, reg
);
8404 RTX_FRAME_RELATED_P (insn
) = 1;
8406 /* This means that the DRAP register is valid for addressing. */
8407 m
->fs
.drap_valid
= true;
8410 ix86_add_cfa_restore_note (NULL
, reg
, cfa_offset
);
8412 cfa_offset
-= UNITS_PER_WORD
;
8416 /* Emit code to restore saved registers using MOV insns.
8417 First register is restored from CFA - CFA_OFFSET. */
8419 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset
,
8420 bool maybe_eh_return
)
8424 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; regno
++)
8425 if (SSE_REGNO_P (regno
) && ix86_save_reg (regno
, maybe_eh_return
, true))
8427 rtx reg
= gen_rtx_REG (V4SFmode
, regno
);
8429 unsigned int align
= GET_MODE_ALIGNMENT (V4SFmode
);
8431 mem
= choose_baseaddr (cfa_offset
, &align
);
8432 mem
= gen_rtx_MEM (V4SFmode
, mem
);
8434 /* The location aligment depends upon the base register. */
8435 align
= MIN (GET_MODE_ALIGNMENT (V4SFmode
), align
);
8436 gcc_assert (! (cfa_offset
& (align
/ BITS_PER_UNIT
- 1)));
8437 set_mem_align (mem
, align
);
8438 emit_insn (gen_rtx_SET (reg
, mem
));
8440 ix86_add_cfa_restore_note (NULL
, reg
, cfa_offset
);
8442 cfa_offset
-= GET_MODE_SIZE (V4SFmode
);
8447 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame
&frame
,
8448 bool use_call
, int style
)
8450 struct machine_function
*m
= cfun
->machine
;
8451 const unsigned ncregs
= NUM_X86_64_MS_CLOBBERED_REGS
8452 + m
->call_ms2sysv_extra_regs
;
8454 unsigned int elems_needed
, align
, i
, vi
= 0;
8457 rtx rsi
= gen_rtx_REG (word_mode
, SI_REG
);
8459 const class xlogue_layout
&xlogue
= xlogue_layout::get_instance ();
8460 HOST_WIDE_INT stub_ptr_offset
= xlogue
.get_stub_ptr_offset ();
8461 HOST_WIDE_INT rsi_offset
= frame
.stack_realign_offset
+ stub_ptr_offset
;
8462 rtx rsi_frame_load
= NULL_RTX
;
8463 HOST_WIDE_INT rsi_restore_offset
= (HOST_WIDE_INT
)-1;
8464 enum xlogue_stub stub
;
8466 gcc_assert (!m
->fs
.fp_valid
|| frame_pointer_needed
);
8468 /* If using a realigned stack, we should never start with padding. */
8469 gcc_assert (!stack_realign_fp
|| !xlogue
.get_stack_align_off_in ());
8471 /* Setup RSI as the stub's base pointer. */
8472 align
= GET_MODE_ALIGNMENT (V4SFmode
);
8473 tmp
= choose_baseaddr (rsi_offset
, &align
, SI_REG
);
8474 gcc_assert (align
>= GET_MODE_ALIGNMENT (V4SFmode
));
8476 emit_insn (gen_rtx_SET (rsi
, tmp
));
8478 /* Get a symbol for the stub. */
8479 if (frame_pointer_needed
)
8480 stub
= use_call
? XLOGUE_STUB_RESTORE_HFP
8481 : XLOGUE_STUB_RESTORE_HFP_TAIL
;
8483 stub
= use_call
? XLOGUE_STUB_RESTORE
8484 : XLOGUE_STUB_RESTORE_TAIL
;
8485 sym
= xlogue
.get_stub_rtx (stub
);
8487 elems_needed
= ncregs
;
8491 elems_needed
+= frame_pointer_needed
? 5 : 3;
8492 v
= rtvec_alloc (elems_needed
);
8494 /* We call the epilogue stub when we need to pop incoming args or we are
8495 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
8496 epilogue stub and it is the tail-call. */
8498 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
8501 RTVEC_ELT (v
, vi
++) = ret_rtx
;
8502 RTVEC_ELT (v
, vi
++) = gen_rtx_USE (VOIDmode
, sym
);
8503 if (frame_pointer_needed
)
8505 rtx rbp
= gen_rtx_REG (DImode
, BP_REG
);
8506 gcc_assert (m
->fs
.fp_valid
);
8507 gcc_assert (m
->fs
.cfa_reg
== hard_frame_pointer_rtx
);
8509 tmp
= gen_rtx_PLUS (DImode
, rbp
, GEN_INT (8));
8510 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (stack_pointer_rtx
, tmp
);
8511 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (rbp
, gen_rtx_MEM (DImode
, rbp
));
8512 tmp
= gen_rtx_MEM (BLKmode
, gen_rtx_SCRATCH (VOIDmode
));
8513 RTVEC_ELT (v
, vi
++) = gen_rtx_CLOBBER (VOIDmode
, tmp
);
8517 /* If no hard frame pointer, we set R10 to the SP restore value. */
8518 gcc_assert (!m
->fs
.fp_valid
);
8519 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
8520 gcc_assert (m
->fs
.sp_valid
);
8522 r10
= gen_rtx_REG (DImode
, R10_REG
);
8523 tmp
= gen_rtx_PLUS (Pmode
, rsi
, GEN_INT (stub_ptr_offset
));
8524 emit_insn (gen_rtx_SET (r10
, tmp
));
8526 RTVEC_ELT (v
, vi
++) = gen_rtx_SET (stack_pointer_rtx
, r10
);
8530 /* Generate frame load insns and restore notes. */
8531 for (i
= 0; i
< ncregs
; ++i
)
8533 const xlogue_layout::reginfo
&r
= xlogue
.get_reginfo (i
);
8534 machine_mode mode
= SSE_REGNO_P (r
.regno
) ? V4SFmode
: word_mode
;
8535 rtx reg
, frame_load
;
8537 reg
= gen_rtx_REG (mode
, r
.regno
);
8538 frame_load
= gen_frame_load (reg
, rsi
, r
.offset
);
8540 /* Save RSI frame load insn & note to add last. */
8541 if (r
.regno
== SI_REG
)
8543 gcc_assert (!rsi_frame_load
);
8544 rsi_frame_load
= frame_load
;
8545 rsi_restore_offset
= r
.offset
;
8549 RTVEC_ELT (v
, vi
++) = frame_load
;
8550 ix86_add_cfa_restore_note (NULL
, reg
, r
.offset
);
8554 /* Add RSI frame load & restore note at the end. */
8555 gcc_assert (rsi_frame_load
);
8556 gcc_assert (rsi_restore_offset
!= (HOST_WIDE_INT
)-1);
8557 RTVEC_ELT (v
, vi
++) = rsi_frame_load
;
8558 ix86_add_cfa_restore_note (NULL
, gen_rtx_REG (DImode
, SI_REG
),
8559 rsi_restore_offset
);
8561 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
8562 if (!use_call
&& !frame_pointer_needed
)
8564 gcc_assert (m
->fs
.sp_valid
);
8565 gcc_assert (!m
->fs
.sp_realigned
);
8567 /* At this point, R10 should point to frame.stack_realign_offset. */
8568 if (m
->fs
.cfa_reg
== stack_pointer_rtx
)
8569 m
->fs
.cfa_offset
+= m
->fs
.sp_offset
- frame
.stack_realign_offset
;
8570 m
->fs
.sp_offset
= frame
.stack_realign_offset
;
8573 gcc_assert (vi
== (unsigned int)GET_NUM_ELEM (v
));
8574 tmp
= gen_rtx_PARALLEL (VOIDmode
, v
);
8576 insn
= emit_insn (tmp
);
8579 insn
= emit_jump_insn (tmp
);
8580 JUMP_LABEL (insn
) = ret_rtx
;
8582 if (frame_pointer_needed
)
8583 ix86_emit_leave (insn
);
8586 /* Need CFA adjust note. */
8587 tmp
= gen_rtx_SET (stack_pointer_rtx
, r10
);
8588 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, tmp
);
8592 RTX_FRAME_RELATED_P (insn
) = true;
8593 ix86_add_queued_cfa_restore_notes (insn
);
8595 /* If we're not doing a tail-call, we need to adjust the stack. */
8596 if (use_call
&& m
->fs
.sp_valid
)
8598 HOST_WIDE_INT dealloc
= m
->fs
.sp_offset
- frame
.stack_realign_offset
;
8599 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8600 GEN_INT (dealloc
), style
,
8601 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8605 /* Restore function stack, frame, and registers. */
8608 ix86_expand_epilogue (int style
)
8610 struct machine_function
*m
= cfun
->machine
;
8611 struct machine_frame_state frame_state_save
= m
->fs
;
8612 bool restore_regs_via_mov
;
8614 bool restore_stub_is_tail
= false;
8616 if (ix86_function_naked (current_function_decl
))
8618 /* The program should not reach this point. */
8619 emit_insn (gen_ud2 ());
8623 ix86_finalize_stack_frame_flags ();
8624 const struct ix86_frame
&frame
= cfun
->machine
->frame
;
8626 m
->fs
.sp_realigned
= stack_realign_fp
;
8627 m
->fs
.sp_valid
= stack_realign_fp
8628 || !frame_pointer_needed
8629 || crtl
->sp_is_unchanging
;
8630 gcc_assert (!m
->fs
.sp_valid
8631 || m
->fs
.sp_offset
== frame
.stack_pointer_offset
);
8633 /* The FP must be valid if the frame pointer is present. */
8634 gcc_assert (frame_pointer_needed
== m
->fs
.fp_valid
);
8635 gcc_assert (!m
->fs
.fp_valid
8636 || m
->fs
.fp_offset
== frame
.hard_frame_pointer_offset
);
8638 /* We must have *some* valid pointer to the stack frame. */
8639 gcc_assert (m
->fs
.sp_valid
|| m
->fs
.fp_valid
);
8641 /* The DRAP is never valid at this point. */
8642 gcc_assert (!m
->fs
.drap_valid
);
8644 /* See the comment about red zone and frame
8645 pointer usage in ix86_expand_prologue. */
8646 if (frame_pointer_needed
&& frame
.red_zone_size
)
8647 emit_insn (gen_memory_blockage ());
8649 using_drap
= crtl
->drap_reg
&& crtl
->stack_realign_needed
;
8650 gcc_assert (!using_drap
|| m
->fs
.cfa_reg
== crtl
->drap_reg
);
8652 /* Determine the CFA offset of the end of the red-zone. */
8653 m
->fs
.red_zone_offset
= 0;
8654 if (ix86_using_red_zone () && crtl
->args
.pops_args
< 65536)
8656 /* The red-zone begins below return address and error code in
8657 exception handler. */
8658 m
->fs
.red_zone_offset
= RED_ZONE_SIZE
+ INCOMING_FRAME_SP_OFFSET
;
8660 /* When the register save area is in the aligned portion of
8661 the stack, determine the maximum runtime displacement that
8662 matches up with the aligned frame. */
8663 if (stack_realign_drap
)
8664 m
->fs
.red_zone_offset
-= (crtl
->stack_alignment_needed
/ BITS_PER_UNIT
8668 HOST_WIDE_INT reg_save_offset
= frame
.reg_save_offset
;
8670 /* Special care must be taken for the normal return case of a function
8671 using eh_return: the eax and edx registers are marked as saved, but
8672 not restored along this path. Adjust the save location to match. */
8673 if (crtl
->calls_eh_return
&& style
!= 2)
8674 reg_save_offset
-= 2 * UNITS_PER_WORD
;
8676 /* EH_RETURN requires the use of moves to function properly. */
8677 if (crtl
->calls_eh_return
)
8678 restore_regs_via_mov
= true;
8679 /* SEH requires the use of pops to identify the epilogue. */
8680 else if (TARGET_SEH
)
8681 restore_regs_via_mov
= false;
8682 /* If we're only restoring one register and sp cannot be used then
8683 using a move instruction to restore the register since it's
8684 less work than reloading sp and popping the register. */
8685 else if (!sp_valid_at (frame
.hfp_save_offset
) && frame
.nregs
<= 1)
8686 restore_regs_via_mov
= true;
8687 else if (TARGET_EPILOGUE_USING_MOVE
8688 && cfun
->machine
->use_fast_prologue_epilogue
8690 || m
->fs
.sp_offset
!= reg_save_offset
))
8691 restore_regs_via_mov
= true;
8692 else if (frame_pointer_needed
8694 && m
->fs
.sp_offset
!= reg_save_offset
)
8695 restore_regs_via_mov
= true;
8696 else if (frame_pointer_needed
8698 && cfun
->machine
->use_fast_prologue_epilogue
8699 && frame
.nregs
== 1)
8700 restore_regs_via_mov
= true;
8702 restore_regs_via_mov
= false;
8704 if (restore_regs_via_mov
|| frame
.nsseregs
)
8706 /* Ensure that the entire register save area is addressable via
8707 the stack pointer, if we will restore SSE regs via sp. */
8709 && m
->fs
.sp_offset
> 0x7fffffff
8710 && sp_valid_at (frame
.stack_realign_offset
+ 1)
8711 && (frame
.nsseregs
+ frame
.nregs
) != 0)
8713 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8714 GEN_INT (m
->fs
.sp_offset
8715 - frame
.sse_reg_save_offset
),
8717 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8721 /* If there are any SSE registers to restore, then we have to do it
8722 via moves, since there's obviously no pop for SSE regs. */
8724 ix86_emit_restore_sse_regs_using_mov (frame
.sse_reg_save_offset
,
8727 if (m
->call_ms2sysv
)
8729 int pop_incoming_args
= crtl
->args
.pops_args
&& crtl
->args
.size
;
8731 /* We cannot use a tail-call for the stub if:
8732 1. We have to pop incoming args,
8733 2. We have additional int regs to restore, or
8734 3. A sibling call will be the tail-call, or
8735 4. We are emitting an eh_return_internal epilogue.
8737 TODO: Item 4 has not yet tested!
8739 If any of the above are true, we will call the stub rather than
8741 restore_stub_is_tail
= !(pop_incoming_args
|| frame
.nregs
|| style
!= 1);
8742 ix86_emit_outlined_ms2sysv_restore (frame
, !restore_stub_is_tail
, style
);
8745 /* If using out-of-line stub that is a tail-call, then...*/
8746 if (m
->call_ms2sysv
&& restore_stub_is_tail
)
8748 /* TODO: parinoid tests. (remove eventually) */
8749 gcc_assert (m
->fs
.sp_valid
);
8750 gcc_assert (!m
->fs
.sp_realigned
);
8751 gcc_assert (!m
->fs
.fp_valid
);
8752 gcc_assert (!m
->fs
.realigned
);
8753 gcc_assert (m
->fs
.sp_offset
== UNITS_PER_WORD
);
8754 gcc_assert (!crtl
->drap_reg
);
8755 gcc_assert (!frame
.nregs
);
8757 else if (restore_regs_via_mov
)
8762 ix86_emit_restore_regs_using_mov (reg_save_offset
, style
== 2);
8764 /* eh_return epilogues need %ecx added to the stack pointer. */
8767 rtx sa
= EH_RETURN_STACKADJ_RTX
;
8770 /* %ecx can't be used for both DRAP register and eh_return. */
8772 gcc_assert (REGNO (crtl
->drap_reg
) != CX_REG
);
8774 /* regparm nested functions don't work with eh_return. */
8775 gcc_assert (!ix86_static_chain_on_stack
);
8777 if (frame_pointer_needed
)
8779 t
= gen_rtx_PLUS (Pmode
, hard_frame_pointer_rtx
, sa
);
8780 t
= plus_constant (Pmode
, t
, m
->fs
.fp_offset
- UNITS_PER_WORD
);
8781 emit_insn (gen_rtx_SET (sa
, t
));
8783 t
= gen_frame_mem (Pmode
, hard_frame_pointer_rtx
);
8784 insn
= emit_move_insn (hard_frame_pointer_rtx
, t
);
8786 /* Note that we use SA as a temporary CFA, as the return
8787 address is at the proper place relative to it. We
8788 pretend this happens at the FP restore insn because
8789 prior to this insn the FP would be stored at the wrong
8790 offset relative to SA, and after this insn we have no
8791 other reasonable register to use for the CFA. We don't
8792 bother resetting the CFA to the SP for the duration of
8793 the return insn, unless the control flow instrumentation
8794 is done. In this case the SP is used later and we have
8795 to reset CFA to SP. */
8796 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8797 plus_constant (Pmode
, sa
, UNITS_PER_WORD
));
8798 ix86_add_queued_cfa_restore_notes (insn
);
8799 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
8800 RTX_FRAME_RELATED_P (insn
) = 1;
8803 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
8804 m
->fs
.fp_valid
= false;
8806 pro_epilogue_adjust_stack (stack_pointer_rtx
, sa
,
8808 flag_cf_protection
);
8812 t
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
, sa
);
8813 t
= plus_constant (Pmode
, t
, m
->fs
.sp_offset
- UNITS_PER_WORD
);
8814 insn
= emit_insn (gen_rtx_SET (stack_pointer_rtx
, t
));
8815 ix86_add_queued_cfa_restore_notes (insn
);
8817 gcc_assert (m
->fs
.cfa_reg
== stack_pointer_rtx
);
8818 if (m
->fs
.cfa_offset
!= UNITS_PER_WORD
)
8820 m
->fs
.cfa_offset
= UNITS_PER_WORD
;
8821 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8822 plus_constant (Pmode
, stack_pointer_rtx
,
8824 RTX_FRAME_RELATED_P (insn
) = 1;
8827 m
->fs
.sp_offset
= UNITS_PER_WORD
;
8828 m
->fs
.sp_valid
= true;
8829 m
->fs
.sp_realigned
= false;
8834 /* SEH requires that the function end with (1) a stack adjustment
8835 if necessary, (2) a sequence of pops, and (3) a return or
8836 jump instruction. Prevent insns from the function body from
8837 being scheduled into this sequence. */
8840 /* Prevent a catch region from being adjacent to the standard
8841 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
8842 nor several other flags that would be interesting to test are
8844 if (flag_non_call_exceptions
)
8845 emit_insn (gen_nops (const1_rtx
));
8847 emit_insn (gen_blockage ());
8850 /* First step is to deallocate the stack frame so that we can
8851 pop the registers. If the stack pointer was realigned, it needs
8852 to be restored now. Also do it on SEH target for very large
8853 frame as the emitted instructions aren't allowed by the ABI
8855 if (!m
->fs
.sp_valid
|| m
->fs
.sp_realigned
8857 && (m
->fs
.sp_offset
- reg_save_offset
8858 >= SEH_MAX_FRAME_SIZE
)))
8860 pro_epilogue_adjust_stack (stack_pointer_rtx
, hard_frame_pointer_rtx
,
8861 GEN_INT (m
->fs
.fp_offset
8865 else if (m
->fs
.sp_offset
!= reg_save_offset
)
8867 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8868 GEN_INT (m
->fs
.sp_offset
8871 m
->fs
.cfa_reg
== stack_pointer_rtx
);
8874 ix86_emit_restore_regs_using_pop ();
8877 /* If we used a stack pointer and haven't already got rid of it,
8881 /* If the stack pointer is valid and pointing at the frame
8882 pointer store address, then we only need a pop. */
8883 if (sp_valid_at (frame
.hfp_save_offset
)
8884 && m
->fs
.sp_offset
== frame
.hfp_save_offset
)
8885 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
8886 /* Leave results in shorter dependency chains on CPUs that are
8887 able to grok it fast. */
8888 else if (TARGET_USE_LEAVE
8889 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun
))
8890 || !cfun
->machine
->use_fast_prologue_epilogue
)
8891 ix86_emit_leave (NULL
);
8894 pro_epilogue_adjust_stack (stack_pointer_rtx
,
8895 hard_frame_pointer_rtx
,
8896 const0_rtx
, style
, !using_drap
);
8897 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx
);
8903 int param_ptr_offset
= UNITS_PER_WORD
;
8906 gcc_assert (stack_realign_drap
);
8908 if (ix86_static_chain_on_stack
)
8909 param_ptr_offset
+= UNITS_PER_WORD
;
8910 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
8911 param_ptr_offset
+= UNITS_PER_WORD
;
8913 insn
= emit_insn (gen_rtx_SET
8915 gen_rtx_PLUS (Pmode
,
8917 GEN_INT (-param_ptr_offset
))));
8918 m
->fs
.cfa_reg
= stack_pointer_rtx
;
8919 m
->fs
.cfa_offset
= param_ptr_offset
;
8920 m
->fs
.sp_offset
= param_ptr_offset
;
8921 m
->fs
.realigned
= false;
8923 add_reg_note (insn
, REG_CFA_DEF_CFA
,
8924 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
8925 GEN_INT (param_ptr_offset
)));
8926 RTX_FRAME_RELATED_P (insn
) = 1;
8928 if (!call_used_regs
[REGNO (crtl
->drap_reg
)])
8929 ix86_emit_restore_reg_using_pop (crtl
->drap_reg
);
8932 /* At this point the stack pointer must be valid, and we must have
8933 restored all of the registers. We may not have deallocated the
8934 entire stack frame. We've delayed this until now because it may
8935 be possible to merge the local stack deallocation with the
8936 deallocation forced by ix86_static_chain_on_stack. */
8937 gcc_assert (m
->fs
.sp_valid
);
8938 gcc_assert (!m
->fs
.sp_realigned
);
8939 gcc_assert (!m
->fs
.fp_valid
);
8940 gcc_assert (!m
->fs
.realigned
);
8941 if (m
->fs
.sp_offset
!= UNITS_PER_WORD
)
8943 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8944 GEN_INT (m
->fs
.sp_offset
- UNITS_PER_WORD
),
8948 ix86_add_queued_cfa_restore_notes (get_last_insn ());
8950 /* Sibcall epilogues don't want a return instruction. */
8953 m
->fs
= frame_state_save
;
8957 if (cfun
->machine
->func_type
!= TYPE_NORMAL
)
8958 emit_jump_insn (gen_interrupt_return ());
8959 else if (crtl
->args
.pops_args
&& crtl
->args
.size
)
8961 rtx popc
= GEN_INT (crtl
->args
.pops_args
);
8963 /* i386 can only pop 64K bytes. If asked to pop more, pop return
8964 address, do explicit add, and jump indirectly to the caller. */
8966 if (crtl
->args
.pops_args
>= 65536)
8968 rtx ecx
= gen_rtx_REG (SImode
, CX_REG
);
8971 /* There is no "pascal" calling convention in any 64bit ABI. */
8972 gcc_assert (!TARGET_64BIT
);
8974 insn
= emit_insn (gen_pop (ecx
));
8975 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
8976 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
8978 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
8979 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
8980 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
8981 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (ecx
, pc_rtx
));
8982 RTX_FRAME_RELATED_P (insn
) = 1;
8984 pro_epilogue_adjust_stack (stack_pointer_rtx
, stack_pointer_rtx
,
8986 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
8989 emit_jump_insn (gen_simple_return_pop_internal (popc
));
8991 else if (!m
->call_ms2sysv
|| !restore_stub_is_tail
)
8993 /* In case of return from EH a simple return cannot be used
8994 as a return address will be compared with a shadow stack
8995 return address. Use indirect jump instead. */
8996 if (style
== 2 && flag_cf_protection
)
8998 /* Register used in indirect jump must be in word_mode. But
8999 Pmode may not be the same as word_mode for x32. */
9000 rtx ecx
= gen_rtx_REG (word_mode
, CX_REG
);
9003 insn
= emit_insn (gen_pop (ecx
));
9004 m
->fs
.cfa_offset
-= UNITS_PER_WORD
;
9005 m
->fs
.sp_offset
-= UNITS_PER_WORD
;
9007 rtx x
= plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
);
9008 x
= gen_rtx_SET (stack_pointer_rtx
, x
);
9009 add_reg_note (insn
, REG_CFA_ADJUST_CFA
, x
);
9010 add_reg_note (insn
, REG_CFA_REGISTER
, gen_rtx_SET (ecx
, pc_rtx
));
9011 RTX_FRAME_RELATED_P (insn
) = 1;
9013 emit_jump_insn (gen_simple_return_indirect_internal (ecx
));
9016 emit_jump_insn (gen_simple_return_internal ());
9019 /* Restore the state back to the state from the prologue,
9020 so that it's correct for the next epilogue. */
9021 m
->fs
= frame_state_save
;
9024 /* Reset from the function's potential modifications. */
9027 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
)
9029 if (pic_offset_table_rtx
9030 && !ix86_use_pseudo_pic_reg ())
9031 SET_REGNO (pic_offset_table_rtx
, REAL_PIC_OFFSET_TABLE_REGNUM
);
9035 rtx_insn
*insn
= get_last_insn ();
9036 rtx_insn
*deleted_debug_label
= NULL
;
9038 /* Mach-O doesn't support labels at the end of objects, so if
9039 it looks like we might want one, take special action.
9040 First, collect any sequence of deleted debug labels. */
9043 && NOTE_KIND (insn
) != NOTE_INSN_DELETED_LABEL
)
9045 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9046 notes only, instead set their CODE_LABEL_NUMBER to -1,
9047 otherwise there would be code generation differences
9048 in between -g and -g0. */
9049 if (NOTE_P (insn
) && NOTE_KIND (insn
)
9050 == NOTE_INSN_DELETED_DEBUG_LABEL
)
9051 deleted_debug_label
= insn
;
9052 insn
= PREV_INSN (insn
);
9058 then this needs to be detected, so skip past the barrier. */
9060 if (insn
&& BARRIER_P (insn
))
9061 insn
= PREV_INSN (insn
);
9063 /* Up to now we've only seen notes or barriers. */
9068 && NOTE_KIND (insn
) == NOTE_INSN_DELETED_LABEL
))
9069 /* Trailing label. */
9070 fputs ("\tnop\n", file
);
9071 else if (cfun
&& ! cfun
->is_thunk
)
9073 /* See if we have a completely empty function body, skipping
9074 the special case of the picbase thunk emitted as asm. */
9075 while (insn
&& ! INSN_P (insn
))
9076 insn
= PREV_INSN (insn
);
9077 /* If we don't find any insns, we've got an empty function body;
9078 I.e. completely empty - without a return or branch. This is
9079 taken as the case where a function body has been removed
9080 because it contains an inline __builtin_unreachable(). GCC
9081 declares that reaching __builtin_unreachable() means UB so
9082 we're not obliged to do anything special; however, we want
9083 non-zero-sized function bodies. To meet this, and help the
9084 user out, let's trap the case. */
9086 fputs ("\tud2\n", file
);
9089 else if (deleted_debug_label
)
9090 for (insn
= deleted_debug_label
; insn
; insn
= NEXT_INSN (insn
))
9091 if (NOTE_KIND (insn
) == NOTE_INSN_DELETED_DEBUG_LABEL
)
9092 CODE_LABEL_NUMBER (insn
) = -1;
9096 /* Return a scratch register to use in the split stack prologue. The
9097 split stack prologue is used for -fsplit-stack. It is the first
9098 instructions in the function, even before the regular prologue.
9099 The scratch register can be any caller-saved register which is not
9100 used for parameters or for the static chain. */
9103 split_stack_prologue_scratch_regno (void)
9109 bool is_fastcall
, is_thiscall
;
9112 is_fastcall
= (lookup_attribute ("fastcall",
9113 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
9115 is_thiscall
= (lookup_attribute ("thiscall",
9116 TYPE_ATTRIBUTES (TREE_TYPE (cfun
->decl
)))
9118 regparm
= ix86_function_regparm (TREE_TYPE (cfun
->decl
), cfun
->decl
);
9122 if (DECL_STATIC_CHAIN (cfun
->decl
))
9124 sorry ("%<-fsplit-stack%> does not support fastcall with "
9126 return INVALID_REGNUM
;
9130 else if (is_thiscall
)
9132 if (!DECL_STATIC_CHAIN (cfun
->decl
))
9136 else if (regparm
< 3)
9138 if (!DECL_STATIC_CHAIN (cfun
->decl
))
9144 sorry ("%<-fsplit-stack%> does not support 2 register "
9145 "parameters for a nested function");
9146 return INVALID_REGNUM
;
9153 /* FIXME: We could make this work by pushing a register
9154 around the addition and comparison. */
9155 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9156 return INVALID_REGNUM
;
9161 /* A SYMBOL_REF for the function which allocates new stackspace for
9164 static GTY(()) rtx split_stack_fn
;
9166 /* A SYMBOL_REF for the more stack function when using the large
9169 static GTY(()) rtx split_stack_fn_large
;
9171 /* Return location of the stack guard value in the TLS block. */
9174 ix86_split_stack_guard (void)
9177 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
9180 gcc_assert (flag_split_stack
);
9182 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9183 offset
= TARGET_THREAD_SPLIT_STACK_OFFSET
;
9188 r
= GEN_INT (offset
);
9189 r
= gen_const_mem (Pmode
, r
);
9190 set_mem_addr_space (r
, as
);
9195 /* Handle -fsplit-stack. These are the first instructions in the
9196 function, even before the regular prologue. */
9199 ix86_expand_split_stack_prologue (void)
9201 HOST_WIDE_INT allocate
;
9202 unsigned HOST_WIDE_INT args_size
;
9203 rtx_code_label
*label
;
9204 rtx limit
, current
, allocate_rtx
, call_fusage
;
9205 rtx_insn
*call_insn
;
9206 rtx scratch_reg
= NULL_RTX
;
9207 rtx_code_label
*varargs_label
= NULL
;
9210 gcc_assert (flag_split_stack
&& reload_completed
);
9212 ix86_finalize_stack_frame_flags ();
9213 struct ix86_frame
&frame
= cfun
->machine
->frame
;
9214 allocate
= frame
.stack_pointer_offset
- INCOMING_FRAME_SP_OFFSET
;
9216 /* This is the label we will branch to if we have enough stack
9217 space. We expect the basic block reordering pass to reverse this
9218 branch if optimizing, so that we branch in the unlikely case. */
9219 label
= gen_label_rtx ();
9221 /* We need to compare the stack pointer minus the frame size with
9222 the stack boundary in the TCB. The stack boundary always gives
9223 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9224 can compare directly. Otherwise we need to do an addition. */
9226 limit
= ix86_split_stack_guard ();
9228 if (allocate
< SPLIT_STACK_AVAILABLE
)
9229 current
= stack_pointer_rtx
;
9232 unsigned int scratch_regno
;
9235 /* We need a scratch register to hold the stack pointer minus
9236 the required frame size. Since this is the very start of the
9237 function, the scratch register can be any caller-saved
9238 register which is not used for parameters. */
9239 offset
= GEN_INT (- allocate
);
9240 scratch_regno
= split_stack_prologue_scratch_regno ();
9241 if (scratch_regno
== INVALID_REGNUM
)
9243 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
9244 if (!TARGET_64BIT
|| x86_64_immediate_operand (offset
, Pmode
))
9246 /* We don't use gen_add in this case because it will
9247 want to split to lea, but when not optimizing the insn
9248 will not be split after this point. */
9249 emit_insn (gen_rtx_SET (scratch_reg
,
9250 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
9255 emit_move_insn (scratch_reg
, offset
);
9256 emit_insn (gen_add2_insn (scratch_reg
, stack_pointer_rtx
));
9258 current
= scratch_reg
;
9261 ix86_expand_branch (GEU
, current
, limit
, label
);
9262 rtx_insn
*jump_insn
= get_last_insn ();
9263 JUMP_LABEL (jump_insn
) = label
;
9265 /* Mark the jump as very likely to be taken. */
9266 add_reg_br_prob_note (jump_insn
, profile_probability::very_likely ());
9268 if (split_stack_fn
== NULL_RTX
)
9270 split_stack_fn
= gen_rtx_SYMBOL_REF (Pmode
, "__morestack");
9271 SYMBOL_REF_FLAGS (split_stack_fn
) |= SYMBOL_FLAG_LOCAL
;
9273 fn
= split_stack_fn
;
9275 /* Get more stack space. We pass in the desired stack space and the
9276 size of the arguments to copy to the new stack. In 32-bit mode
9277 we push the parameters; __morestack will return on a new stack
9278 anyhow. In 64-bit mode we pass the parameters in r10 and
9280 allocate_rtx
= GEN_INT (allocate
);
9281 args_size
= crtl
->args
.size
>= 0 ? (HOST_WIDE_INT
) crtl
->args
.size
: 0;
9282 call_fusage
= NULL_RTX
;
9288 reg10
= gen_rtx_REG (Pmode
, R10_REG
);
9289 reg11
= gen_rtx_REG (Pmode
, R11_REG
);
9291 /* If this function uses a static chain, it will be in %r10.
9292 Preserve it across the call to __morestack. */
9293 if (DECL_STATIC_CHAIN (cfun
->decl
))
9297 rax
= gen_rtx_REG (word_mode
, AX_REG
);
9298 emit_move_insn (rax
, gen_rtx_REG (word_mode
, R10_REG
));
9299 use_reg (&call_fusage
, rax
);
9302 if ((ix86_cmodel
== CM_LARGE
|| ix86_cmodel
== CM_LARGE_PIC
)
9305 HOST_WIDE_INT argval
;
9307 gcc_assert (Pmode
== DImode
);
9308 /* When using the large model we need to load the address
9309 into a register, and we've run out of registers. So we
9310 switch to a different calling convention, and we call a
9311 different function: __morestack_large. We pass the
9312 argument size in the upper 32 bits of r10 and pass the
9313 frame size in the lower 32 bits. */
9314 gcc_assert ((allocate
& HOST_WIDE_INT_C (0xffffffff)) == allocate
);
9315 gcc_assert ((args_size
& 0xffffffff) == args_size
);
9317 if (split_stack_fn_large
== NULL_RTX
)
9319 split_stack_fn_large
9320 = gen_rtx_SYMBOL_REF (Pmode
, "__morestack_large_model");
9321 SYMBOL_REF_FLAGS (split_stack_fn_large
) |= SYMBOL_FLAG_LOCAL
;
9323 if (ix86_cmodel
== CM_LARGE_PIC
)
9325 rtx_code_label
*label
;
9328 label
= gen_label_rtx ();
9330 LABEL_PRESERVE_P (label
) = 1;
9331 emit_insn (gen_set_rip_rex64 (reg10
, label
));
9332 emit_insn (gen_set_got_offset_rex64 (reg11
, label
));
9333 emit_insn (gen_add2_insn (reg10
, reg11
));
9334 x
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, split_stack_fn_large
),
9336 x
= gen_rtx_CONST (Pmode
, x
);
9337 emit_move_insn (reg11
, x
);
9338 x
= gen_rtx_PLUS (Pmode
, reg10
, reg11
);
9339 x
= gen_const_mem (Pmode
, x
);
9340 emit_move_insn (reg11
, x
);
9343 emit_move_insn (reg11
, split_stack_fn_large
);
9347 argval
= ((args_size
<< 16) << 16) + allocate
;
9348 emit_move_insn (reg10
, GEN_INT (argval
));
9352 emit_move_insn (reg10
, allocate_rtx
);
9353 emit_move_insn (reg11
, GEN_INT (args_size
));
9354 use_reg (&call_fusage
, reg11
);
9357 use_reg (&call_fusage
, reg10
);
9361 rtx_insn
*insn
= emit_insn (gen_push (GEN_INT (args_size
)));
9362 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (UNITS_PER_WORD
));
9363 insn
= emit_insn (gen_push (allocate_rtx
));
9364 add_reg_note (insn
, REG_ARGS_SIZE
, GEN_INT (2 * UNITS_PER_WORD
));
9365 pop
= GEN_INT (2 * UNITS_PER_WORD
);
9367 call_insn
= ix86_expand_call (NULL_RTX
, gen_rtx_MEM (QImode
, fn
),
9368 GEN_INT (UNITS_PER_WORD
), constm1_rtx
,
9370 add_function_usage_to (call_insn
, call_fusage
);
9372 add_reg_note (call_insn
, REG_ARGS_SIZE
, GEN_INT (0));
9373 /* Indicate that this function can't jump to non-local gotos. */
9374 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
9376 /* In order to make call/return prediction work right, we now need
9377 to execute a return instruction. See
9378 libgcc/config/i386/morestack.S for the details on how this works.
9380 For flow purposes gcc must not see this as a return
9381 instruction--we need control flow to continue at the subsequent
9382 label. Therefore, we use an unspec. */
9383 gcc_assert (crtl
->args
.pops_args
< 65536);
9385 = emit_insn (gen_split_stack_return (GEN_INT (crtl
->args
.pops_args
)));
9387 if ((flag_cf_protection
& CF_BRANCH
))
9389 /* Insert ENDBR since __morestack will jump back here via indirect
9391 rtx cet_eb
= gen_nop_endbr ();
9392 emit_insn_after (cet_eb
, ret_insn
);
9395 /* If we are in 64-bit mode and this function uses a static chain,
9396 we saved %r10 in %rax before calling _morestack. */
9397 if (TARGET_64BIT
&& DECL_STATIC_CHAIN (cfun
->decl
))
9398 emit_move_insn (gen_rtx_REG (word_mode
, R10_REG
),
9399 gen_rtx_REG (word_mode
, AX_REG
));
9401 /* If this function calls va_start, we need to store a pointer to
9402 the arguments on the old stack, because they may not have been
9403 all copied to the new stack. At this point the old stack can be
9404 found at the frame pointer value used by __morestack, because
9405 __morestack has set that up before calling back to us. Here we
9406 store that pointer in a scratch register, and in
9407 ix86_expand_prologue we store the scratch register in a stack
9409 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
9411 unsigned int scratch_regno
;
9415 scratch_regno
= split_stack_prologue_scratch_regno ();
9416 scratch_reg
= gen_rtx_REG (Pmode
, scratch_regno
);
9417 frame_reg
= gen_rtx_REG (Pmode
, BP_REG
);
9421 return address within this function
9422 return address of caller of this function
9424 So we add three words to get to the stack arguments.
9428 return address within this function
9429 first argument to __morestack
9430 second argument to __morestack
9431 return address of caller of this function
9433 So we add five words to get to the stack arguments.
9435 words
= TARGET_64BIT
? 3 : 5;
9436 emit_insn (gen_rtx_SET (scratch_reg
,
9437 gen_rtx_PLUS (Pmode
, frame_reg
,
9438 GEN_INT (words
* UNITS_PER_WORD
))));
9440 varargs_label
= gen_label_rtx ();
9441 emit_jump_insn (gen_jump (varargs_label
));
9442 JUMP_LABEL (get_last_insn ()) = varargs_label
;
9448 LABEL_NUSES (label
) = 1;
9450 /* If this function calls va_start, we now have to set the scratch
9451 register for the case where we do not call __morestack. In this
9452 case we need to set it based on the stack pointer. */
9453 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
9455 emit_insn (gen_rtx_SET (scratch_reg
,
9456 gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
9457 GEN_INT (UNITS_PER_WORD
))));
9459 emit_label (varargs_label
);
9460 LABEL_NUSES (varargs_label
) = 1;
9464 /* We may have to tell the dataflow pass that the split stack prologue
9465 is initializing a scratch register. */
9468 ix86_live_on_entry (bitmap regs
)
9470 if (cfun
->machine
->split_stack_varargs_pointer
!= NULL_RTX
)
9472 gcc_assert (flag_split_stack
);
9473 bitmap_set_bit (regs
, split_stack_prologue_scratch_regno ());
9477 /* Extract the parts of an RTL expression that is a valid memory address
9478 for an instruction. Return 0 if the structure of the address is
9479 grossly off. Return -1 if the address contains ASHIFT, so it is not
9480 strictly valid, but still used for computing length of lea instruction. */
9483 ix86_decompose_address (rtx addr
, struct ix86_address
*out
)
9485 rtx base
= NULL_RTX
, index
= NULL_RTX
, disp
= NULL_RTX
;
9486 rtx base_reg
, index_reg
;
9487 HOST_WIDE_INT scale
= 1;
9488 rtx scale_rtx
= NULL_RTX
;
9491 addr_space_t seg
= ADDR_SPACE_GENERIC
;
9493 /* Allow zero-extended SImode addresses,
9494 they will be emitted with addr32 prefix. */
9495 if (TARGET_64BIT
&& GET_MODE (addr
) == DImode
)
9497 if (GET_CODE (addr
) == ZERO_EXTEND
9498 && GET_MODE (XEXP (addr
, 0)) == SImode
)
9500 addr
= XEXP (addr
, 0);
9501 if (CONST_INT_P (addr
))
9504 else if (GET_CODE (addr
) == AND
9505 && const_32bit_mask (XEXP (addr
, 1), DImode
))
9507 addr
= lowpart_subreg (SImode
, XEXP (addr
, 0), DImode
);
9508 if (addr
== NULL_RTX
)
9511 if (CONST_INT_P (addr
))
9516 /* Allow SImode subregs of DImode addresses,
9517 they will be emitted with addr32 prefix. */
9518 if (TARGET_64BIT
&& GET_MODE (addr
) == SImode
)
9521 && GET_MODE (SUBREG_REG (addr
)) == DImode
)
9523 addr
= SUBREG_REG (addr
);
9524 if (CONST_INT_P (addr
))
9531 else if (SUBREG_P (addr
))
9533 if (REG_P (SUBREG_REG (addr
)))
9538 else if (GET_CODE (addr
) == PLUS
)
9548 addends
[n
++] = XEXP (op
, 1);
9551 while (GET_CODE (op
) == PLUS
);
9556 for (i
= n
; i
>= 0; --i
)
9559 switch (GET_CODE (op
))
9564 index
= XEXP (op
, 0);
9565 scale_rtx
= XEXP (op
, 1);
9571 index
= XEXP (op
, 0);
9573 if (!CONST_INT_P (tmp
))
9575 scale
= INTVAL (tmp
);
9576 if ((unsigned HOST_WIDE_INT
) scale
> 3)
9583 if (GET_CODE (op
) != UNSPEC
)
9588 if (XINT (op
, 1) == UNSPEC_TP
9589 && TARGET_TLS_DIRECT_SEG_REFS
9590 && seg
== ADDR_SPACE_GENERIC
)
9591 seg
= DEFAULT_TLS_SEG_REG
;
9597 if (!REG_P (SUBREG_REG (op
)))
9624 else if (GET_CODE (addr
) == MULT
)
9626 index
= XEXP (addr
, 0); /* index*scale */
9627 scale_rtx
= XEXP (addr
, 1);
9629 else if (GET_CODE (addr
) == ASHIFT
)
9631 /* We're called for lea too, which implements ashift on occasion. */
9632 index
= XEXP (addr
, 0);
9633 tmp
= XEXP (addr
, 1);
9634 if (!CONST_INT_P (tmp
))
9636 scale
= INTVAL (tmp
);
9637 if ((unsigned HOST_WIDE_INT
) scale
> 3)
9643 disp
= addr
; /* displacement */
9649 else if (SUBREG_P (index
)
9650 && REG_P (SUBREG_REG (index
)))
9656 /* Extract the integral value of scale. */
9659 if (!CONST_INT_P (scale_rtx
))
9661 scale
= INTVAL (scale_rtx
);
9664 base_reg
= base
&& SUBREG_P (base
) ? SUBREG_REG (base
) : base
;
9665 index_reg
= index
&& SUBREG_P (index
) ? SUBREG_REG (index
) : index
;
9667 /* Avoid useless 0 displacement. */
9668 if (disp
== const0_rtx
&& (base
|| index
))
9671 /* Allow arg pointer and stack pointer as index if there is not scaling. */
9672 if (base_reg
&& index_reg
&& scale
== 1
9673 && (REGNO (index_reg
) == ARG_POINTER_REGNUM
9674 || REGNO (index_reg
) == FRAME_POINTER_REGNUM
9675 || REGNO (index_reg
) == SP_REG
))
9677 std::swap (base
, index
);
9678 std::swap (base_reg
, index_reg
);
9681 /* Special case: %ebp cannot be encoded as a base without a displacement.
9683 if (!disp
&& base_reg
9684 && (REGNO (base_reg
) == ARG_POINTER_REGNUM
9685 || REGNO (base_reg
) == FRAME_POINTER_REGNUM
9686 || REGNO (base_reg
) == BP_REG
9687 || REGNO (base_reg
) == R13_REG
))
9690 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9691 Avoid this by transforming to [%esi+0].
9692 Reload calls address legitimization without cfun defined, so we need
9693 to test cfun for being non-NULL. */
9694 if (TARGET_K6
&& cfun
&& optimize_function_for_speed_p (cfun
)
9695 && base_reg
&& !index_reg
&& !disp
9696 && REGNO (base_reg
) == SI_REG
)
9699 /* Special case: encode reg+reg instead of reg*2. */
9700 if (!base
&& index
&& scale
== 2)
9701 base
= index
, base_reg
= index_reg
, scale
= 1;
9703 /* Special case: scaling cannot be encoded without base or displacement. */
9704 if (!base
&& !disp
&& index
&& scale
!= 1)
9716 /* Return cost of the memory address x.
9717 For i386, it is better to use a complex address than let gcc copy
9718 the address into a reg and make a new pseudo. But not if the address
9719 requires to two regs - that would mean more pseudos with longer
9722 ix86_address_cost (rtx x
, machine_mode
, addr_space_t
, bool)
9724 struct ix86_address parts
;
9726 int ok
= ix86_decompose_address (x
, &parts
);
9730 if (parts
.base
&& SUBREG_P (parts
.base
))
9731 parts
.base
= SUBREG_REG (parts
.base
);
9732 if (parts
.index
&& SUBREG_P (parts
.index
))
9733 parts
.index
= SUBREG_REG (parts
.index
);
9735 /* Attempt to minimize number of registers in the address by increasing
9736 address cost for each used register. We don't increase address cost
9737 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
9738 is not invariant itself it most likely means that base or index is not
9739 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
9740 which is not profitable for x86. */
9742 && (!REG_P (parts
.base
) || REGNO (parts
.base
) >= FIRST_PSEUDO_REGISTER
)
9743 && (current_pass
->type
== GIMPLE_PASS
9744 || !pic_offset_table_rtx
9745 || !REG_P (parts
.base
)
9746 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.base
)))
9750 && (!REG_P (parts
.index
) || REGNO (parts
.index
) >= FIRST_PSEUDO_REGISTER
)
9751 && (current_pass
->type
== GIMPLE_PASS
9752 || !pic_offset_table_rtx
9753 || !REG_P (parts
.index
)
9754 || REGNO (pic_offset_table_rtx
) != REGNO (parts
.index
)))
9757 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
9758 since it's predecode logic can't detect the length of instructions
9759 and it degenerates to vector decoded. Increase cost of such
9760 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
9761 to split such addresses or even refuse such addresses at all.
9763 Following addressing modes are affected:
9768 The first and last case may be avoidable by explicitly coding the zero in
9769 memory address, but I don't have AMD-K6 machine handy to check this
9773 && ((!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
!= 1)
9774 || (parts
.disp
&& !parts
.base
&& parts
.index
&& parts
.scale
!= 1)
9775 || (!parts
.disp
&& parts
.base
&& parts
.index
&& parts
.scale
== 1)))
9781 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9782 this is used for to form addresses to local data when -fPIC is in
9786 darwin_local_data_pic (rtx disp
)
9788 return (GET_CODE (disp
) == UNSPEC
9789 && XINT (disp
, 1) == UNSPEC_MACHOPIC_OFFSET
);
9792 /* True if operand X should be loaded from GOT. */
9795 ix86_force_load_from_GOT_p (rtx x
)
9797 return ((TARGET_64BIT
|| HAVE_AS_IX86_GOT32X
)
9798 && !TARGET_PECOFF
&& !TARGET_MACHO
9800 && ix86_cmodel
!= CM_LARGE
9801 && GET_CODE (x
) == SYMBOL_REF
9802 && SYMBOL_REF_FUNCTION_P (x
)
9804 || (SYMBOL_REF_DECL (x
)
9805 && lookup_attribute ("noplt",
9806 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x
)))))
9807 && !SYMBOL_REF_LOCAL_P (x
));
9810 /* Determine if a given RTX is a valid constant. We already know this
9811 satisfies CONSTANT_P. */
9814 ix86_legitimate_constant_p (machine_mode mode
, rtx x
)
9816 switch (GET_CODE (x
))
9821 if (GET_CODE (x
) == PLUS
)
9823 if (!CONST_INT_P (XEXP (x
, 1)))
9828 if (TARGET_MACHO
&& darwin_local_data_pic (x
))
9831 /* Only some unspecs are valid as "constants". */
9832 if (GET_CODE (x
) == UNSPEC
)
9833 switch (XINT (x
, 1))
9838 return TARGET_64BIT
;
9841 x
= XVECEXP (x
, 0, 0);
9842 return (GET_CODE (x
) == SYMBOL_REF
9843 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
9845 x
= XVECEXP (x
, 0, 0);
9846 return (GET_CODE (x
) == SYMBOL_REF
9847 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_DYNAMIC
);
9852 /* We must have drilled down to a symbol. */
9853 if (GET_CODE (x
) == LABEL_REF
)
9855 if (GET_CODE (x
) != SYMBOL_REF
)
9860 /* TLS symbols are never valid. */
9861 if (SYMBOL_REF_TLS_MODEL (x
))
9864 /* DLLIMPORT symbols are never valid. */
9865 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9866 && SYMBOL_REF_DLLIMPORT_P (x
))
9870 /* mdynamic-no-pic */
9871 if (MACHO_DYNAMIC_NO_PIC_P
)
9872 return machopic_symbol_defined_p (x
);
9875 /* External function address should be loaded
9876 via the GOT slot to avoid PLT. */
9877 if (ix86_force_load_from_GOT_p (x
))
9882 CASE_CONST_SCALAR_INT
:
9891 if (!standard_sse_constant_p (x
, mode
))
9899 if (!standard_sse_constant_p (x
, mode
))
9906 /* Otherwise we handle everything else in the move patterns. */
9910 /* Determine if it's legal to put X into the constant pool. This
9911 is not possible for the address of thread-local symbols, which
9912 is checked above. */
9915 ix86_cannot_force_const_mem (machine_mode mode
, rtx x
)
9917 /* We can put any immediate constant in memory. */
9918 switch (GET_CODE (x
))
9927 return !ix86_legitimate_constant_p (mode
, x
);
9930 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
9934 is_imported_p (rtx x
)
9936 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
9937 || GET_CODE (x
) != SYMBOL_REF
)
9940 return SYMBOL_REF_DLLIMPORT_P (x
) || SYMBOL_REF_STUBVAR_P (x
);
9944 /* Nonzero if the constant value X is a legitimate general operand
9945 when generating PIC code. It is given that flag_pic is on and
9946 that X satisfies CONSTANT_P. */
9949 legitimate_pic_operand_p (rtx x
)
9953 switch (GET_CODE (x
))
9956 inner
= XEXP (x
, 0);
9957 if (GET_CODE (inner
) == PLUS
9958 && CONST_INT_P (XEXP (inner
, 1)))
9959 inner
= XEXP (inner
, 0);
9961 /* Only some unspecs are valid as "constants". */
9962 if (GET_CODE (inner
) == UNSPEC
)
9963 switch (XINT (inner
, 1))
9968 return TARGET_64BIT
;
9970 x
= XVECEXP (inner
, 0, 0);
9971 return (GET_CODE (x
) == SYMBOL_REF
9972 && SYMBOL_REF_TLS_MODEL (x
) == TLS_MODEL_LOCAL_EXEC
);
9973 case UNSPEC_MACHOPIC_OFFSET
:
9974 return legitimate_pic_address_disp_p (x
);
9982 return legitimate_pic_address_disp_p (x
);
9989 /* Determine if a given CONST RTX is a valid memory displacement
9993 legitimate_pic_address_disp_p (rtx disp
)
9997 /* In 64bit mode we can allow direct addresses of symbols and labels
9998 when they are not dynamic symbols. */
10001 rtx op0
= disp
, op1
;
10003 switch (GET_CODE (disp
))
10009 if (GET_CODE (XEXP (disp
, 0)) != PLUS
)
10011 op0
= XEXP (XEXP (disp
, 0), 0);
10012 op1
= XEXP (XEXP (disp
, 0), 1);
10013 if (!CONST_INT_P (op1
))
10015 if (GET_CODE (op0
) == UNSPEC
10016 && (XINT (op0
, 1) == UNSPEC_DTPOFF
10017 || XINT (op0
, 1) == UNSPEC_NTPOFF
)
10018 && trunc_int_for_mode (INTVAL (op1
), SImode
) == INTVAL (op1
))
10020 if (INTVAL (op1
) >= 16*1024*1024
10021 || INTVAL (op1
) < -16*1024*1024)
10023 if (GET_CODE (op0
) == LABEL_REF
)
10025 if (GET_CODE (op0
) == CONST
10026 && GET_CODE (XEXP (op0
, 0)) == UNSPEC
10027 && XINT (XEXP (op0
, 0), 1) == UNSPEC_PCREL
)
10029 if (GET_CODE (op0
) == UNSPEC
10030 && XINT (op0
, 1) == UNSPEC_PCREL
)
10032 if (GET_CODE (op0
) != SYMBOL_REF
)
10037 /* TLS references should always be enclosed in UNSPEC.
10038 The dllimported symbol needs always to be resolved. */
10039 if (SYMBOL_REF_TLS_MODEL (op0
)
10040 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
&& SYMBOL_REF_DLLIMPORT_P (op0
)))
10045 if (is_imported_p (op0
))
10048 if (SYMBOL_REF_FAR_ADDR_P (op0
)
10049 || !SYMBOL_REF_LOCAL_P (op0
))
10052 /* Function-symbols need to be resolved only for
10054 For the small-model we don't need to resolve anything
10056 if ((ix86_cmodel
!= CM_LARGE_PIC
10057 && SYMBOL_REF_FUNCTION_P (op0
))
10058 || ix86_cmodel
== CM_SMALL_PIC
)
10060 /* Non-external symbols don't need to be resolved for
10061 large, and medium-model. */
10062 if ((ix86_cmodel
== CM_LARGE_PIC
10063 || ix86_cmodel
== CM_MEDIUM_PIC
)
10064 && !SYMBOL_REF_EXTERNAL_P (op0
))
10067 else if (!SYMBOL_REF_FAR_ADDR_P (op0
)
10068 && (SYMBOL_REF_LOCAL_P (op0
)
10069 || (HAVE_LD_PIE_COPYRELOC
10071 && !SYMBOL_REF_WEAK (op0
)
10072 && !SYMBOL_REF_FUNCTION_P (op0
)))
10073 && ix86_cmodel
!= CM_LARGE_PIC
)
10081 if (GET_CODE (disp
) != CONST
)
10083 disp
= XEXP (disp
, 0);
10087 /* We are unsafe to allow PLUS expressions. This limit allowed distance
10088 of GOT tables. We should not need these anyway. */
10089 if (GET_CODE (disp
) != UNSPEC
10090 || (XINT (disp
, 1) != UNSPEC_GOTPCREL
10091 && XINT (disp
, 1) != UNSPEC_GOTOFF
10092 && XINT (disp
, 1) != UNSPEC_PCREL
10093 && XINT (disp
, 1) != UNSPEC_PLTOFF
))
10096 if (GET_CODE (XVECEXP (disp
, 0, 0)) != SYMBOL_REF
10097 && GET_CODE (XVECEXP (disp
, 0, 0)) != LABEL_REF
)
10103 if (GET_CODE (disp
) == PLUS
)
10105 if (!CONST_INT_P (XEXP (disp
, 1)))
10107 disp
= XEXP (disp
, 0);
10111 if (TARGET_MACHO
&& darwin_local_data_pic (disp
))
10114 if (GET_CODE (disp
) != UNSPEC
)
10117 switch (XINT (disp
, 1))
10122 /* We need to check for both symbols and labels because VxWorks loads
10123 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10125 return (GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
10126 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
);
10127 case UNSPEC_GOTOFF
:
10128 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10129 While ABI specify also 32bit relocation but we don't produce it in
10130 small PIC model at all. */
10131 if ((GET_CODE (XVECEXP (disp
, 0, 0)) == SYMBOL_REF
10132 || GET_CODE (XVECEXP (disp
, 0, 0)) == LABEL_REF
)
10134 return !TARGET_PECOFF
&& gotoff_operand (XVECEXP (disp
, 0, 0), Pmode
);
10136 case UNSPEC_GOTTPOFF
:
10137 case UNSPEC_GOTNTPOFF
:
10138 case UNSPEC_INDNTPOFF
:
10141 disp
= XVECEXP (disp
, 0, 0);
10142 return (GET_CODE (disp
) == SYMBOL_REF
10143 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_INITIAL_EXEC
);
10144 case UNSPEC_NTPOFF
:
10145 disp
= XVECEXP (disp
, 0, 0);
10146 return (GET_CODE (disp
) == SYMBOL_REF
10147 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_EXEC
);
10148 case UNSPEC_DTPOFF
:
10149 disp
= XVECEXP (disp
, 0, 0);
10150 return (GET_CODE (disp
) == SYMBOL_REF
10151 && SYMBOL_REF_TLS_MODEL (disp
) == TLS_MODEL_LOCAL_DYNAMIC
);
10157 /* Determine if op is suitable RTX for an address register.
10158 Return naked register if a register or a register subreg is
10159 found, otherwise return NULL_RTX. */
10162 ix86_validate_address_register (rtx op
)
10164 machine_mode mode
= GET_MODE (op
);
10166 /* Only SImode or DImode registers can form the address. */
10167 if (mode
!= SImode
&& mode
!= DImode
)
10172 else if (SUBREG_P (op
))
10174 rtx reg
= SUBREG_REG (op
);
10179 mode
= GET_MODE (reg
);
10181 /* Don't allow SUBREGs that span more than a word. It can
10182 lead to spill failures when the register is one word out
10183 of a two word structure. */
10184 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
10187 /* Allow only SUBREGs of non-eliminable hard registers. */
10188 if (register_no_elim_operand (reg
, mode
))
10192 /* Op is not a register. */
10196 /* Recognizes RTL expressions that are valid memory addresses for an
10197 instruction. The MODE argument is the machine mode for the MEM
10198 expression that wants to use this address.
10200 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
10201 convert common non-canonical forms to canonical form so that they will
10205 ix86_legitimate_address_p (machine_mode
, rtx addr
, bool strict
)
10207 struct ix86_address parts
;
10208 rtx base
, index
, disp
;
10209 HOST_WIDE_INT scale
;
10212 if (ix86_decompose_address (addr
, &parts
) <= 0)
10213 /* Decomposition failed. */
10217 index
= parts
.index
;
10219 scale
= parts
.scale
;
10222 /* Validate base register. */
10225 rtx reg
= ix86_validate_address_register (base
);
10227 if (reg
== NULL_RTX
)
10230 if ((strict
&& ! REG_OK_FOR_BASE_STRICT_P (reg
))
10231 || (! strict
&& ! REG_OK_FOR_BASE_NONSTRICT_P (reg
)))
10232 /* Base is not valid. */
10236 /* Validate index register. */
10239 rtx reg
= ix86_validate_address_register (index
);
10241 if (reg
== NULL_RTX
)
10244 if ((strict
&& ! REG_OK_FOR_INDEX_STRICT_P (reg
))
10245 || (! strict
&& ! REG_OK_FOR_INDEX_NONSTRICT_P (reg
)))
10246 /* Index is not valid. */
10250 /* Index and base should have the same mode. */
10252 && GET_MODE (base
) != GET_MODE (index
))
10255 /* Address override works only on the (%reg) part of %fs:(%reg). */
10256 if (seg
!= ADDR_SPACE_GENERIC
10257 && ((base
&& GET_MODE (base
) != word_mode
)
10258 || (index
&& GET_MODE (index
) != word_mode
)))
10261 /* Validate scale factor. */
10265 /* Scale without index. */
10268 if (scale
!= 2 && scale
!= 4 && scale
!= 8)
10269 /* Scale is not a valid multiplier. */
10273 /* Validate displacement. */
10276 if (GET_CODE (disp
) == CONST
10277 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
10278 && XINT (XEXP (disp
, 0), 1) != UNSPEC_MACHOPIC_OFFSET
)
10279 switch (XINT (XEXP (disp
, 0), 1))
10281 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
10282 when used. While ABI specify also 32bit relocations, we
10283 don't produce them at all and use IP relative instead.
10284 Allow GOT in 32bit mode for both PIC and non-PIC if symbol
10285 should be loaded via GOT. */
10288 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
10289 goto is_legitimate_pic
;
10291 case UNSPEC_GOTOFF
:
10292 gcc_assert (flag_pic
);
10294 goto is_legitimate_pic
;
10296 /* 64bit address unspec. */
10299 case UNSPEC_GOTPCREL
:
10300 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
10301 goto is_legitimate_pic
;
10304 gcc_assert (flag_pic
);
10305 goto is_legitimate_pic
;
10307 case UNSPEC_GOTTPOFF
:
10308 case UNSPEC_GOTNTPOFF
:
10309 case UNSPEC_INDNTPOFF
:
10310 case UNSPEC_NTPOFF
:
10311 case UNSPEC_DTPOFF
:
10315 /* Invalid address unspec. */
10319 else if (SYMBOLIC_CONST (disp
)
10323 && MACHOPIC_INDIRECT
10324 && !machopic_operand_p (disp
)
10330 if (TARGET_64BIT
&& (index
|| base
))
10332 /* foo@dtpoff(%rX) is ok. */
10333 if (GET_CODE (disp
) != CONST
10334 || GET_CODE (XEXP (disp
, 0)) != PLUS
10335 || GET_CODE (XEXP (XEXP (disp
, 0), 0)) != UNSPEC
10336 || !CONST_INT_P (XEXP (XEXP (disp
, 0), 1))
10337 || (XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_DTPOFF
10338 && XINT (XEXP (XEXP (disp
, 0), 0), 1) != UNSPEC_NTPOFF
))
10339 /* Non-constant pic memory reference. */
10342 else if ((!TARGET_MACHO
|| flag_pic
)
10343 && ! legitimate_pic_address_disp_p (disp
))
10344 /* Displacement is an invalid pic construct. */
10347 else if (MACHO_DYNAMIC_NO_PIC_P
10348 && !ix86_legitimate_constant_p (Pmode
, disp
))
10349 /* displacment must be referenced via non_lazy_pointer */
10353 /* This code used to verify that a symbolic pic displacement
10354 includes the pic_offset_table_rtx register.
10356 While this is good idea, unfortunately these constructs may
10357 be created by "adds using lea" optimization for incorrect
10366 This code is nonsensical, but results in addressing
10367 GOT table with pic_offset_table_rtx base. We can't
10368 just refuse it easily, since it gets matched by
10369 "addsi3" pattern, that later gets split to lea in the
10370 case output register differs from input. While this
10371 can be handled by separate addsi pattern for this case
10372 that never results in lea, this seems to be easier and
10373 correct fix for crash to disable this test. */
10375 else if (GET_CODE (disp
) != LABEL_REF
10376 && !CONST_INT_P (disp
)
10377 && (GET_CODE (disp
) != CONST
10378 || !ix86_legitimate_constant_p (Pmode
, disp
))
10379 && (GET_CODE (disp
) != SYMBOL_REF
10380 || !ix86_legitimate_constant_p (Pmode
, disp
)))
10381 /* Displacement is not constant. */
10383 else if (TARGET_64BIT
10384 && !x86_64_immediate_operand (disp
, VOIDmode
))
10385 /* Displacement is out of range. */
10387 /* In x32 mode, constant addresses are sign extended to 64bit, so
10388 we have to prevent addresses from 0x80000000 to 0xffffffff. */
10389 else if (TARGET_X32
&& !(index
|| base
)
10390 && CONST_INT_P (disp
)
10391 && val_signbit_known_set_p (SImode
, INTVAL (disp
)))
10395 /* Everything looks valid. */
10399 /* Determine if a given RTX is a valid constant address. */
10402 constant_address_p (rtx x
)
10404 return CONSTANT_P (x
) && ix86_legitimate_address_p (Pmode
, x
, 1);
10407 /* Return a unique alias set for the GOT. */
10410 ix86_GOT_alias_set (void)
10412 static alias_set_type set
= -1;
10414 set
= new_alias_set ();
10418 /* Return a legitimate reference for ORIG (an address) using the
10419 register REG. If REG is 0, a new pseudo is generated.
10421 There are two types of references that must be handled:
10423 1. Global data references must load the address from the GOT, via
10424 the PIC reg. An insn is emitted to do this load, and the reg is
10427 2. Static data references, constant pool addresses, and code labels
10428 compute the address as an offset from the GOT, whose base is in
10429 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10430 differentiate them from global data objects. The returned
10431 address is the PIC reg + an unspec constant.
10433 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10434 reg also appears in the address. */
10437 legitimize_pic_address (rtx orig
, rtx reg
)
10440 rtx new_rtx
= orig
;
10443 if (TARGET_MACHO
&& !TARGET_64BIT
)
10446 reg
= gen_reg_rtx (Pmode
);
10447 /* Use the generic Mach-O PIC machinery. */
10448 return machopic_legitimize_pic_address (orig
, GET_MODE (orig
), reg
);
10452 if (TARGET_64BIT
&& TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
10454 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
10459 if (TARGET_64BIT
&& legitimate_pic_address_disp_p (addr
))
10461 else if ((!TARGET_64BIT
10462 || /* TARGET_64BIT && */ ix86_cmodel
!= CM_SMALL_PIC
)
10464 && gotoff_operand (addr
, Pmode
))
10466 /* This symbol may be referenced via a displacement
10467 from the PIC base address (@GOTOFF). */
10468 if (GET_CODE (addr
) == CONST
)
10469 addr
= XEXP (addr
, 0);
10471 if (GET_CODE (addr
) == PLUS
)
10473 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, XEXP (addr
, 0)),
10475 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, XEXP (addr
, 1));
10478 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOTOFF
);
10480 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10483 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
10487 gcc_assert (REG_P (reg
));
10488 new_rtx
= expand_simple_binop (Pmode
, PLUS
, pic_offset_table_rtx
,
10489 new_rtx
, reg
, 1, OPTAB_DIRECT
);
10492 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
10494 else if ((GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (addr
) == 0)
10495 /* We can't use @GOTOFF for text labels
10496 on VxWorks, see gotoff_operand. */
10497 || (TARGET_VXWORKS_RTP
&& GET_CODE (addr
) == LABEL_REF
))
10499 rtx tmp
= legitimize_pe_coff_symbol (addr
, true);
10503 /* For x64 PE-COFF there is no GOT table,
10504 so we use address directly. */
10505 if (TARGET_64BIT
&& TARGET_PECOFF
)
10507 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_PCREL
);
10508 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10510 else if (TARGET_64BIT
&& ix86_cmodel
!= CM_LARGE_PIC
)
10512 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
),
10514 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10515 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
10516 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
10520 /* This symbol must be referenced via a load
10521 from the Global Offset Table (@GOT). */
10522 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, addr
), UNSPEC_GOT
);
10523 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10525 new_rtx
= force_reg (Pmode
, new_rtx
);
10526 new_rtx
= gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
10527 new_rtx
= gen_const_mem (Pmode
, new_rtx
);
10528 set_mem_alias_set (new_rtx
, ix86_GOT_alias_set ());
10531 new_rtx
= copy_to_suggested_reg (new_rtx
, reg
, Pmode
);
10535 if (CONST_INT_P (addr
)
10536 && !x86_64_immediate_operand (addr
, VOIDmode
))
10537 new_rtx
= copy_to_suggested_reg (addr
, reg
, Pmode
);
10538 else if (GET_CODE (addr
) == CONST
)
10540 addr
= XEXP (addr
, 0);
10542 /* We must match stuff we generate before. Assume the only
10543 unspecs that can get here are ours. Not that we could do
10544 anything with them anyway.... */
10545 if (GET_CODE (addr
) == UNSPEC
10546 || (GET_CODE (addr
) == PLUS
10547 && GET_CODE (XEXP (addr
, 0)) == UNSPEC
))
10549 gcc_assert (GET_CODE (addr
) == PLUS
);
10552 if (GET_CODE (addr
) == PLUS
)
10554 rtx op0
= XEXP (addr
, 0), op1
= XEXP (addr
, 1);
10556 /* Check first to see if this is a constant
10557 offset from a @GOTOFF symbol reference. */
10559 && gotoff_operand (op0
, Pmode
)
10560 && CONST_INT_P (op1
))
10564 new_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, op0
),
10566 new_rtx
= gen_rtx_PLUS (Pmode
, new_rtx
, op1
);
10567 new_rtx
= gen_rtx_CONST (Pmode
, new_rtx
);
10571 gcc_assert (REG_P (reg
));
10572 new_rtx
= expand_simple_binop (Pmode
, PLUS
,
10573 pic_offset_table_rtx
,
10579 = gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
, new_rtx
);
10583 if (INTVAL (op1
) < -16*1024*1024
10584 || INTVAL (op1
) >= 16*1024*1024)
10586 if (!x86_64_immediate_operand (op1
, Pmode
))
10587 op1
= force_reg (Pmode
, op1
);
10590 = gen_rtx_PLUS (Pmode
, force_reg (Pmode
, op0
), op1
);
10596 rtx base
= legitimize_pic_address (op0
, reg
);
10597 machine_mode mode
= GET_MODE (base
);
10599 = legitimize_pic_address (op1
, base
== reg
? NULL_RTX
: reg
);
10601 if (CONST_INT_P (new_rtx
))
10603 if (INTVAL (new_rtx
) < -16*1024*1024
10604 || INTVAL (new_rtx
) >= 16*1024*1024)
10606 if (!x86_64_immediate_operand (new_rtx
, mode
))
10607 new_rtx
= force_reg (mode
, new_rtx
);
10610 = gen_rtx_PLUS (mode
, force_reg (mode
, base
), new_rtx
);
10613 new_rtx
= plus_constant (mode
, base
, INTVAL (new_rtx
));
10617 /* For %rip addressing, we have to use
10618 just disp32, not base nor index. */
10620 && (GET_CODE (base
) == SYMBOL_REF
10621 || GET_CODE (base
) == LABEL_REF
))
10622 base
= force_reg (mode
, base
);
10623 if (GET_CODE (new_rtx
) == PLUS
10624 && CONSTANT_P (XEXP (new_rtx
, 1)))
10626 base
= gen_rtx_PLUS (mode
, base
, XEXP (new_rtx
, 0));
10627 new_rtx
= XEXP (new_rtx
, 1);
10629 new_rtx
= gen_rtx_PLUS (mode
, base
, new_rtx
);
10637 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10640 get_thread_pointer (machine_mode tp_mode
, bool to_reg
)
10642 rtx tp
= gen_rtx_UNSPEC (ptr_mode
, gen_rtvec (1, const0_rtx
), UNSPEC_TP
);
10644 if (GET_MODE (tp
) != tp_mode
)
10646 gcc_assert (GET_MODE (tp
) == SImode
);
10647 gcc_assert (tp_mode
== DImode
);
10649 tp
= gen_rtx_ZERO_EXTEND (tp_mode
, tp
);
10653 tp
= copy_to_mode_reg (tp_mode
, tp
);
10658 /* Construct the SYMBOL_REF for the tls_get_addr function. */
10660 static GTY(()) rtx ix86_tls_symbol
;
10663 ix86_tls_get_addr (void)
10665 if (!ix86_tls_symbol
)
10668 = ((TARGET_ANY_GNU_TLS
&& !TARGET_64BIT
)
10669 ? "___tls_get_addr" : "__tls_get_addr");
10671 ix86_tls_symbol
= gen_rtx_SYMBOL_REF (Pmode
, sym
);
10674 if (ix86_cmodel
== CM_LARGE_PIC
&& !TARGET_PECOFF
)
10676 rtx unspec
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, ix86_tls_symbol
),
10678 return gen_rtx_PLUS (Pmode
, pic_offset_table_rtx
,
10679 gen_rtx_CONST (Pmode
, unspec
));
10682 return ix86_tls_symbol
;
10685 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
10687 static GTY(()) rtx ix86_tls_module_base_symbol
;
10690 ix86_tls_module_base (void)
10692 if (!ix86_tls_module_base_symbol
)
10694 ix86_tls_module_base_symbol
10695 = gen_rtx_SYMBOL_REF (Pmode
, "_TLS_MODULE_BASE_");
10697 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol
)
10698 |= TLS_MODEL_GLOBAL_DYNAMIC
<< SYMBOL_FLAG_TLS_SHIFT
;
10701 return ix86_tls_module_base_symbol
;
10704 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10705 false if we expect this to be used for a memory address and true if
10706 we expect to load the address into a register. */
10709 legitimize_tls_address (rtx x
, enum tls_model model
, bool for_mov
)
10711 rtx dest
, base
, off
;
10712 rtx pic
= NULL_RTX
, tp
= NULL_RTX
;
10713 machine_mode tp_mode
= Pmode
;
10716 /* Fall back to global dynamic model if tool chain cannot support local
10718 if (TARGET_SUN_TLS
&& !TARGET_64BIT
10719 && !HAVE_AS_IX86_TLSLDMPLT
&& !HAVE_AS_IX86_TLSLDM
10720 && model
== TLS_MODEL_LOCAL_DYNAMIC
)
10721 model
= TLS_MODEL_GLOBAL_DYNAMIC
;
10725 case TLS_MODEL_GLOBAL_DYNAMIC
:
10726 dest
= gen_reg_rtx (Pmode
);
10730 if (flag_pic
&& !TARGET_PECOFF
)
10731 pic
= pic_offset_table_rtx
;
10734 pic
= gen_reg_rtx (Pmode
);
10735 emit_insn (gen_set_got (pic
));
10739 if (TARGET_GNU2_TLS
)
10742 emit_insn (gen_tls_dynamic_gnu2_64 (dest
, x
));
10744 emit_insn (gen_tls_dynamic_gnu2_32 (dest
, x
, pic
));
10746 tp
= get_thread_pointer (Pmode
, true);
10747 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, tp
, dest
));
10749 if (GET_MODE (x
) != Pmode
)
10750 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
10752 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
10756 rtx caddr
= ix86_tls_get_addr ();
10760 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
10765 (gen_tls_global_dynamic_64 (Pmode
, rax
, x
, caddr
));
10766 insns
= get_insns ();
10769 if (GET_MODE (x
) != Pmode
)
10770 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
10772 RTL_CONST_CALL_P (insns
) = 1;
10773 emit_libcall_block (insns
, dest
, rax
, x
);
10776 emit_insn (gen_tls_global_dynamic_32 (dest
, x
, pic
, caddr
));
10780 case TLS_MODEL_LOCAL_DYNAMIC
:
10781 base
= gen_reg_rtx (Pmode
);
10786 pic
= pic_offset_table_rtx
;
10789 pic
= gen_reg_rtx (Pmode
);
10790 emit_insn (gen_set_got (pic
));
10794 if (TARGET_GNU2_TLS
)
10796 rtx tmp
= ix86_tls_module_base ();
10799 emit_insn (gen_tls_dynamic_gnu2_64 (base
, tmp
));
10801 emit_insn (gen_tls_dynamic_gnu2_32 (base
, tmp
, pic
));
10803 tp
= get_thread_pointer (Pmode
, true);
10804 set_unique_reg_note (get_last_insn (), REG_EQUAL
,
10805 gen_rtx_MINUS (Pmode
, tmp
, tp
));
10809 rtx caddr
= ix86_tls_get_addr ();
10813 rtx rax
= gen_rtx_REG (Pmode
, AX_REG
);
10819 (gen_tls_local_dynamic_base_64 (Pmode
, rax
, caddr
));
10820 insns
= get_insns ();
10823 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
10824 share the LD_BASE result with other LD model accesses. */
10825 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
10826 UNSPEC_TLS_LD_BASE
);
10828 RTL_CONST_CALL_P (insns
) = 1;
10829 emit_libcall_block (insns
, base
, rax
, eqv
);
10832 emit_insn (gen_tls_local_dynamic_base_32 (base
, pic
, caddr
));
10835 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
), UNSPEC_DTPOFF
);
10836 off
= gen_rtx_CONST (Pmode
, off
);
10838 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, off
));
10840 if (TARGET_GNU2_TLS
)
10842 dest
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, dest
, tp
));
10844 if (GET_MODE (x
) != Pmode
)
10845 x
= gen_rtx_ZERO_EXTEND (Pmode
, x
);
10847 set_unique_reg_note (get_last_insn (), REG_EQUAL
, x
);
10851 case TLS_MODEL_INITIAL_EXEC
:
10854 if (TARGET_SUN_TLS
&& !TARGET_X32
)
10856 /* The Sun linker took the AMD64 TLS spec literally
10857 and can only handle %rax as destination of the
10858 initial executable code sequence. */
10860 dest
= gen_reg_rtx (DImode
);
10861 emit_insn (gen_tls_initial_exec_64_sun (dest
, x
));
10865 /* Generate DImode references to avoid %fs:(%reg32)
10866 problems and linker IE->LE relaxation bug. */
10869 type
= UNSPEC_GOTNTPOFF
;
10873 pic
= pic_offset_table_rtx
;
10874 type
= TARGET_ANY_GNU_TLS
? UNSPEC_GOTNTPOFF
: UNSPEC_GOTTPOFF
;
10876 else if (!TARGET_ANY_GNU_TLS
)
10878 pic
= gen_reg_rtx (Pmode
);
10879 emit_insn (gen_set_got (pic
));
10880 type
= UNSPEC_GOTTPOFF
;
10885 type
= UNSPEC_INDNTPOFF
;
10888 off
= gen_rtx_UNSPEC (tp_mode
, gen_rtvec (1, x
), type
);
10889 off
= gen_rtx_CONST (tp_mode
, off
);
10891 off
= gen_rtx_PLUS (tp_mode
, pic
, off
);
10892 off
= gen_const_mem (tp_mode
, off
);
10893 set_mem_alias_set (off
, ix86_GOT_alias_set ());
10895 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
10897 base
= get_thread_pointer (tp_mode
,
10898 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
10899 off
= force_reg (tp_mode
, off
);
10900 dest
= gen_rtx_PLUS (tp_mode
, base
, off
);
10901 if (tp_mode
!= Pmode
)
10902 dest
= convert_to_mode (Pmode
, dest
, 1);
10906 base
= get_thread_pointer (Pmode
, true);
10907 dest
= gen_reg_rtx (Pmode
);
10908 emit_insn (gen_sub3_insn (dest
, base
, off
));
10912 case TLS_MODEL_LOCAL_EXEC
:
10913 off
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, x
),
10914 (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
10915 ? UNSPEC_NTPOFF
: UNSPEC_TPOFF
);
10916 off
= gen_rtx_CONST (Pmode
, off
);
10918 if (TARGET_64BIT
|| TARGET_ANY_GNU_TLS
)
10920 base
= get_thread_pointer (Pmode
,
10921 for_mov
|| !TARGET_TLS_DIRECT_SEG_REFS
);
10922 return gen_rtx_PLUS (Pmode
, base
, off
);
10926 base
= get_thread_pointer (Pmode
, true);
10927 dest
= gen_reg_rtx (Pmode
);
10928 emit_insn (gen_sub3_insn (dest
, base
, off
));
10933 gcc_unreachable ();
10939 /* Return true if OP refers to a TLS address. */
/* NOTE(review): this chunk is a lossy extraction -- the return type, braces
   and several statements are missing here.  The visible fragments walk every
   sub-rtx of OP and, inside each MEM-like address, scan a chain of PLUS
   operands for a ZERO_EXTEND or an (unspec ... UNSPEC_TP), i.e. a
   thread-pointer (%fs/%gs) relative TLS access.  */
10941 ix86_tls_address_pattern_p (rtx op
)
10943 subrtx_var_iterator::array_type array
;
/* Iterate over all sub-rtxes of OP, including those inside MEMs.  */
10944 FOR_EACH_SUBRTX_VAR (iter
, array
, op
, ALL
)
10949 rtx
*x
= &XEXP (op
, 0);
/* Descend through nested PLUS terms of the address.  */
10950 while (GET_CODE (*x
) == PLUS
)
10953 for (i
= 0; i
< 2; i
++)
10955 rtx u
= XEXP (*x
, i
);
/* Look through a ZERO_EXTEND wrapper (x32 addresses).  */
10956 if (GET_CODE (u
) == ZERO_EXTEND
)
/* An UNSPEC_TP operand marks this as a TLS address.  */
10958 if (GET_CODE (u
) == UNSPEC
10959 && XINT (u
, 1) == UNSPEC_TP
)
/* Address already examined; don't recurse into it again.  */
10965 iter
.skip_subrtxes ();
10972 /* Rewrite *LOC so that it refers to a default TLS address space. */
/* NOTE(review): lossy extraction -- braces and several statements are
   missing.  The visible logic mirrors ix86_tls_address_pattern_p: it finds
   the (unspec UNSPEC_TP) term inside a PLUS chain of a MEM address, removes
   that term, and re-creates the MEM in DEFAULT_TLS_SEG_REG address space so
   the segment override is carried by the address space, not the rtx.  */
10974 ix86_rewrite_tls_address_1 (rtx
*loc
)
10976 subrtx_ptr_iterator::array_type array
;
/* Walk every sub-rtx location of *LOC.  */
10977 FOR_EACH_SUBRTX_PTR (iter
, array
, loc
, ALL
)
10982 rtx addr
= XEXP (*loc
, 0);
/* Descend through nested PLUS terms of the address.  */
10984 while (GET_CODE (*x
) == PLUS
)
10987 for (i
= 0; i
< 2; i
++)
10989 rtx u
= XEXP (*x
, i
);
10990 if (GET_CODE (u
) == ZERO_EXTEND
)
10992 if (GET_CODE (u
) == UNSPEC
10993 && XINT (u
, 1) == UNSPEC_TP
)
/* Found the thread-pointer term: rewrite into the TLS segment
   address space.  */
10995 addr_space_t as
= DEFAULT_TLS_SEG_REG
;
/* Drop the UNSPEC_TP operand, keeping the other PLUS operand.  */
10997 *x
= XEXP (*x
, 1 - i
);
/* Replace the MEM's address (non-validating) and tag it with the
   TLS address space.  */
10999 *loc
= replace_equiv_address_nv (*loc
, addr
, true);
11000 set_mem_addr_space (*loc
, as
);
11007 iter
.skip_subrtxes ();
11012 /* Rewrite instruction pattern involving TLS address
11013 so that it refers to a default TLS address space. */
/* NOTE(review): the return type, braces and the final `return pattern;` of
   this wrapper are missing from this extraction.  Visible behavior: copy the
   insn pattern, then rewrite TLS addresses in the copy in place via
   ix86_rewrite_tls_address_1.  */
11015 ix86_rewrite_tls_address (rtx pattern
)
/* Work on a copy so the original insn pattern is not modified.  */
11017 pattern
= copy_insn (pattern
)
;
11018 ix86_rewrite_tls_address_1 (&pattern
);
11022 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11023 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11024 unique refptr-DECL symbol corresponding to symbol DECL. */
/* NOTE(review): lossy extraction -- braces and some member headers are
   missing.  Hash traits for the GC-managed decl -> __imp_ symbol cache:
   keyed on the source decl pointer, entries live while the decl is
   GC-marked.  */
11026 struct dllimport_hasher
: ggc_cache_ptr_hash
<tree_map
>
/* Use the precomputed hash stored in the tree_map entry.  */
11028 static inline hashval_t
hash (tree_map
*m
) { return m
->hash
; }
/* Two entries are equal iff they map the same source decl.  */
11030 equal (tree_map
*a
, tree_map
*b
)
11032 return a
->base
.from
== b
->base
.from
;
/* Keep a cache entry only while its source decl is still GC-live.  */
11036 keep_cache_entry (tree_map
*&m
)
11038 return ggc_marked_p (m
->base
.from
);
/* The decl -> dllimport/refptr decl cache itself (GC'd, cache semantics).  */
11042 static GTY((cache
)) hash_table
<dllimport_hasher
> *dllimport_map
;
11045 get_dllimport_decl (tree decl
, bool beimport
)
11047 struct tree_map
*h
, in
;
11049 const char *prefix
;
11050 size_t namelen
, prefixlen
;
11055 if (!dllimport_map
)
11056 dllimport_map
= hash_table
<dllimport_hasher
>::create_ggc (512);
11058 in
.hash
= htab_hash_pointer (decl
);
11059 in
.base
.from
= decl
;
11060 tree_map
**loc
= dllimport_map
->find_slot_with_hash (&in
, in
.hash
, INSERT
);
11065 *loc
= h
= ggc_alloc
<tree_map
> ();
11067 h
->base
.from
= decl
;
11068 h
->to
= to
= build_decl (DECL_SOURCE_LOCATION (decl
),
11069 VAR_DECL
, NULL
, ptr_type_node
);
11070 DECL_ARTIFICIAL (to
) = 1;
11071 DECL_IGNORED_P (to
) = 1;
11072 DECL_EXTERNAL (to
) = 1;
11073 TREE_READONLY (to
) = 1;
11075 name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
));
11076 name
= targetm
.strip_name_encoding (name
);
11078 prefix
= name
[0] == FASTCALL_PREFIX
|| user_label_prefix
[0] == 0
11079 ? "*__imp_" : "*__imp__";
11081 prefix
= user_label_prefix
[0] == 0 ? "*.refptr." : "*refptr.";
11082 namelen
= strlen (name
);
11083 prefixlen
= strlen (prefix
);
11084 imp_name
= (char *) alloca (namelen
+ prefixlen
+ 1);
11085 memcpy (imp_name
, prefix
, prefixlen
);
11086 memcpy (imp_name
+ prefixlen
, name
, namelen
+ 1);
11088 name
= ggc_alloc_string (imp_name
, namelen
+ prefixlen
);
11089 rtl
= gen_rtx_SYMBOL_REF (Pmode
, name
);
11090 SET_SYMBOL_REF_DECL (rtl
, to
);
11091 SYMBOL_REF_FLAGS (rtl
) = SYMBOL_FLAG_LOCAL
| SYMBOL_FLAG_STUBVAR
;
11094 SYMBOL_REF_FLAGS (rtl
) |= SYMBOL_FLAG_EXTERNAL
;
11095 #ifdef SUB_TARGET_RECORD_STUB
11096 SUB_TARGET_RECORD_STUB (name
);
11100 rtl
= gen_const_mem (Pmode
, rtl
);
11101 set_mem_alias_set (rtl
, ix86_GOT_alias_set ());
11103 SET_DECL_RTL (to
, rtl
);
11104 SET_DECL_ASSEMBLER_NAME (to
, get_identifier (name
));
11109 /* Expand SYMBOL into its corresponding far-address symbol.
11110 WANT_REG is true if we require the result be a register. */
/* NOTE(review): return type, braces, locals and the final return are missing
   from this extraction.  Visible behavior: build (or fetch from cache) the
   refptr-DECL indirection for SYMBOL's decl, take its DECL_RTL, and force it
   into a register when WANT_REG.  */
11113 legitimize_pe_coff_extern_decl (rtx symbol
, bool want_reg
)
/* Caller must pass a SYMBOL_REF that carries its decl.  */
11118 gcc_assert (SYMBOL_REF_DECL (symbol
));
/* false => create/return the refptr-DECL variant, not __imp_.  */
11119 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), false);
11121 x
= DECL_RTL (imp_decl
);
11123 x
= force_reg (Pmode
, x
);
11127 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11128 true if we require the result be a register. */
/* NOTE(review): return type, braces, locals and the final return are missing
   from this extraction.  Same shape as legitimize_pe_coff_extern_decl, but
   requests the __imp_ dllimport variant (beimport == true).  */
11131 legitimize_dllimport_symbol (rtx symbol
, bool want_reg
)
/* Caller must pass a SYMBOL_REF that carries its decl.  */
11136 gcc_assert (SYMBOL_REF_DECL (symbol
));
/* true => create/return the __imp_ dllimport indirection.  */
11137 imp_decl
= get_dllimport_decl (SYMBOL_REF_DECL (symbol
), true);
11139 x
= DECL_RTL (imp_decl
);
11141 x
= force_reg (Pmode
, x
);
11145 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
11146 is true if we require the result be a register. */
/* NOTE(review): lossy extraction -- braces, early returns and the trailing
   NULL_RTX fallthrough are missing.  Visible structure: bail out unless
   PE/COFF; first handle dllimport symbols (plain SYMBOL_REF or
   CONST(PLUS(SYMBOL_REF, offset))); then, for medium/large PIC code models,
   handle external decls the same way via refptr indirection.  */
11149 legitimize_pe_coff_symbol (rtx addr
, bool inreg
)
/* Only meaningful for PE/COFF targets.  */
11151 if (!TARGET_PECOFF
)
11154 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
/* Case 1: a bare dllimport SYMBOL_REF.  */
11156 if (GET_CODE (addr
) == SYMBOL_REF
&& SYMBOL_REF_DLLIMPORT_P (addr
))
11157 return legitimize_dllimport_symbol (addr
, inreg
);
/* Case 2: CONST (PLUS (dllimport SYMBOL_REF, offset)).  */
11158 if (GET_CODE (addr
) == CONST
11159 && GET_CODE (XEXP (addr
, 0)) == PLUS
11160 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
11161 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr
, 0), 0)))
11163 rtx t
= legitimize_dllimport_symbol (XEXP (XEXP (addr
, 0), 0), inreg
);
/* Re-attach the constant offset to the legitimized symbol.  */
11164 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
/* Below: only for -mcmodel=medium/large PIC.  */
11168 if (ix86_cmodel
!= CM_LARGE_PIC
&& ix86_cmodel
!= CM_MEDIUM_PIC
)
/* Case 3: external (non-imported) SYMBOL_REF with a decl.  */
11170 if (GET_CODE (addr
) == SYMBOL_REF
11171 && !is_imported_p (addr
)
11172 && SYMBOL_REF_EXTERNAL_P (addr
)
11173 && SYMBOL_REF_DECL (addr
))
11174 return legitimize_pe_coff_extern_decl (addr
, inreg
);
/* Case 4: CONST (PLUS (external SYMBOL_REF, offset)).  */
11176 if (GET_CODE (addr
) == CONST
11177 && GET_CODE (XEXP (addr
, 0)) == PLUS
11178 && GET_CODE (XEXP (XEXP (addr
, 0), 0)) == SYMBOL_REF
11179 && !is_imported_p (XEXP (XEXP (addr
, 0), 0))
11180 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr
, 0), 0))
11181 && SYMBOL_REF_DECL (XEXP (XEXP (addr
, 0), 0)))
11183 rtx t
= legitimize_pe_coff_extern_decl (XEXP (XEXP (addr
, 0), 0), inreg
);
11184 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (addr
, 0), 1));
11189 /* Try machine-dependent ways of modifying an illegitimate address
11190 to be legitimate. If we find one, return the new, valid address.
11191 This macro is used in only one place: `memory_address' in explow.c.
11193 OLDX is the address as it was before break_out_memory_refs was called.
11194 In some cases it is useful to look at this to decide what needs to be done.
11196 It is always safe for this macro to do nothing. It exists to recognize
11197 opportunities to optimize the output.
11199 For the 80386, we handle X+REG by loading X into a register R and
11200 using R+REG. R will go in a general reg and indexing will be used.
11201 However, if REG is a broken-out memory address or multiplication,
11202 nothing needs to be done because REG can certainly go in a general reg.
11204 When -fpic is used, special handling is needed for symbolic references.
11205 See comments by legitimize_pic_address in i386.c for details. */
11208 ix86_legitimize_address (rtx x
, rtx
, machine_mode mode
)
11210 bool changed
= false;
11213 log
= GET_CODE (x
) == SYMBOL_REF
? SYMBOL_REF_TLS_MODEL (x
) : 0;
11215 return legitimize_tls_address (x
, (enum tls_model
) log
, false);
11216 if (GET_CODE (x
) == CONST
11217 && GET_CODE (XEXP (x
, 0)) == PLUS
11218 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
11219 && (log
= SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x
, 0), 0))))
11221 rtx t
= legitimize_tls_address (XEXP (XEXP (x
, 0), 0),
11222 (enum tls_model
) log
, false);
11223 return gen_rtx_PLUS (Pmode
, t
, XEXP (XEXP (x
, 0), 1));
11226 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
11228 rtx tmp
= legitimize_pe_coff_symbol (x
, true);
11233 if (flag_pic
&& SYMBOLIC_CONST (x
))
11234 return legitimize_pic_address (x
, 0);
11237 if (MACHO_DYNAMIC_NO_PIC_P
&& SYMBOLIC_CONST (x
))
11238 return machopic_indirect_data_reference (x
, 0);
11241 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11242 if (GET_CODE (x
) == ASHIFT
11243 && CONST_INT_P (XEXP (x
, 1))
11244 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (x
, 1)) < 4)
11247 log
= INTVAL (XEXP (x
, 1));
11248 x
= gen_rtx_MULT (Pmode
, force_reg (Pmode
, XEXP (x
, 0)),
11249 GEN_INT (1 << log
));
11252 if (GET_CODE (x
) == PLUS
)
11254 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11256 if (GET_CODE (XEXP (x
, 0)) == ASHIFT
11257 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11258 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 0), 1)) < 4)
11261 log
= INTVAL (XEXP (XEXP (x
, 0), 1));
11262 XEXP (x
, 0) = gen_rtx_MULT (Pmode
,
11263 force_reg (Pmode
, XEXP (XEXP (x
, 0), 0)),
11264 GEN_INT (1 << log
));
11267 if (GET_CODE (XEXP (x
, 1)) == ASHIFT
11268 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
11269 && (unsigned HOST_WIDE_INT
) INTVAL (XEXP (XEXP (x
, 1), 1)) < 4)
11272 log
= INTVAL (XEXP (XEXP (x
, 1), 1));
11273 XEXP (x
, 1) = gen_rtx_MULT (Pmode
,
11274 force_reg (Pmode
, XEXP (XEXP (x
, 1), 0)),
11275 GEN_INT (1 << log
));
11278 /* Put multiply first if it isn't already. */
11279 if (GET_CODE (XEXP (x
, 1)) == MULT
)
11281 std::swap (XEXP (x
, 0), XEXP (x
, 1));
11285 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11286 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11287 created by virtual register instantiation, register elimination, and
11288 similar optimizations. */
11289 if (GET_CODE (XEXP (x
, 0)) == MULT
&& GET_CODE (XEXP (x
, 1)) == PLUS
)
11292 x
= gen_rtx_PLUS (Pmode
,
11293 gen_rtx_PLUS (Pmode
, XEXP (x
, 0),
11294 XEXP (XEXP (x
, 1), 0)),
11295 XEXP (XEXP (x
, 1), 1));
11299 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11300 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11301 else if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == PLUS
11302 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
11303 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == PLUS
11304 && CONSTANT_P (XEXP (x
, 1)))
11307 rtx other
= NULL_RTX
;
11309 if (CONST_INT_P (XEXP (x
, 1)))
11311 constant
= XEXP (x
, 1);
11312 other
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
11314 else if (CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 1), 1)))
11316 constant
= XEXP (XEXP (XEXP (x
, 0), 1), 1);
11317 other
= XEXP (x
, 1);
11325 x
= gen_rtx_PLUS (Pmode
,
11326 gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 0),
11327 XEXP (XEXP (XEXP (x
, 0), 1), 0)),
11328 plus_constant (Pmode
, other
,
11329 INTVAL (constant
)));
11333 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
11336 if (GET_CODE (XEXP (x
, 0)) == MULT
)
11339 XEXP (x
, 0) = copy_addr_to_reg (XEXP (x
, 0));
11342 if (GET_CODE (XEXP (x
, 1)) == MULT
)
11345 XEXP (x
, 1) = copy_addr_to_reg (XEXP (x
, 1));
11349 && REG_P (XEXP (x
, 1))
11350 && REG_P (XEXP (x
, 0)))
11353 if (flag_pic
&& SYMBOLIC_CONST (XEXP (x
, 1)))
11356 x
= legitimize_pic_address (x
, 0);
11359 if (changed
&& ix86_legitimate_address_p (mode
, x
, false))
11362 if (REG_P (XEXP (x
, 0)))
11364 rtx temp
= gen_reg_rtx (Pmode
);
11365 rtx val
= force_operand (XEXP (x
, 1), temp
);
11368 val
= convert_to_mode (Pmode
, val
, 1);
11369 emit_move_insn (temp
, val
);
11372 XEXP (x
, 1) = temp
;
11376 else if (REG_P (XEXP (x
, 1)))
11378 rtx temp
= gen_reg_rtx (Pmode
);
11379 rtx val
= force_operand (XEXP (x
, 0), temp
);
11382 val
= convert_to_mode (Pmode
, val
, 1);
11383 emit_move_insn (temp
, val
);
11386 XEXP (x
, 0) = temp
;
11394 /* Print an integer constant expression in assembler syntax. Addition
11395 and subtraction are the only arithmetic that may appear in these
11396 expressions. FILE is the stdio stream to write to, X is the rtx, and
11397 CODE is the operand print code from the output string. */
11400 output_pic_addr_const (FILE *file
, rtx x
, int code
)
11404 switch (GET_CODE (x
))
11407 gcc_assert (flag_pic
);
11412 if (TARGET_64BIT
|| ! TARGET_MACHO_SYMBOL_STUBS
)
11413 output_addr_const (file
, x
);
11416 const char *name
= XSTR (x
, 0);
11418 /* Mark the decl as referenced so that cgraph will
11419 output the function. */
11420 if (SYMBOL_REF_DECL (x
))
11421 mark_decl_referenced (SYMBOL_REF_DECL (x
));
11424 if (MACHOPIC_INDIRECT
11425 && machopic_classify_symbol (x
) == MACHOPIC_UNDEFINED_FUNCTION
)
11426 name
= machopic_indirection_name (x
, /*stub_p=*/true);
11428 assemble_name (file
, name
);
11430 if (!TARGET_MACHO
&& !(TARGET_64BIT
&& TARGET_PECOFF
)
11431 && code
== 'P' && ! SYMBOL_REF_LOCAL_P (x
))
11432 fputs ("@PLT", file
);
11439 ASM_GENERATE_INTERNAL_LABEL (buf
, "L", CODE_LABEL_NUMBER (x
));
11440 assemble_name (asm_out_file
, buf
);
11444 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
11448 /* This used to output parentheses around the expression,
11449 but that does not work on the 386 (either ATT or BSD assembler). */
11450 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11454 /* We can't handle floating point constants;
11455 TARGET_PRINT_OPERAND must handle them. */
11456 output_operand_lossage ("floating constant misused");
11460 /* Some assemblers need integer constants to appear first. */
11461 if (CONST_INT_P (XEXP (x
, 0)))
11463 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11465 output_pic_addr_const (file
, XEXP (x
, 1), code
);
11469 gcc_assert (CONST_INT_P (XEXP (x
, 1)));
11470 output_pic_addr_const (file
, XEXP (x
, 1), code
);
11472 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11478 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? '(' : '[', file
);
11479 output_pic_addr_const (file
, XEXP (x
, 0), code
);
11481 output_pic_addr_const (file
, XEXP (x
, 1), code
);
11483 putc (ASSEMBLER_DIALECT
== ASM_INTEL
? ')' : ']', file
);
11487 gcc_assert (XVECLEN (x
, 0) == 1);
11488 output_pic_addr_const (file
, XVECEXP (x
, 0, 0), code
);
11489 switch (XINT (x
, 1))
11492 fputs ("@GOT", file
);
11494 case UNSPEC_GOTOFF
:
11495 fputs ("@GOTOFF", file
);
11497 case UNSPEC_PLTOFF
:
11498 fputs ("@PLTOFF", file
);
11501 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
11502 "(%rip)" : "[rip]", file
);
11504 case UNSPEC_GOTPCREL
:
11505 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
11506 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file
);
11508 case UNSPEC_GOTTPOFF
:
11509 /* FIXME: This might be @TPOFF in Sun ld too. */
11510 fputs ("@gottpoff", file
);
11513 fputs ("@tpoff", file
);
11515 case UNSPEC_NTPOFF
:
11517 fputs ("@tpoff", file
);
11519 fputs ("@ntpoff", file
);
11521 case UNSPEC_DTPOFF
:
11522 fputs ("@dtpoff", file
);
11524 case UNSPEC_GOTNTPOFF
:
11526 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
11527 "@gottpoff(%rip)": "@gottpoff[rip]", file
);
11529 fputs ("@gotntpoff", file
);
11531 case UNSPEC_INDNTPOFF
:
11532 fputs ("@indntpoff", file
);
11535 case UNSPEC_MACHOPIC_OFFSET
:
11537 machopic_output_function_base_name (file
);
11541 output_operand_lossage ("invalid UNSPEC as operand");
11547 output_operand_lossage ("invalid expression as operand");
11551 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11552 We need to emit DTP-relative relocations. */
/* NOTE(review): the braces and the `switch (size)` scaffolding between the
   @dtpoff output and the two trailing fragments are missing from this
   extraction.  Visible behavior: emit ".long <x>@dtpoff", and (presumably in
   the 8-byte case -- TODO confirm against the full source) pad with ", 0";
   other sizes hit gcc_unreachable.  */
11554 static void ATTRIBUTE_UNUSED
11555 i386_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
11557 fputs (ASM_LONG
, file
);
11558 output_addr_const (file
, x
);
/* DTP-relative relocation suffix.  */
11559 fputs ("@dtpoff", file
);
11565 fputs (", 0", file
);
11568 gcc_unreachable ();
11572 /* Return true if X is a representation of the PIC register. This copes
11573 with calls from ix86_find_base_term, where the register might have
11574 been replaced by a cselib value. */
/* NOTE(review): lossy extraction -- braces and several `return true/false`
   lines are missing.  The visible chain recognizes the PIC register in all
   its disguises: a cselib VALUE equal to it, the UNSPEC_SET_GOT that
   initializes it, the pseudo itself, or a hard reg that originated from
   it.  */
11577 ix86_pic_register_p (rtx x
)
/* cselib may have replaced the reg with a VALUE; compare through cselib.  */
11579 if (GET_CODE (x
) == VALUE
&& CSELIB_VAL_PTR (x
))
11580 return (pic_offset_table_rtx
11581 && rtx_equal_for_cselib_p (x
, pic_offset_table_rtx
));
/* The set_got pattern also stands for the PIC register.  */
11582 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SET_GOT
)
11584 else if (!REG_P (x
))
11586 else if (pic_offset_table_rtx
)
/* Same register number as the (possibly pseudo) PIC register.  */
11588 if (REGNO (x
) == REGNO (pic_offset_table_rtx
))
/* A hard reg whose ORIGINAL_REGNO points back at the PIC pseudo.  */
11590 if (HARD_REGISTER_P (x
)
11591 && !HARD_REGISTER_P (pic_offset_table_rtx
)
11592 && ORIGINAL_REGNO (x
) == REGNO (pic_offset_table_rtx
))
/* No pic_offset_table_rtx: fall back to the fixed hard register.  */
11597 return REGNO (x
) == PIC_OFFSET_TABLE_REGNUM
;
11600 /* Helper function for ix86_delegitimize_address.
11601 Attempt to delegitimize TLS local-exec accesses. */
/* NOTE(review): lossy extraction -- braces, early `return orig_x;` lines and
   some glue are missing.  Visible logic: decompose a %fs/%gs-segment address,
   find the (unspec UNSPEC_NTPOFF [symbol]) in its displacement, and rebuild a
   plain symbol(+offset)(+index*scale)(+base) expression for debug output.  */
11604 ix86_delegitimize_tls_address (rtx orig_x
)
11606 rtx x
= orig_x
, unspec
;
11607 struct ix86_address addr
;
/* Without direct segment references there is nothing to undo.  */
11609 if (!TARGET_TLS_DIRECT_SEG_REFS
)
11613 if (GET_CODE (x
) != PLUS
|| GET_MODE (x
) != Pmode
)
/* Must decompose as a TLS-segment address with a CONST displacement.  */
11615 if (ix86_decompose_address (x
, &addr
) == 0
11616 || addr
.seg
!= DEFAULT_TLS_SEG_REG
11617 || addr
.disp
== NULL_RTX
11618 || GET_CODE (addr
.disp
) != CONST
)
11620 unspec
= XEXP (addr
.disp
, 0);
/* Allow an additional constant offset around the unspec.  */
11621 if (GET_CODE (unspec
) == PLUS
&& CONST_INT_P (XEXP (unspec
, 1)))
11622 unspec
= XEXP (unspec
, 0);
/* Only local-exec (@ntpoff) accesses are handled here.  */
11623 if (GET_CODE (unspec
) != UNSPEC
|| XINT (unspec
, 1) != UNSPEC_NTPOFF
)
11625 x
= XVECEXP (unspec
, 0, 0);
11626 gcc_assert (GET_CODE (x
) == SYMBOL_REF
);
/* Re-attach the constant offset that wrapped the unspec, if any.  */
11627 if (unspec
!= XEXP (addr
.disp
, 0))
11628 x
= gen_rtx_PLUS (Pmode
, x
, XEXP (XEXP (addr
.disp
, 0), 1));
/* Re-attach index*scale from the decomposed address.  */
11631 rtx idx
= addr
.index
;
11632 if (addr
.scale
!= 1)
11633 idx
= gen_rtx_MULT (Pmode
, idx
, GEN_INT (addr
.scale
));
11634 x
= gen_rtx_PLUS (Pmode
, idx
, x
);
/* Re-attach the base register.  */
11637 x
= gen_rtx_PLUS (Pmode
, addr
.base
, x
);
/* If the original was a MEM, rebuild it around the new address.  */
11638 if (MEM_P (orig_x
))
11639 x
= replace_equiv_address_nv (orig_x
, x
);
11643 /* In the name of slightly smaller debug output, and to cater to
11644 general assembler lossage, recognize PIC+GOTOFF and turn it back
11645 into a direct symbol reference.
11647 On Darwin, this is necessary to avoid a crash, because Darwin
11648 has a different PIC label for each routine but the DWARF debugging
11649 information is not associated with any particular routine, so it's
11650 necessary to remove references to the PIC label from RTL stored by
11651 the DWARF output code.
11653 This helper is used in the normal ix86_delegitimize_address
11654 entrypoint (e.g. used in the target delegitimization hook) and
11655 in ix86_find_base_term. As compile time memory optimization, we
11656 avoid allocating rtxes that will not change anything on the outcome
11657 of the callers (find_base_value and find_base_term). */
11660 ix86_delegitimize_address_1 (rtx x
, bool base_term_p
)
11662 rtx orig_x
= delegitimize_mem_from_attrs (x
);
11663 /* addend is NULL or some rtx if x is something+GOTOFF where
11664 something doesn't include the PIC register. */
11665 rtx addend
= NULL_RTX
;
11666 /* reg_addend is NULL or a multiple of some register. */
11667 rtx reg_addend
= NULL_RTX
;
11668 /* const_addend is NULL or a const_int. */
11669 rtx const_addend
= NULL_RTX
;
11670 /* This is the result, or NULL. */
11671 rtx result
= NULL_RTX
;
11680 if (GET_CODE (x
) == CONST
11681 && GET_CODE (XEXP (x
, 0)) == PLUS
11682 && GET_MODE (XEXP (x
, 0)) == Pmode
11683 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
11684 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == UNSPEC
11685 && XINT (XEXP (XEXP (x
, 0), 0), 1) == UNSPEC_PCREL
)
11687 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
11688 base. A CONST can't be arg_pointer_rtx based. */
11689 if (base_term_p
&& MEM_P (orig_x
))
11691 rtx x2
= XVECEXP (XEXP (XEXP (x
, 0), 0), 0, 0);
11692 x
= gen_rtx_PLUS (Pmode
, XEXP (XEXP (x
, 0), 1), x2
);
11693 if (MEM_P (orig_x
))
11694 x
= replace_equiv_address_nv (orig_x
, x
);
11698 if (GET_CODE (x
) == CONST
11699 && GET_CODE (XEXP (x
, 0)) == UNSPEC
11700 && (XINT (XEXP (x
, 0), 1) == UNSPEC_GOTPCREL
11701 || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
)
11702 && (MEM_P (orig_x
) || XINT (XEXP (x
, 0), 1) == UNSPEC_PCREL
))
11704 x
= XVECEXP (XEXP (x
, 0), 0, 0);
11705 if (GET_MODE (orig_x
) != GET_MODE (x
) && MEM_P (orig_x
))
11707 x
= lowpart_subreg (GET_MODE (orig_x
), x
, GET_MODE (x
));
11714 if (ix86_cmodel
!= CM_MEDIUM_PIC
&& ix86_cmodel
!= CM_LARGE_PIC
)
11715 return ix86_delegitimize_tls_address (orig_x
);
11717 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
11718 and -mcmodel=medium -fpic. */
11721 if (GET_CODE (x
) != PLUS
11722 || GET_CODE (XEXP (x
, 1)) != CONST
)
11723 return ix86_delegitimize_tls_address (orig_x
);
11725 if (ix86_pic_register_p (XEXP (x
, 0)))
11726 /* %ebx + GOT/GOTOFF */
11728 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
11730 /* %ebx + %reg * scale + GOT/GOTOFF */
11731 reg_addend
= XEXP (x
, 0);
11732 if (ix86_pic_register_p (XEXP (reg_addend
, 0)))
11733 reg_addend
= XEXP (reg_addend
, 1);
11734 else if (ix86_pic_register_p (XEXP (reg_addend
, 1)))
11735 reg_addend
= XEXP (reg_addend
, 0);
11738 reg_addend
= NULL_RTX
;
11739 addend
= XEXP (x
, 0);
11743 addend
= XEXP (x
, 0);
11745 x
= XEXP (XEXP (x
, 1), 0);
11746 if (GET_CODE (x
) == PLUS
11747 && CONST_INT_P (XEXP (x
, 1)))
11749 const_addend
= XEXP (x
, 1);
11753 if (GET_CODE (x
) == UNSPEC
11754 && ((XINT (x
, 1) == UNSPEC_GOT
&& MEM_P (orig_x
) && !addend
)
11755 || (XINT (x
, 1) == UNSPEC_GOTOFF
&& !MEM_P (orig_x
))
11756 || (XINT (x
, 1) == UNSPEC_PLTOFF
&& ix86_cmodel
== CM_LARGE_PIC
11757 && !MEM_P (orig_x
) && !addend
)))
11758 result
= XVECEXP (x
, 0, 0);
11760 if (!TARGET_64BIT
&& TARGET_MACHO
&& darwin_local_data_pic (x
)
11761 && !MEM_P (orig_x
))
11762 result
= XVECEXP (x
, 0, 0);
11765 return ix86_delegitimize_tls_address (orig_x
);
11767 /* For (PLUS something CONST_INT) both find_base_{value,term} just
11768 recurse on the first operand. */
11769 if (const_addend
&& !base_term_p
)
11770 result
= gen_rtx_CONST (Pmode
, gen_rtx_PLUS (Pmode
, result
, const_addend
));
11772 result
= gen_rtx_PLUS (Pmode
, reg_addend
, result
);
11775 /* If the rest of original X doesn't involve the PIC register, add
11776 addend and subtract pic_offset_table_rtx. This can happen e.g.
11778 leal (%ebx, %ecx, 4), %ecx
11780 movl foo@GOTOFF(%ecx), %edx
11781 in which case we return (%ecx - %ebx) + foo
11782 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
11783 and reload has completed. Don't do the latter for debug,
11784 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
11785 if (pic_offset_table_rtx
11786 && (!reload_completed
|| !ix86_use_pseudo_pic_reg ()))
11787 result
= gen_rtx_PLUS (Pmode
, gen_rtx_MINUS (Pmode
, copy_rtx (addend
),
11788 pic_offset_table_rtx
),
11790 else if (base_term_p
11791 && pic_offset_table_rtx
11793 && !TARGET_VXWORKS_RTP
)
11795 rtx tmp
= gen_rtx_SYMBOL_REF (Pmode
, GOT_SYMBOL_NAME
);
11796 tmp
= gen_rtx_MINUS (Pmode
, copy_rtx (addend
), tmp
);
11797 result
= gen_rtx_PLUS (Pmode
, tmp
, result
);
11802 if (GET_MODE (orig_x
) != Pmode
&& MEM_P (orig_x
))
11804 result
= lowpart_subreg (GET_MODE (orig_x
), result
, Pmode
);
11805 if (result
== NULL_RTX
)
11811 /* The normal instantiation of the above template. */
/* NOTE(review): return type and braces are missing from this extraction.
   Thin wrapper: delegitimize X without the find_base_term-specific
   short cuts (base_term_p == false).  */
11814 ix86_delegitimize_address (rtx x
)
11816 return ix86_delegitimize_address_1 (x
, false);
11819 /* If X is a machine specific address (i.e. a symbol or label being
11820 referenced as a displacement from the GOT implemented using an
11821 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): lossy extraction -- return type, braces, the local `term`
   declaration and an apparent TARGET_64BIT guard are missing.  Visible
   logic: peel CONST (PLUS ... const_int) down to an UNSPEC_GOTPCREL /
   UNSPEC_PCREL and return its symbol operand; otherwise delegate to
   ix86_delegitimize_address_1 in base-term mode.  */
11824 ix86_find_base_term (rtx x
)
11830 if (GET_CODE (x
) != CONST
)
11832 term
= XEXP (x
, 0);
/* Skip over an added constant offset.  */
11833 if (GET_CODE (term
) == PLUS
11834 && CONST_INT_P (XEXP (term
, 1)))
11835 term
= XEXP (term
, 0);
/* Only PC-relative GOT unspecs carry a base term here.  */
11836 if (GET_CODE (term
) != UNSPEC
11837 || (XINT (term
, 1) != UNSPEC_GOTPCREL
11838 && XINT (term
, 1) != UNSPEC_PCREL
))
11841 return XVECEXP (term
, 0, 0);
/* Fallback: full delegitimization in base-term mode.  */
11844 return ix86_delegitimize_address_1 (x
, true);
11847 /* Return true if X shouldn't be emitted into the debug info.
11848 Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
11849 symbol easily into the .debug_info section, so we need not to
11850 delegitimize, but instead assemble as @gotoff.
11851 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
11852 assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */
/* NOTE(review): return type, braces and the `return true/false` lines are
   missing from this extraction; only the two rejection conditions are
   visible.  */
11855 ix86_const_not_ok_for_debug_p (rtx x
)
/* Reject any UNSPEC except @gotoff.  */
11857 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) != UNSPEC_GOTOFF
)
/* Reject a literal _GLOBAL_OFFSET_TABLE_ symbol reference.  */
11860 if (SYMBOL_REF_P (x
) && strcmp (XSTR (x
, 0), GOT_SYMBOL_NAME
) == 0)
11867 put_condition_code (enum rtx_code code
, machine_mode mode
, bool reverse
,
11868 bool fp
, FILE *file
)
11870 const char *suffix
;
11872 if (mode
== CCFPmode
)
11874 code
= ix86_fp_compare_code_to_integer (code
);
11878 code
= reverse_condition (code
);
11883 gcc_assert (mode
!= CCGZmode
);
11907 gcc_assert (mode
!= CCGZmode
);
11931 gcc_assert (mode
== CCmode
|| mode
== CCNOmode
|| mode
== CCGCmode
);
11935 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11936 Those same assemblers have the same but opposite lossage on cmov. */
11937 if (mode
== CCmode
)
11938 suffix
= fp
? "nbe" : "a";
11940 gcc_unreachable ();
11957 gcc_unreachable ();
11961 if (mode
== CCmode
|| mode
== CCGZmode
)
11963 else if (mode
== CCCmode
)
11964 suffix
= fp
? "b" : "c";
11966 gcc_unreachable ();
11983 gcc_unreachable ();
11987 if (mode
== CCmode
|| mode
== CCGZmode
)
11989 else if (mode
== CCCmode
)
11990 suffix
= fp
? "nb" : "nc";
11992 gcc_unreachable ();
11995 gcc_assert (mode
== CCmode
|| mode
== CCGCmode
|| mode
== CCNOmode
);
11999 if (mode
== CCmode
)
12002 gcc_unreachable ();
12005 suffix
= fp
? "u" : "p";
12008 suffix
= fp
? "nu" : "np";
12011 gcc_unreachable ();
12013 fputs (suffix
, file
);
12016 /* Print the name of register X to FILE based on its machine mode and number.
12017 If CODE is 'w', pretend the mode is HImode.
12018 If CODE is 'b', pretend the mode is QImode.
12019 If CODE is 'k', pretend the mode is SImode.
12020 If CODE is 'q', pretend the mode is DImode.
12021 If CODE is 'x', pretend the mode is V4SFmode.
12022 If CODE is 't', pretend the mode is V8SFmode.
12023 If CODE is 'g', pretend the mode is V16SFmode.
12024 If CODE is 'h', pretend the reg is the 'high' byte register.
12025 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12026 If CODE is 'd', duplicate the operand for AVX instruction.
12027 If CODE is 'V', print naked full integer register name without %.
12031 print_reg (rtx x
, int code
, FILE *file
)
12035 unsigned int regno
;
12038 if (ASSEMBLER_DIALECT
== ASM_ATT
&& code
!= 'V')
12043 gcc_assert (TARGET_64BIT
);
12044 fputs ("rip", file
);
12048 if (code
== 'y' && STACK_TOP_P (x
))
12050 fputs ("st(0)", file
);
12056 else if (code
== 'b')
12058 else if (code
== 'k')
12060 else if (code
== 'q')
12062 else if (code
== 'h')
12064 else if (code
== 'x')
12066 else if (code
== 't')
12068 else if (code
== 'g')
12071 msize
= GET_MODE_SIZE (GET_MODE (x
));
12075 if (regno
== ARG_POINTER_REGNUM
12076 || regno
== FRAME_POINTER_REGNUM
12077 || regno
== FPSR_REG
)
12079 output_operand_lossage
12080 ("invalid use of register '%s'", reg_names
[regno
]);
12083 else if (regno
== FLAGS_REG
)
12085 output_operand_lossage ("invalid use of asm flag output");
12091 if (GENERAL_REGNO_P (regno
))
12092 msize
= GET_MODE_SIZE (word_mode
);
12094 error ("%<V%> modifier on non-integer register");
12097 duplicated
= code
== 'd' && TARGET_AVX
;
12104 if (GENERAL_REGNO_P (regno
) && msize
> GET_MODE_SIZE (word_mode
))
12105 warning (0, "unsupported size for integer register");
12108 if (LEGACY_INT_REGNO_P (regno
))
12109 putc (msize
> 4 && TARGET_64BIT
? 'r' : 'e', file
);
12113 reg
= hi_reg_name
[regno
];
12116 if (regno
>= ARRAY_SIZE (qi_reg_name
))
12118 if (!ANY_QI_REGNO_P (regno
))
12119 error ("unsupported size for integer register");
12120 reg
= qi_reg_name
[regno
];
12123 if (regno
>= ARRAY_SIZE (qi_high_reg_name
))
12125 reg
= qi_high_reg_name
[regno
];
12129 if (SSE_REGNO_P (regno
))
12131 gcc_assert (!duplicated
);
12132 putc (msize
== 32 ? 'y' : 'z', file
);
12133 reg
= hi_reg_name
[regno
] + 1;
12138 gcc_unreachable ();
12143 /* Irritatingly, AMD extended registers use
12144 different naming convention: "r%d[bwd]" */
12145 if (REX_INT_REGNO_P (regno
))
12147 gcc_assert (TARGET_64BIT
);
12151 error ("extended registers have no high halves");
12166 error ("unsupported operand size for extended register");
12174 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12175 fprintf (file
, ", %%%s", reg
);
12177 fprintf (file
, ", %s", reg
);
12181 /* Meaning of CODE:
12182 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12183 C -- print opcode suffix for set/cmov insn.
12184 c -- like C, but print reversed condition
12185 F,f -- likewise, but for floating-point.
12186 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12188 R -- print embedded rounding and sae.
12189 r -- print only sae.
12190 z -- print the opcode suffix for the size of the current operand.
12191 Z -- likewise, with special suffixes for x87 instructions.
12192 * -- print a star (in certain assembler syntax)
12193 A -- print an absolute memory reference.
12194 E -- print address with DImode register names if TARGET_64BIT.
12195 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12196 s -- print a shift double count, followed by the assemblers argument
12198 b -- print the QImode name of the register for the indicated operand.
12199 %b0 would print %al if operands[0] is reg 0.
12200 w -- likewise, print the HImode name of the register.
12201 k -- likewise, print the SImode name of the register.
12202 q -- likewise, print the DImode name of the register.
12203 x -- likewise, print the V4SFmode name of the register.
12204 t -- likewise, print the V8SFmode name of the register.
12205 g -- likewise, print the V16SFmode name of the register.
12206 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12207 y -- print "st(0)" instead of "st" as a register.
12208 d -- print duplicated register operand for AVX instruction.
12209 D -- print condition for SSE cmp instruction.
12210 P -- if PIC, print an @PLT suffix.
12211 p -- print raw symbol name.
12212 X -- don't print any sort of PIC '@' suffix for a symbol.
12213 & -- print some in-use local-dynamic symbol name.
12214 H -- print a memory address offset by 8; used for sse high-parts
12215 Y -- print condition for XOP pcom* instruction.
12216 V -- print naked full integer register name without %.
12217 + -- print a branch hint as 'cs' or 'ds' prefix
12218 ; -- print a semicolon (after prefixes due to bug in older gas).
12219 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12220 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12221 M -- print addr32 prefix for TARGET_X32 with VSIB address.
12222 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12226 ix86_print_operand (FILE *file
, rtx x
, int code
)
12233 switch (ASSEMBLER_DIALECT
)
12240 /* Intel syntax. For absolute addresses, registers should not
12241 be surrounded by braces. */
12245 ix86_print_operand (file
, x
, 0);
12252 gcc_unreachable ();
12255 ix86_print_operand (file
, x
, 0);
12259 /* Wrap address in an UNSPEC to declare special handling. */
12261 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
12263 output_address (VOIDmode
, x
);
12267 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12272 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12277 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12282 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12287 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12292 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12297 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12298 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
12301 switch (GET_MODE_SIZE (GET_MODE (x
)))
12316 output_operand_lossage ("invalid operand size for operand "
12326 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
12328 /* Opcodes don't get size suffixes if using Intel opcodes. */
12329 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12332 switch (GET_MODE_SIZE (GET_MODE (x
)))
12351 output_operand_lossage ("invalid operand size for operand "
12357 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
12358 warning (0, "non-integer operand used with operand code %<z%>");
12362 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12363 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12366 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
12368 switch (GET_MODE_SIZE (GET_MODE (x
)))
12371 #ifdef HAVE_AS_IX86_FILDS
12381 #ifdef HAVE_AS_IX86_FILDQ
12384 fputs ("ll", file
);
12392 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
12394 /* 387 opcodes don't get size suffixes
12395 if the operands are registers. */
12396 if (STACK_REG_P (x
))
12399 switch (GET_MODE_SIZE (GET_MODE (x
)))
12420 output_operand_lossage ("invalid operand type used with "
12421 "operand code 'Z'");
12425 output_operand_lossage ("invalid operand size for operand code 'Z'");
12445 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
12447 ix86_print_operand (file
, x
, 0);
12448 fputs (", ", file
);
12453 switch (GET_CODE (x
))
12456 fputs ("neq", file
);
12459 fputs ("eq", file
);
12463 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
12467 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
12471 fputs ("le", file
);
12475 fputs ("lt", file
);
12478 fputs ("unord", file
);
12481 fputs ("ord", file
);
12484 fputs ("ueq", file
);
12487 fputs ("nlt", file
);
12490 fputs ("nle", file
);
12493 fputs ("ule", file
);
12496 fputs ("ult", file
);
12499 fputs ("une", file
);
12502 output_operand_lossage ("operand is not a condition code, "
12503 "invalid operand code 'Y'");
12509 /* Little bit of braindamage here. The SSE compare instructions
12510 does use completely different names for the comparisons that the
12511 fp conditional moves. */
12512 switch (GET_CODE (x
))
12517 fputs ("eq_us", file
);
12522 fputs ("eq", file
);
12527 fputs ("nge", file
);
12532 fputs ("lt", file
);
12537 fputs ("ngt", file
);
12542 fputs ("le", file
);
12545 fputs ("unord", file
);
12550 fputs ("neq_oq", file
);
12555 fputs ("neq", file
);
12560 fputs ("ge", file
);
12565 fputs ("nlt", file
);
12570 fputs ("gt", file
);
12575 fputs ("nle", file
);
12578 fputs ("ord", file
);
12581 output_operand_lossage ("operand is not a condition code, "
12582 "invalid operand code 'D'");
12589 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12590 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12592 gcc_fallthrough ();
12597 if (!COMPARISON_P (x
))
12599 output_operand_lossage ("operand is not a condition code, "
12600 "invalid operand code '%c'", code
);
12603 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
12604 code
== 'c' || code
== 'f',
12605 code
== 'F' || code
== 'f',
12610 if (!offsettable_memref_p (x
))
12612 output_operand_lossage ("operand is not an offsettable memory "
12613 "reference, invalid operand code 'H'");
12616 /* It doesn't actually matter what mode we use here, as we're
12617 only going to use this for printing. */
12618 x
= adjust_address_nv (x
, DImode
, 8);
12619 /* Output 'qword ptr' for intel assembler dialect. */
12620 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12625 if (!CONST_INT_P (x
))
12627 output_operand_lossage ("operand is not an integer, invalid "
12628 "operand code 'K'");
12632 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
12633 #ifdef HAVE_AS_IX86_HLE
12634 fputs ("xacquire ", file
);
12636 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
12638 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
12639 #ifdef HAVE_AS_IX86_HLE
12640 fputs ("xrelease ", file
);
12642 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
12644 /* We do not want to print value of the operand. */
12648 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
12649 fputs ("{z}", file
);
12653 if (!CONST_INT_P (x
) || INTVAL (x
) != ROUND_SAE
)
12655 output_operand_lossage ("operand is not a specific integer, "
12656 "invalid operand code 'r'");
12660 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12661 fputs (", ", file
);
12663 fputs ("{sae}", file
);
12665 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12666 fputs (", ", file
);
12671 if (!CONST_INT_P (x
))
12673 output_operand_lossage ("operand is not an integer, invalid "
12674 "operand code 'R'");
12678 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12679 fputs (", ", file
);
12681 switch (INTVAL (x
))
12683 case ROUND_NEAREST_INT
| ROUND_SAE
:
12684 fputs ("{rn-sae}", file
);
12686 case ROUND_NEG_INF
| ROUND_SAE
:
12687 fputs ("{rd-sae}", file
);
12689 case ROUND_POS_INF
| ROUND_SAE
:
12690 fputs ("{ru-sae}", file
);
12692 case ROUND_ZERO
| ROUND_SAE
:
12693 fputs ("{rz-sae}", file
);
12696 output_operand_lossage ("operand is not a specific integer, "
12697 "invalid operand code 'R'");
12700 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12701 fputs (", ", file
);
12706 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12712 const char *name
= get_some_local_dynamic_name ();
12714 output_operand_lossage ("'%%&' used without any "
12715 "local dynamic TLS references");
12717 assemble_name (file
, name
);
12726 || optimize_function_for_size_p (cfun
)
12727 || !TARGET_BRANCH_PREDICTION_HINTS
)
12730 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
12733 int pred_val
= profile_probability::from_reg_br_prob_note
12734 (XINT (x
, 0)).to_reg_br_prob_base ();
12736 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
12737 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
12739 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
12741 = final_forward_branch_p (current_output_insn
) == 0;
12743 /* Emit hints only in the case default branch prediction
12744 heuristics would fail. */
12745 if (taken
!= cputaken
)
12747 /* We use 3e (DS) prefix for taken branches and
12748 2e (CS) prefix for not taken branches. */
12750 fputs ("ds ; ", file
);
12752 fputs ("cs ; ", file
);
12760 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12766 putc (TARGET_AVX2
? 'i' : 'f', file
);
12772 /* NB: 32-bit indices in VSIB address are sign-extended
12773 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
12774 sign-extended to 0xfffffffff7fa3010 which is invalid
12775 address. Add addr32 prefix if there is no base
12776 register nor symbol. */
12778 struct ix86_address parts
;
12779 ok
= ix86_decompose_address (x
, &parts
);
12780 gcc_assert (ok
&& parts
.index
== NULL_RTX
);
12781 if (parts
.base
== NULL_RTX
12782 && (parts
.disp
== NULL_RTX
12783 || !symbolic_operand (parts
.disp
,
12784 GET_MODE (parts
.disp
))))
12785 fputs ("addr32 ", file
);
12790 if (TARGET_64BIT
&& Pmode
!= word_mode
)
12791 fputs ("addr32 ", file
);
12795 if (ix86_notrack_prefixed_insn_p (current_output_insn
))
12796 fputs ("notrack ", file
);
12800 output_operand_lossage ("invalid operand code '%c'", code
);
12805 print_reg (x
, code
, file
);
12807 else if (MEM_P (x
))
12809 rtx addr
= XEXP (x
, 0);
12811 /* No `byte ptr' prefix for call instructions ... */
12812 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
12814 machine_mode mode
= GET_MODE (x
);
12817 /* Check for explicit size override codes. */
12820 else if (code
== 'w')
12822 else if (code
== 'k')
12824 else if (code
== 'q')
12826 else if (code
== 'x')
12828 else if (code
== 't')
12830 else if (code
== 'g')
12832 else if (mode
== BLKmode
)
12833 /* ... or BLKmode operands, when not overridden. */
12836 switch (GET_MODE_SIZE (mode
))
12838 case 1: size
= "BYTE"; break;
12839 case 2: size
= "WORD"; break;
12840 case 4: size
= "DWORD"; break;
12841 case 8: size
= "QWORD"; break;
12842 case 12: size
= "TBYTE"; break;
12844 if (mode
== XFmode
)
12849 case 32: size
= "YMMWORD"; break;
12850 case 64: size
= "ZMMWORD"; break;
12852 gcc_unreachable ();
12856 fputs (size
, file
);
12857 fputs (" PTR ", file
);
12861 if (this_is_asm_operands
&& ! address_operand (addr
, VOIDmode
))
12862 output_operand_lossage ("invalid constraints for operand");
12864 ix86_print_operand_address_as
12865 (file
, addr
, MEM_ADDR_SPACE (x
), code
== 'p' || code
== 'P');
12868 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == SFmode
)
12872 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
12874 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12876 /* Sign extend 32bit SFmode immediate to 8 bytes. */
12878 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
12879 (unsigned long long) (int) l
);
12881 fprintf (file
, "0x%08x", (unsigned int) l
);
12884 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == DFmode
)
12888 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
12890 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12892 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
12895 /* These float cases don't actually occur as immediate operands. */
12896 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == XFmode
)
12900 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
12901 fputs (dstr
, file
);
12906 /* We have patterns that allow zero sets of memory, for instance.
12907 In 64-bit mode, we should probably support all 8-byte vectors,
12908 since we can in fact encode that into an immediate. */
12909 if (GET_CODE (x
) == CONST_VECTOR
)
12911 if (x
!= CONST0_RTX (GET_MODE (x
)))
12912 output_operand_lossage ("invalid vector immediate");
12916 if (code
!= 'P' && code
!= 'p')
12918 if (CONST_INT_P (x
))
12920 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12923 else if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
12924 || GET_CODE (x
) == LABEL_REF
)
12926 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12929 fputs ("OFFSET FLAT:", file
);
12932 if (CONST_INT_P (x
))
12933 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
12934 else if (flag_pic
|| MACHOPIC_INDIRECT
)
12935 output_pic_addr_const (file
, x
, code
);
12937 output_addr_const (file
, x
);
12942 ix86_print_operand_punct_valid_p (unsigned char code
)
12944 return (code
== '*' || code
== '+' || code
== '&' || code
== ';'
12945 || code
== '~' || code
== '^' || code
== '!');
12948 /* Print a memory operand whose address is ADDR. */
12951 ix86_print_operand_address_as (FILE *file
, rtx addr
,
12952 addr_space_t as
, bool no_rip
)
12954 struct ix86_address parts
;
12955 rtx base
, index
, disp
;
12961 if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_VSIBADDR
)
12963 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
12964 gcc_assert (parts
.index
== NULL_RTX
);
12965 parts
.index
= XVECEXP (addr
, 0, 1);
12966 parts
.scale
= INTVAL (XVECEXP (addr
, 0, 2));
12967 addr
= XVECEXP (addr
, 0, 0);
12970 else if (GET_CODE (addr
) == UNSPEC
&& XINT (addr
, 1) == UNSPEC_LEA_ADDR
)
12972 gcc_assert (TARGET_64BIT
);
12973 ok
= ix86_decompose_address (XVECEXP (addr
, 0, 0), &parts
);
12977 ok
= ix86_decompose_address (addr
, &parts
);
12982 index
= parts
.index
;
12984 scale
= parts
.scale
;
12986 if (ADDR_SPACE_GENERIC_P (as
))
12989 gcc_assert (ADDR_SPACE_GENERIC_P (parts
.seg
));
12991 if (!ADDR_SPACE_GENERIC_P (as
))
12993 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12998 case ADDR_SPACE_SEG_FS
:
12999 fputs ("fs:", file
);
13001 case ADDR_SPACE_SEG_GS
:
13002 fputs ("gs:", file
);
13005 gcc_unreachable ();
13009 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13010 if (TARGET_64BIT
&& !base
&& !index
&& !no_rip
)
13014 if (GET_CODE (disp
) == CONST
13015 && GET_CODE (XEXP (disp
, 0)) == PLUS
13016 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
13017 symbol
= XEXP (XEXP (disp
, 0), 0);
13019 if (GET_CODE (symbol
) == LABEL_REF
13020 || (GET_CODE (symbol
) == SYMBOL_REF
13021 && SYMBOL_REF_TLS_MODEL (symbol
) == 0))
13025 if (!base
&& !index
)
13027 /* Displacement only requires special attention. */
13028 if (CONST_INT_P (disp
))
13030 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& ADDR_SPACE_GENERIC_P (as
))
13031 fputs ("ds:", file
);
13032 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (disp
));
13034 /* Load the external function address via the GOT slot to avoid PLT. */
13035 else if (GET_CODE (disp
) == CONST
13036 && GET_CODE (XEXP (disp
, 0)) == UNSPEC
13037 && (XINT (XEXP (disp
, 0), 1) == UNSPEC_GOTPCREL
13038 || XINT (XEXP (disp
, 0), 1) == UNSPEC_GOT
)
13039 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp
, 0), 0, 0)))
13040 output_pic_addr_const (file
, disp
, 0);
13042 output_pic_addr_const (file
, disp
, 0);
13044 output_addr_const (file
, disp
);
13048 /* Print SImode register names to force addr32 prefix. */
13049 if (SImode_address_operand (addr
, VOIDmode
))
13053 gcc_assert (TARGET_64BIT
);
13054 switch (GET_CODE (addr
))
13057 gcc_assert (GET_MODE (addr
) == SImode
);
13058 gcc_assert (GET_MODE (SUBREG_REG (addr
)) == DImode
);
13062 gcc_assert (GET_MODE (addr
) == DImode
);
13065 gcc_unreachable ();
13068 gcc_assert (!code
);
13074 && CONST_INT_P (disp
)
13075 && INTVAL (disp
) < -16*1024*1024)
13077 /* X32 runs in 64-bit mode, where displacement, DISP, in
13078 address DISP(%r64), is encoded as 32-bit immediate sign-
13079 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13080 address is %r64 + 0xffffffffbffffd00. When %r64 <
13081 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13082 which is invalid for x32. The correct address is %r64
13083 - 0x40000300 == 0xf7ffdd64. To properly encode
13084 -0x40000300(%r64) for x32, we zero-extend negative
13085 displacement by forcing addr32 prefix which truncates
13086 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13087 zero-extend all negative displacements, including -1(%rsp).
13088 However, for small negative displacements, sign-extension
13089 won't cause overflow. We only zero-extend negative
13090 displacements if they < -16*1024*1024, which is also used
13091 to check legitimate address displacements for PIC. */
13095 /* Since the upper 32 bits of RSP are always zero for x32,
13096 we can encode %esp as %rsp to avoid 0x67 prefix if
13097 there is no index register. */
13098 if (TARGET_X32
&& Pmode
== SImode
13099 && !index
&& base
&& REG_P (base
) && REGNO (base
) == SP_REG
)
13102 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13107 output_pic_addr_const (file
, disp
, 0);
13108 else if (GET_CODE (disp
) == LABEL_REF
)
13109 output_asm_label (disp
);
13111 output_addr_const (file
, disp
);
13116 print_reg (base
, code
, file
);
13120 print_reg (index
, vsib
? 0 : code
, file
);
13121 if (scale
!= 1 || vsib
)
13122 fprintf (file
, ",%d", scale
);
13128 rtx offset
= NULL_RTX
;
13132 /* Pull out the offset of a symbol; print any symbol itself. */
13133 if (GET_CODE (disp
) == CONST
13134 && GET_CODE (XEXP (disp
, 0)) == PLUS
13135 && CONST_INT_P (XEXP (XEXP (disp
, 0), 1)))
13137 offset
= XEXP (XEXP (disp
, 0), 1);
13138 disp
= gen_rtx_CONST (VOIDmode
,
13139 XEXP (XEXP (disp
, 0), 0));
13143 output_pic_addr_const (file
, disp
, 0);
13144 else if (GET_CODE (disp
) == LABEL_REF
)
13145 output_asm_label (disp
);
13146 else if (CONST_INT_P (disp
))
13149 output_addr_const (file
, disp
);
13155 print_reg (base
, code
, file
);
13158 if (INTVAL (offset
) >= 0)
13160 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
13164 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (offset
));
13171 print_reg (index
, vsib
? 0 : code
, file
);
13172 if (scale
!= 1 || vsib
)
13173 fprintf (file
, "*%d", scale
);
13181 ix86_print_operand_address (FILE *file
, machine_mode
/*mode*/, rtx addr
)
13183 ix86_print_operand_address_as (file
, addr
, ADDR_SPACE_GENERIC
, false);
13186 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13189 i386_asm_output_addr_const_extra (FILE *file
, rtx x
)
13193 if (GET_CODE (x
) != UNSPEC
)
13196 op
= XVECEXP (x
, 0, 0);
13197 switch (XINT (x
, 1))
13199 case UNSPEC_GOTOFF
:
13200 output_addr_const (file
, op
);
13201 fputs ("@gotoff", file
);
13203 case UNSPEC_GOTTPOFF
:
13204 output_addr_const (file
, op
);
13205 /* FIXME: This might be @TPOFF in Sun ld. */
13206 fputs ("@gottpoff", file
);
13209 output_addr_const (file
, op
);
13210 fputs ("@tpoff", file
);
13212 case UNSPEC_NTPOFF
:
13213 output_addr_const (file
, op
);
13215 fputs ("@tpoff", file
);
13217 fputs ("@ntpoff", file
);
13219 case UNSPEC_DTPOFF
:
13220 output_addr_const (file
, op
);
13221 fputs ("@dtpoff", file
);
13223 case UNSPEC_GOTNTPOFF
:
13224 output_addr_const (file
, op
);
13226 fputs (ASSEMBLER_DIALECT
== ASM_ATT
?
13227 "@gottpoff(%rip)" : "@gottpoff[rip]", file
);
13229 fputs ("@gotntpoff", file
);
13231 case UNSPEC_INDNTPOFF
:
13232 output_addr_const (file
, op
);
13233 fputs ("@indntpoff", file
);
13236 case UNSPEC_MACHOPIC_OFFSET
:
13237 output_addr_const (file
, op
);
13239 machopic_output_function_base_name (file
);
13251 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13252 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13253 is the expression of the binary operation. The output may either be
13254 emitted here, or returned to the caller, like all output_* functions.
13256 There is no guarantee that the operands are the same mode, as they
13257 might be within FLOAT or FLOAT_EXTEND expressions. */
13259 #ifndef SYSV386_COMPAT
13260 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13261 wants to fix the assemblers because that causes incompatibility
13262 with gcc. No-one wants to fix gcc because that causes
13263 incompatibility with assemblers... You can use the option of
13264 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13265 #define SYSV386_COMPAT 1
13269 output_387_binary_op (rtx_insn
*insn
, rtx
*operands
)
13271 static char buf
[40];
13274 = (SSE_REG_P (operands
[0])
13275 || SSE_REG_P (operands
[1]) || SSE_REG_P (operands
[2]));
13279 else if (GET_MODE_CLASS (GET_MODE (operands
[1])) == MODE_INT
13280 || GET_MODE_CLASS (GET_MODE (operands
[2])) == MODE_INT
)
13287 switch (GET_CODE (operands
[3]))
13298 gcc_unreachable ();
13305 p
= (GET_MODE (operands
[0]) == SFmode
) ? "ss" : "sd";
13309 p
= "\t{%2, %1, %0|%0, %1, %2}";
13311 p
= "\t{%2, %0|%0, %2}";
13317 /* Even if we do not want to check the inputs, this documents input
13318 constraints. Which helps in understanding the following code. */
13321 if (STACK_REG_P (operands
[0])
13322 && ((REG_P (operands
[1])
13323 && REGNO (operands
[0]) == REGNO (operands
[1])
13324 && (STACK_REG_P (operands
[2]) || MEM_P (operands
[2])))
13325 || (REG_P (operands
[2])
13326 && REGNO (operands
[0]) == REGNO (operands
[2])
13327 && (STACK_REG_P (operands
[1]) || MEM_P (operands
[1]))))
13328 && (STACK_TOP_P (operands
[1]) || STACK_TOP_P (operands
[2])))
13331 gcc_unreachable ();
13334 switch (GET_CODE (operands
[3]))
13338 if (REG_P (operands
[2]) && REGNO (operands
[0]) == REGNO (operands
[2]))
13339 std::swap (operands
[1], operands
[2]);
13341 /* know operands[0] == operands[1]. */
13343 if (MEM_P (operands
[2]))
13349 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
13351 if (STACK_TOP_P (operands
[0]))
13352 /* How is it that we are storing to a dead operand[2]?
13353 Well, presumably operands[1] is dead too. We can't
13354 store the result to st(0) as st(0) gets popped on this
13355 instruction. Instead store to operands[2] (which I
13356 think has to be st(1)). st(1) will be popped later.
13357 gcc <= 2.8.1 didn't have this check and generated
13358 assembly code that the Unixware assembler rejected. */
13359 p
= "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13361 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13365 if (STACK_TOP_P (operands
[0]))
13366 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13368 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13373 if (MEM_P (operands
[1]))
13379 if (MEM_P (operands
[2]))
13385 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[2])))
13388 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13389 derived assemblers, confusingly reverse the direction of
13390 the operation for fsub{r} and fdiv{r} when the
13391 destination register is not st(0). The Intel assembler
13392 doesn't have this brain damage. Read !SYSV386_COMPAT to
13393 figure out what the hardware really does. */
13394 if (STACK_TOP_P (operands
[0]))
13395 p
= "{p\t%0, %2|rp\t%2, %0}";
13397 p
= "{rp\t%2, %0|p\t%0, %2}";
13399 if (STACK_TOP_P (operands
[0]))
13400 /* As above for fmul/fadd, we can't store to st(0). */
13401 p
= "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13403 p
= "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13408 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
13411 if (STACK_TOP_P (operands
[0]))
13412 p
= "{rp\t%0, %1|p\t%1, %0}";
13414 p
= "{p\t%1, %0|rp\t%0, %1}";
13416 if (STACK_TOP_P (operands
[0]))
13417 p
= "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13419 p
= "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13424 if (STACK_TOP_P (operands
[0]))
13426 if (STACK_TOP_P (operands
[1]))
13427 p
= "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13429 p
= "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13432 else if (STACK_TOP_P (operands
[1]))
13435 p
= "{\t%1, %0|r\t%0, %1}";
13437 p
= "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13443 p
= "{r\t%2, %0|\t%0, %2}";
13445 p
= "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13451 gcc_unreachable ();
13458 /* Return needed mode for entity in optimize_mode_switching pass. */
13461 ix86_dirflag_mode_needed (rtx_insn
*insn
)
13465 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
13466 return X86_DIRFLAG_ANY
;
13468 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
13469 return TARGET_CLD
? X86_DIRFLAG_ANY
: X86_DIRFLAG_RESET
;
13472 if (recog_memoized (insn
) < 0)
13473 return X86_DIRFLAG_ANY
;
13475 if (get_attr_type (insn
) == TYPE_STR
)
13477 /* Emit cld instruction if stringops are used in the function. */
13478 if (cfun
->machine
->func_type
== TYPE_NORMAL
)
13479 return TARGET_CLD
? X86_DIRFLAG_RESET
: X86_DIRFLAG_ANY
;
13481 return X86_DIRFLAG_RESET
;
13484 return X86_DIRFLAG_ANY
;
13487 /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
13490 ix86_check_avx_upper_register (const_rtx exp
)
13492 return SSE_REG_P (exp
) && GET_MODE_BITSIZE (GET_MODE (exp
)) > 128;
13495 /* Return needed mode for entity in optimize_mode_switching pass. */
13498 ix86_avx_u128_mode_needed (rtx_insn
*insn
)
13504 /* Needed mode is set to AVX_U128_CLEAN if there are
13505 no 256bit or 512bit modes used in function arguments. */
13506 for (link
= CALL_INSN_FUNCTION_USAGE (insn
);
13508 link
= XEXP (link
, 1))
13510 if (GET_CODE (XEXP (link
, 0)) == USE
)
13512 rtx arg
= XEXP (XEXP (link
, 0), 0);
13514 if (ix86_check_avx_upper_register (arg
))
13515 return AVX_U128_DIRTY
;
13519 return AVX_U128_CLEAN
;
13522 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
13523 Hardware changes state only when a 256bit register is written to,
13524 but we need to prevent the compiler from moving optimal insertion
13525 point above eventual read from 256bit or 512 bit register. */
13526 subrtx_iterator::array_type array
;
13527 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
13528 if (ix86_check_avx_upper_register (*iter
))
13529 return AVX_U128_DIRTY
;
13531 return AVX_U128_ANY
;
13534 /* Return mode that i387 must be switched into
13535 prior to the execution of insn. */
13538 ix86_i387_mode_needed (int entity
, rtx_insn
*insn
)
13540 enum attr_i387_cw mode
;
13542 /* The mode UNINITIALIZED is used to store control word after a
13543 function call or ASM pattern. The mode ANY specify that function
13544 has no requirements on the control word and make no changes in the
13545 bits we are interested in. */
13548 || (NONJUMP_INSN_P (insn
)
13549 && (asm_noperands (PATTERN (insn
)) >= 0
13550 || GET_CODE (PATTERN (insn
)) == ASM_INPUT
)))
13551 return I387_CW_UNINITIALIZED
;
13553 if (recog_memoized (insn
) < 0)
13554 return I387_CW_ANY
;
13556 mode
= get_attr_i387_cw (insn
);
13561 if (mode
== I387_CW_TRUNC
)
13566 if (mode
== I387_CW_FLOOR
)
13571 if (mode
== I387_CW_CEIL
)
13576 gcc_unreachable ();
13579 return I387_CW_ANY
;
13582 /* Return mode that entity must be switched into
13583 prior to the execution of insn. */
13586 ix86_mode_needed (int entity
, rtx_insn
*insn
)
13591 return ix86_dirflag_mode_needed (insn
);
13593 return ix86_avx_u128_mode_needed (insn
);
13597 return ix86_i387_mode_needed (entity
, insn
);
13599 gcc_unreachable ();
13604 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
13607 ix86_check_avx_upper_stores (rtx dest
, const_rtx
, void *data
)
13609 if (ix86_check_avx_upper_register (dest
))
13611 bool *used
= (bool *) data
;
13616 /* Calculate mode of upper 128bit AVX registers after the insn. */
13619 ix86_avx_u128_mode_after (int mode
, rtx_insn
*insn
)
13621 rtx pat
= PATTERN (insn
);
13623 if (vzeroupper_pattern (pat
, VOIDmode
)
13624 || vzeroall_pattern (pat
, VOIDmode
))
13625 return AVX_U128_CLEAN
;
13627 /* We know that state is clean after CALL insn if there are no
13628 256bit or 512bit registers used in the function return register. */
13631 bool avx_upper_reg_found
= false;
13632 note_stores (pat
, ix86_check_avx_upper_stores
, &avx_upper_reg_found
);
13634 return avx_upper_reg_found
? AVX_U128_DIRTY
: AVX_U128_CLEAN
;
13637 /* Otherwise, return current mode. Remember that if insn
13638 references AVX 256bit or 512bit registers, the mode was already
13639 changed to DIRTY from MODE_NEEDED. */
13643 /* Return the mode that an insn results in. */
13646 ix86_mode_after (int entity
, int mode
, rtx_insn
*insn
)
13653 return ix86_avx_u128_mode_after (mode
, insn
);
13659 gcc_unreachable ();
13664 ix86_dirflag_mode_entry (void)
13666 /* For TARGET_CLD or in the interrupt handler we can't assume
13667 direction flag state at function entry. */
13669 || cfun
->machine
->func_type
!= TYPE_NORMAL
)
13670 return X86_DIRFLAG_ANY
;
13672 return X86_DIRFLAG_RESET
;
13676 ix86_avx_u128_mode_entry (void)
13680 /* Entry mode is set to AVX_U128_DIRTY if there are
13681 256bit or 512bit modes used in function arguments. */
13682 for (arg
= DECL_ARGUMENTS (current_function_decl
); arg
;
13683 arg
= TREE_CHAIN (arg
))
13685 rtx incoming
= DECL_INCOMING_RTL (arg
);
13687 if (incoming
&& ix86_check_avx_upper_register (incoming
))
13688 return AVX_U128_DIRTY
;
13691 return AVX_U128_CLEAN
;
13694 /* Return a mode that ENTITY is assumed to be
13695 switched to at function entry. */
13698 ix86_mode_entry (int entity
)
13703 return ix86_dirflag_mode_entry ();
13705 return ix86_avx_u128_mode_entry ();
13709 return I387_CW_ANY
;
13711 gcc_unreachable ();
13716 ix86_avx_u128_mode_exit (void)
13718 rtx reg
= crtl
->return_rtx
;
13720 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
13721 or 512 bit modes used in the function return register. */
13722 if (reg
&& ix86_check_avx_upper_register (reg
))
13723 return AVX_U128_DIRTY
;
13725 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
13726 modes used in function arguments, otherwise return AVX_U128_CLEAN.
13728 return ix86_avx_u128_mode_entry ();
13731 /* Return a mode that ENTITY is assumed to be
13732 switched to at function exit. */
13735 ix86_mode_exit (int entity
)
13740 return X86_DIRFLAG_ANY
;
13742 return ix86_avx_u128_mode_exit ();
13746 return I387_CW_ANY
;
13748 gcc_unreachable ();
13753 ix86_mode_priority (int, int n
)
13758 /* Output code to initialize control word copies used by trunc?f?i and
13759 rounding patterns. CURRENT_MODE is set to current control word,
13760 while NEW_MODE is set to new control word. */
13763 emit_i387_cw_initialization (int mode
)
13765 rtx stored_mode
= assign_386_stack_local (HImode
, SLOT_CW_STORED
);
13768 enum ix86_stack_slot slot
;
13770 rtx reg
= gen_reg_rtx (HImode
);
13772 emit_insn (gen_x86_fnstcw_1 (stored_mode
));
13773 emit_move_insn (reg
, copy_rtx (stored_mode
));
13777 case I387_CW_TRUNC
:
13778 /* round toward zero (truncate) */
13779 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0c00)));
13780 slot
= SLOT_CW_TRUNC
;
13783 case I387_CW_FLOOR
:
13784 /* round down toward -oo */
13785 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
13786 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0400)));
13787 slot
= SLOT_CW_FLOOR
;
13791 /* round up toward +oo */
13792 emit_insn (gen_andhi3 (reg
, reg
, GEN_INT (~0x0c00)));
13793 emit_insn (gen_iorhi3 (reg
, reg
, GEN_INT (0x0800)));
13794 slot
= SLOT_CW_CEIL
;
13798 gcc_unreachable ();
13801 gcc_assert (slot
< MAX_386_STACK_LOCALS
);
13803 new_mode
= assign_386_stack_local (HImode
, slot
);
13804 emit_move_insn (new_mode
, reg
);
13807 /* Generate one or more insns to set ENTITY to MODE. */
13810 ix86_emit_mode_set (int entity
, int mode
, int prev_mode ATTRIBUTE_UNUSED
,
13811 HARD_REG_SET regs_live ATTRIBUTE_UNUSED
)
13816 if (mode
== X86_DIRFLAG_RESET
)
13817 emit_insn (gen_cld ());
13820 if (mode
== AVX_U128_CLEAN
)
13821 emit_insn (gen_avx_vzeroupper ());
13826 if (mode
!= I387_CW_ANY
13827 && mode
!= I387_CW_UNINITIALIZED
)
13828 emit_i387_cw_initialization (mode
);
13831 gcc_unreachable ();
13835 /* Output code for INSN to convert a float to a signed int. OPERANDS
13836 are the insn operands. The output may be [HSD]Imode and the input
13837 operand may be [SDX]Fmode. */
13840 output_fix_trunc (rtx_insn
*insn
, rtx
*operands
, bool fisttp
)
13842 bool stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
);
13843 bool dimode_p
= GET_MODE (operands
[0]) == DImode
;
13844 int round_mode
= get_attr_i387_cw (insn
);
13846 static char buf
[40];
13849 /* Jump through a hoop or two for DImode, since the hardware has no
13850 non-popping instruction. We used to do this a different way, but
13851 that was somewhat fragile and broke with post-reload splitters. */
13852 if ((dimode_p
|| fisttp
) && !stack_top_dies
)
13853 output_asm_insn ("fld\t%y1", operands
);
13855 gcc_assert (STACK_TOP_P (operands
[1]));
13856 gcc_assert (MEM_P (operands
[0]));
13857 gcc_assert (GET_MODE (operands
[1]) != TFmode
);
13860 return "fisttp%Z0\t%0";
13862 strcpy (buf
, "fist");
13864 if (round_mode
!= I387_CW_ANY
)
13865 output_asm_insn ("fldcw\t%3", operands
);
13868 strcat (buf
, p
+ !(stack_top_dies
|| dimode_p
));
13870 output_asm_insn (buf
, operands
);
13872 if (round_mode
!= I387_CW_ANY
)
13873 output_asm_insn ("fldcw\t%2", operands
);
13878 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13879 have the values zero or one, indicates the ffreep insn's operand
13880 from the OPERANDS array. */
13882 static const char *
13883 output_387_ffreep (rtx
*operands ATTRIBUTE_UNUSED
, int opno
)
13885 if (TARGET_USE_FFREEP
)
13886 #ifdef HAVE_AS_IX86_FFREEP
13887 return opno
? "ffreep\t%y1" : "ffreep\t%y0";
13890 static char retval
[32];
13891 int regno
= REGNO (operands
[opno
]);
13893 gcc_assert (STACK_REGNO_P (regno
));
13895 regno
-= FIRST_STACK_REG
;
13897 snprintf (retval
, sizeof (retval
), ASM_SHORT
"0xc%ddf", regno
);
13902 return opno
? "fstp\t%y1" : "fstp\t%y0";
13906 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13907 should be used. UNORDERED_P is true when fucom should be used. */
13910 output_fp_compare (rtx_insn
*insn
, rtx
*operands
,
13911 bool eflags_p
, bool unordered_p
)
13913 rtx
*xops
= eflags_p
? &operands
[0] : &operands
[1];
13914 bool stack_top_dies
;
13916 static char buf
[40];
13919 gcc_assert (STACK_TOP_P (xops
[0]));
13921 stack_top_dies
= find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
);
13925 p
= unordered_p
? "fucomi" : "fcomi";
13928 p
= "p\t{%y1, %0|%0, %y1}";
13929 strcat (buf
, p
+ !stack_top_dies
);
13934 if (STACK_REG_P (xops
[1])
13936 && find_regno_note (insn
, REG_DEAD
, FIRST_STACK_REG
+ 1))
13938 gcc_assert (REGNO (xops
[1]) == FIRST_STACK_REG
+ 1);
13940 /* If both the top of the 387 stack die, and the other operand
13941 is also a stack register that dies, then this must be a
13942 `fcompp' float compare. */
13943 p
= unordered_p
? "fucompp" : "fcompp";
13946 else if (const0_operand (xops
[1], VOIDmode
))
13948 gcc_assert (!unordered_p
);
13949 strcpy (buf
, "ftst");
13953 if (GET_MODE_CLASS (GET_MODE (xops
[1])) == MODE_INT
)
13955 gcc_assert (!unordered_p
);
13959 p
= unordered_p
? "fucom" : "fcom";
13964 strcat (buf
, p
+ !stack_top_dies
);
13967 output_asm_insn (buf
, operands
);
13968 return "fnstsw\t%0";
13972 ix86_output_addr_vec_elt (FILE *file
, int value
)
13974 const char *directive
= ASM_LONG
;
13978 directive
= ASM_QUAD
;
13980 gcc_assert (!TARGET_64BIT
);
13983 fprintf (file
, "%s%s%d\n", directive
, LPREFIX
, value
);
13987 ix86_output_addr_diff_elt (FILE *file
, int value
, int rel
)
13989 const char *directive
= ASM_LONG
;
13992 if (TARGET_64BIT
&& CASE_VECTOR_MODE
== DImode
)
13993 directive
= ASM_QUAD
;
13995 gcc_assert (!TARGET_64BIT
);
13997 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13998 if (TARGET_64BIT
|| TARGET_VXWORKS_RTP
)
13999 fprintf (file
, "%s%s%d-%s%d\n",
14000 directive
, LPREFIX
, value
, LPREFIX
, rel
);
14002 else if (TARGET_MACHO
)
14004 fprintf (file
, ASM_LONG
"%s%d-", LPREFIX
, value
);
14005 machopic_output_function_base_name (file
);
14009 else if (HAVE_AS_GOTOFF_IN_DATA
)
14010 fprintf (file
, ASM_LONG
"%s%d@GOTOFF\n", LPREFIX
, value
);
14012 asm_fprintf (file
, ASM_LONG
"%U%s+[.-%s%d]\n",
14013 GOT_SYMBOL_NAME
, LPREFIX
, value
);
14016 #define LEA_MAX_STALL (3)
14017 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14019 /* Increase given DISTANCE in half-cycles according to
14020 dependencies between PREV and NEXT instructions.
14021 Add 1 half-cycle if there is no dependency and
14022 go to next cycle if there is some dependecy. */
14024 static unsigned int
14025 increase_distance (rtx_insn
*prev
, rtx_insn
*next
, unsigned int distance
)
14029 if (!prev
|| !next
)
14030 return distance
+ (distance
& 1) + 2;
14032 if (!DF_INSN_USES (next
) || !DF_INSN_DEFS (prev
))
14033 return distance
+ 1;
14035 FOR_EACH_INSN_USE (use
, next
)
14036 FOR_EACH_INSN_DEF (def
, prev
)
14037 if (!DF_REF_IS_ARTIFICIAL (def
)
14038 && DF_REF_REGNO (use
) == DF_REF_REGNO (def
))
14039 return distance
+ (distance
& 1) + 2;
14041 return distance
+ 1;
14044 /* Function checks if instruction INSN defines register number
14045 REGNO1 or REGNO2. */
14048 insn_defines_reg (unsigned int regno1
, unsigned int regno2
,
14053 FOR_EACH_INSN_DEF (def
, insn
)
14054 if (DF_REF_REG_DEF_P (def
)
14055 && !DF_REF_IS_ARTIFICIAL (def
)
14056 && (regno1
== DF_REF_REGNO (def
)
14057 || regno2
== DF_REF_REGNO (def
)))
14063 /* Function checks if instruction INSN uses register number
14064 REGNO as a part of address expression. */
14067 insn_uses_reg_mem (unsigned int regno
, rtx insn
)
14071 FOR_EACH_INSN_USE (use
, insn
)
14072 if (DF_REF_REG_MEM_P (use
) && regno
== DF_REF_REGNO (use
))
14078 /* Search backward for non-agu definition of register number REGNO1
14079 or register number REGNO2 in basic block starting from instruction
14080 START up to head of basic block or instruction INSN.
14082 Function puts true value into *FOUND var if definition was found
14083 and false otherwise.
14085 Distance in half-cycles between START and found instruction or head
14086 of BB is added to DISTANCE and returned. */
14089 distance_non_agu_define_in_bb (unsigned int regno1
, unsigned int regno2
,
14090 rtx_insn
*insn
, int distance
,
14091 rtx_insn
*start
, bool *found
)
14093 basic_block bb
= start
? BLOCK_FOR_INSN (start
) : NULL
;
14094 rtx_insn
*prev
= start
;
14095 rtx_insn
*next
= NULL
;
14101 && distance
< LEA_SEARCH_THRESHOLD
)
14103 if (NONDEBUG_INSN_P (prev
) && NONJUMP_INSN_P (prev
))
14105 distance
= increase_distance (prev
, next
, distance
);
14106 if (insn_defines_reg (regno1
, regno2
, prev
))
14108 if (recog_memoized (prev
) < 0
14109 || get_attr_type (prev
) != TYPE_LEA
)
14118 if (prev
== BB_HEAD (bb
))
14121 prev
= PREV_INSN (prev
);
14127 /* Search backward for non-agu definition of register number REGNO1
14128 or register number REGNO2 in INSN's basic block until
14129 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14130 2. Reach neighbor BBs boundary, or
14131 3. Reach agu definition.
14132 Returns the distance between the non-agu definition point and INSN.
14133 If no definition point, returns -1. */
14136 distance_non_agu_define (unsigned int regno1
, unsigned int regno2
,
14139 basic_block bb
= BLOCK_FOR_INSN (insn
);
14141 bool found
= false;
14143 if (insn
!= BB_HEAD (bb
))
14144 distance
= distance_non_agu_define_in_bb (regno1
, regno2
, insn
,
14145 distance
, PREV_INSN (insn
),
14148 if (!found
&& distance
< LEA_SEARCH_THRESHOLD
)
14152 bool simple_loop
= false;
14154 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
14157 simple_loop
= true;
14162 distance
= distance_non_agu_define_in_bb (regno1
, regno2
,
14164 BB_END (bb
), &found
);
14167 int shortest_dist
= -1;
14168 bool found_in_bb
= false;
14170 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
14173 = distance_non_agu_define_in_bb (regno1
, regno2
,
14179 if (shortest_dist
< 0)
14180 shortest_dist
= bb_dist
;
14181 else if (bb_dist
> 0)
14182 shortest_dist
= MIN (bb_dist
, shortest_dist
);
14188 distance
= shortest_dist
;
14192 /* get_attr_type may modify recog data. We want to make sure
14193 that recog data is valid for instruction INSN, on which
14194 distance_non_agu_define is called. INSN is unchanged here. */
14195 extract_insn_cached (insn
);
14200 return distance
>> 1;
14203 /* Return the distance in half-cycles between INSN and the next
14204 insn that uses register number REGNO in memory address added
14205 to DISTANCE. Return -1 if REGNO0 is set.
14207 Put true value into *FOUND if register usage was found and
14209 Put true value into *REDEFINED if register redefinition was
14210 found and false otherwise. */
14213 distance_agu_use_in_bb (unsigned int regno
,
14214 rtx_insn
*insn
, int distance
, rtx_insn
*start
,
14215 bool *found
, bool *redefined
)
14217 basic_block bb
= NULL
;
14218 rtx_insn
*next
= start
;
14219 rtx_insn
*prev
= NULL
;
14222 *redefined
= false;
14224 if (start
!= NULL_RTX
)
14226 bb
= BLOCK_FOR_INSN (start
);
14227 if (start
!= BB_HEAD (bb
))
14228 /* If insn and start belong to the same bb, set prev to insn,
14229 so the call to increase_distance will increase the distance
14230 between insns by 1. */
14236 && distance
< LEA_SEARCH_THRESHOLD
)
14238 if (NONDEBUG_INSN_P (next
) && NONJUMP_INSN_P (next
))
14240 distance
= increase_distance(prev
, next
, distance
);
14241 if (insn_uses_reg_mem (regno
, next
))
14243 /* Return DISTANCE if OP0 is used in memory
14244 address in NEXT. */
14249 if (insn_defines_reg (regno
, INVALID_REGNUM
, next
))
14251 /* Return -1 if OP0 is set in NEXT. */
14259 if (next
== BB_END (bb
))
14262 next
= NEXT_INSN (next
);
14268 /* Return the distance between INSN and the next insn that uses
14269 register number REGNO0 in memory address. Return -1 if no such
14270 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
14273 distance_agu_use (unsigned int regno0
, rtx_insn
*insn
)
14275 basic_block bb
= BLOCK_FOR_INSN (insn
);
14277 bool found
= false;
14278 bool redefined
= false;
14280 if (insn
!= BB_END (bb
))
14281 distance
= distance_agu_use_in_bb (regno0
, insn
, distance
,
14283 &found
, &redefined
);
14285 if (!found
&& !redefined
&& distance
< LEA_SEARCH_THRESHOLD
)
14289 bool simple_loop
= false;
14291 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
14294 simple_loop
= true;
14299 distance
= distance_agu_use_in_bb (regno0
, insn
,
14300 distance
, BB_HEAD (bb
),
14301 &found
, &redefined
);
14304 int shortest_dist
= -1;
14305 bool found_in_bb
= false;
14306 bool redefined_in_bb
= false;
14308 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
14311 = distance_agu_use_in_bb (regno0
, insn
,
14312 distance
, BB_HEAD (e
->dest
),
14313 &found_in_bb
, &redefined_in_bb
);
14316 if (shortest_dist
< 0)
14317 shortest_dist
= bb_dist
;
14318 else if (bb_dist
> 0)
14319 shortest_dist
= MIN (bb_dist
, shortest_dist
);
14325 distance
= shortest_dist
;
14329 if (!found
|| redefined
)
14332 return distance
>> 1;
14335 /* Define this macro to tune LEA priority vs ADD, it take effect when
14336 there is a dilemma of choicing LEA or ADD
14337 Negative value: ADD is more preferred than LEA
14339 Positive value: LEA is more preferred than ADD*/
14340 #define IX86_LEA_PRIORITY 0
14342 /* Return true if usage of lea INSN has performance advantage
14343 over a sequence of instructions. Instructions sequence has
14344 SPLIT_COST cycles higher latency than lea latency. */
14347 ix86_lea_outperforms (rtx_insn
*insn
, unsigned int regno0
, unsigned int regno1
,
14348 unsigned int regno2
, int split_cost
, bool has_scale
)
14350 int dist_define
, dist_use
;
14352 /* For Silvermont if using a 2-source or 3-source LEA for
14353 non-destructive destination purposes, or due to wanting
14354 ability to use SCALE, the use of LEA is justified. */
14355 if (TARGET_SILVERMONT
|| TARGET_GOLDMONT
|| TARGET_GOLDMONT_PLUS
14356 || TARGET_TREMONT
|| TARGET_INTEL
)
14360 if (split_cost
< 1)
14362 if (regno0
== regno1
|| regno0
== regno2
)
14367 dist_define
= distance_non_agu_define (regno1
, regno2
, insn
);
14368 dist_use
= distance_agu_use (regno0
, insn
);
14370 if (dist_define
< 0 || dist_define
>= LEA_MAX_STALL
)
14372 /* If there is no non AGU operand definition, no AGU
14373 operand usage and split cost is 0 then both lea
14374 and non lea variants have same priority. Currently
14375 we prefer lea for 64 bit code and non lea on 32 bit
14377 if (dist_use
< 0 && split_cost
== 0)
14378 return TARGET_64BIT
|| IX86_LEA_PRIORITY
;
14383 /* With longer definitions distance lea is more preferable.
14384 Here we change it to take into account splitting cost and
14386 dist_define
+= split_cost
+ IX86_LEA_PRIORITY
;
14388 /* If there is no use in memory addess then we just check
14389 that split cost exceeds AGU stall. */
14391 return dist_define
> LEA_MAX_STALL
;
14393 /* If this insn has both backward non-agu dependence and forward
14394 agu dependence, the one with short distance takes effect. */
14395 return dist_define
>= dist_use
;
14398 /* Return true if it is legal to clobber flags by INSN and
14399 false otherwise. */
14402 ix86_ok_to_clobber_flags (rtx_insn
*insn
)
14404 basic_block bb
= BLOCK_FOR_INSN (insn
);
14410 if (NONDEBUG_INSN_P (insn
))
14412 FOR_EACH_INSN_USE (use
, insn
)
14413 if (DF_REF_REG_USE_P (use
) && DF_REF_REGNO (use
) == FLAGS_REG
)
14416 if (insn_defines_reg (FLAGS_REG
, INVALID_REGNUM
, insn
))
14420 if (insn
== BB_END (bb
))
14423 insn
= NEXT_INSN (insn
);
14426 live
= df_get_live_out(bb
);
14427 return !REGNO_REG_SET_P (live
, FLAGS_REG
);
14430 /* Return true if we need to split op0 = op1 + op2 into a sequence of
14431 move and add to avoid AGU stalls. */
14434 ix86_avoid_lea_for_add (rtx_insn
*insn
, rtx operands
[])
14436 unsigned int regno0
, regno1
, regno2
;
14438 /* Check if we need to optimize. */
14439 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
14442 /* Check it is correct to split here. */
14443 if (!ix86_ok_to_clobber_flags(insn
))
14446 regno0
= true_regnum (operands
[0]);
14447 regno1
= true_regnum (operands
[1]);
14448 regno2
= true_regnum (operands
[2]);
14450 /* We need to split only adds with non destructive
14451 destination operand. */
14452 if (regno0
== regno1
|| regno0
== regno2
)
14455 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 1, false);
14458 /* Return true if we should emit lea instruction instead of mov
14462 ix86_use_lea_for_mov (rtx_insn
*insn
, rtx operands
[])
14464 unsigned int regno0
, regno1
;
14466 /* Check if we need to optimize. */
14467 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
14470 /* Use lea for reg to reg moves only. */
14471 if (!REG_P (operands
[0]) || !REG_P (operands
[1]))
14474 regno0
= true_regnum (operands
[0]);
14475 regno1
= true_regnum (operands
[1]);
14477 return ix86_lea_outperforms (insn
, regno0
, regno1
, INVALID_REGNUM
, 0, false);
14480 /* Return true if we need to split lea into a sequence of
14481 instructions to avoid AGU stalls. */
14484 ix86_avoid_lea_for_addr (rtx_insn
*insn
, rtx operands
[])
14486 unsigned int regno0
, regno1
, regno2
;
14488 struct ix86_address parts
;
14491 /* Check we need to optimize. */
14492 if (!TARGET_AVOID_LEA_FOR_ADDR
|| optimize_function_for_size_p (cfun
))
14495 /* The "at least two components" test below might not catch simple
14496 move or zero extension insns if parts.base is non-NULL and parts.disp
14497 is const0_rtx as the only components in the address, e.g. if the
14498 register is %rbp or %r13. As this test is much cheaper and moves or
14499 zero extensions are the common case, do this check first. */
14500 if (REG_P (operands
[1])
14501 || (SImode_address_operand (operands
[1], VOIDmode
)
14502 && REG_P (XEXP (operands
[1], 0))))
14505 /* Check if it is OK to split here. */
14506 if (!ix86_ok_to_clobber_flags (insn
))
14509 ok
= ix86_decompose_address (operands
[1], &parts
);
14512 /* There should be at least two components in the address. */
14513 if ((parts
.base
!= NULL_RTX
) + (parts
.index
!= NULL_RTX
)
14514 + (parts
.disp
!= NULL_RTX
) + (parts
.scale
> 1) < 2)
14517 /* We should not split into add if non legitimate pic
14518 operand is used as displacement. */
14519 if (parts
.disp
&& flag_pic
&& !LEGITIMATE_PIC_OPERAND_P (parts
.disp
))
14522 regno0
= true_regnum (operands
[0]) ;
14523 regno1
= INVALID_REGNUM
;
14524 regno2
= INVALID_REGNUM
;
14527 regno1
= true_regnum (parts
.base
);
14529 regno2
= true_regnum (parts
.index
);
14533 /* Compute how many cycles we will add to execution time
14534 if split lea into a sequence of instructions. */
14535 if (parts
.base
|| parts
.index
)
14537 /* Have to use mov instruction if non desctructive
14538 destination form is used. */
14539 if (regno1
!= regno0
&& regno2
!= regno0
)
14542 /* Have to add index to base if both exist. */
14543 if (parts
.base
&& parts
.index
)
14546 /* Have to use shift and adds if scale is 2 or greater. */
14547 if (parts
.scale
> 1)
14549 if (regno0
!= regno1
)
14551 else if (regno2
== regno0
)
14554 split_cost
+= parts
.scale
;
14557 /* Have to use add instruction with immediate if
14558 disp is non zero. */
14559 if (parts
.disp
&& parts
.disp
!= const0_rtx
)
14562 /* Subtract the price of lea. */
14566 return !ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, split_cost
,
14570 /* Return true if it is ok to optimize an ADD operation to LEA
14571 operation to avoid flag register consumation. For most processors,
14572 ADD is faster than LEA. For the processors like BONNELL, if the
14573 destination register of LEA holds an actual address which will be
14574 used soon, LEA is better and otherwise ADD is better. */
14577 ix86_lea_for_add_ok (rtx_insn
*insn
, rtx operands
[])
14579 unsigned int regno0
= true_regnum (operands
[0]);
14580 unsigned int regno1
= true_regnum (operands
[1]);
14581 unsigned int regno2
= true_regnum (operands
[2]);
14583 /* If a = b + c, (a!=b && a!=c), must use lea form. */
14584 if (regno0
!= regno1
&& regno0
!= regno2
)
14587 if (!TARGET_OPT_AGU
|| optimize_function_for_size_p (cfun
))
14590 return ix86_lea_outperforms (insn
, regno0
, regno1
, regno2
, 0, false);
14593 /* Return true if destination reg of SET_BODY is shift count of
14597 ix86_dep_by_shift_count_body (const_rtx set_body
, const_rtx use_body
)
14603 /* Retrieve destination of SET_BODY. */
14604 switch (GET_CODE (set_body
))
14607 set_dest
= SET_DEST (set_body
);
14608 if (!set_dest
|| !REG_P (set_dest
))
14612 for (i
= XVECLEN (set_body
, 0) - 1; i
>= 0; i
--)
14613 if (ix86_dep_by_shift_count_body (XVECEXP (set_body
, 0, i
),
14621 /* Retrieve shift count of USE_BODY. */
14622 switch (GET_CODE (use_body
))
14625 shift_rtx
= XEXP (use_body
, 1);
14628 for (i
= XVECLEN (use_body
, 0) - 1; i
>= 0; i
--)
14629 if (ix86_dep_by_shift_count_body (set_body
,
14630 XVECEXP (use_body
, 0, i
)))
14638 && (GET_CODE (shift_rtx
) == ASHIFT
14639 || GET_CODE (shift_rtx
) == LSHIFTRT
14640 || GET_CODE (shift_rtx
) == ASHIFTRT
14641 || GET_CODE (shift_rtx
) == ROTATE
14642 || GET_CODE (shift_rtx
) == ROTATERT
))
14644 rtx shift_count
= XEXP (shift_rtx
, 1);
14646 /* Return true if shift count is dest of SET_BODY. */
14647 if (REG_P (shift_count
))
14649 /* Add check since it can be invoked before register
14650 allocation in pre-reload schedule. */
14651 if (reload_completed
14652 && true_regnum (set_dest
) == true_regnum (shift_count
))
14654 else if (REGNO(set_dest
) == REGNO(shift_count
))
14662 /* Return true if destination reg of SET_INSN is shift count of
14666 ix86_dep_by_shift_count (const_rtx set_insn
, const_rtx use_insn
)
14668 return ix86_dep_by_shift_count_body (PATTERN (set_insn
),
14669 PATTERN (use_insn
));
14672 /* Return TRUE or FALSE depending on whether the unary operator meets the
14673 appropriate constraints. */
14676 ix86_unary_operator_ok (enum rtx_code
,
14680 /* If one of operands is memory, source and destination must match. */
14681 if ((MEM_P (operands
[0])
14682 || MEM_P (operands
[1]))
14683 && ! rtx_equal_p (operands
[0], operands
[1]))
14688 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14689 are ok, keeping in mind the possible movddup alternative. */
14692 ix86_vec_interleave_v2df_operator_ok (rtx operands
[3], bool high
)
14694 if (MEM_P (operands
[0]))
14695 return rtx_equal_p (operands
[0], operands
[1 + high
]);
14696 if (MEM_P (operands
[1]) && MEM_P (operands
[2]))
14697 return TARGET_SSE3
&& rtx_equal_p (operands
[1], operands
[2]);
14701 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14702 then replicate the value for all elements of the vector
14706 ix86_build_const_vector (machine_mode mode
, bool vect
, rtx value
)
14710 machine_mode scalar_mode
;
14734 n_elt
= GET_MODE_NUNITS (mode
);
14735 v
= rtvec_alloc (n_elt
);
14736 scalar_mode
= GET_MODE_INNER (mode
);
14738 RTVEC_ELT (v
, 0) = value
;
14740 for (i
= 1; i
< n_elt
; ++i
)
14741 RTVEC_ELT (v
, i
) = vect
? value
: CONST0_RTX (scalar_mode
);
14743 return gen_rtx_CONST_VECTOR (mode
, v
);
14746 gcc_unreachable ();
14750 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14751 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14752 for an SSE register. If VECT is true, then replicate the mask for
14753 all elements of the vector register. If INVERT is true, then create
14754 a mask excluding the sign bit. */
14757 ix86_build_signbit_mask (machine_mode mode
, bool vect
, bool invert
)
14759 machine_mode vec_mode
, imode
;
14787 vec_mode
= VOIDmode
;
14792 gcc_unreachable ();
14795 machine_mode inner_mode
= GET_MODE_INNER (mode
);
14796 w
= wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode
) - 1,
14797 GET_MODE_BITSIZE (inner_mode
));
14799 w
= wi::bit_not (w
);
14801 /* Force this value into the low part of a fp vector constant. */
14802 mask
= immed_wide_int_const (w
, imode
);
14803 mask
= gen_lowpart (inner_mode
, mask
);
14805 if (vec_mode
== VOIDmode
)
14806 return force_reg (inner_mode
, mask
);
14808 v
= ix86_build_const_vector (vec_mode
, vect
, mask
);
14809 return force_reg (vec_mode
, v
);
14812 /* Return TRUE or FALSE depending on whether the first SET in INSN
14813 has source and destination with matching CC modes, and that the
14814 CC mode is at least as constrained as REQ_MODE. */
14817 ix86_match_ccmode (rtx insn
, machine_mode req_mode
)
14820 machine_mode set_mode
;
14822 set
= PATTERN (insn
);
14823 if (GET_CODE (set
) == PARALLEL
)
14824 set
= XVECEXP (set
, 0, 0);
14825 gcc_assert (GET_CODE (set
) == SET
);
14826 gcc_assert (GET_CODE (SET_SRC (set
)) == COMPARE
);
14828 set_mode
= GET_MODE (SET_DEST (set
));
14832 if (req_mode
!= CCNOmode
14833 && (req_mode
!= CCmode
14834 || XEXP (SET_SRC (set
), 1) != const0_rtx
))
14838 if (req_mode
== CCGCmode
)
14842 if (req_mode
== CCGOCmode
|| req_mode
== CCNOmode
)
14846 if (req_mode
== CCZmode
)
14859 if (set_mode
!= req_mode
)
14864 gcc_unreachable ();
14867 return GET_MODE (SET_SRC (set
)) == set_mode
;
14871 ix86_cc_mode (enum rtx_code code
, rtx op0
, rtx op1
)
14873 machine_mode mode
= GET_MODE (op0
);
14875 if (SCALAR_FLOAT_MODE_P (mode
))
14877 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode
));
14883 /* Only zero flag is needed. */
14884 case EQ
: /* ZF=0 */
14885 case NE
: /* ZF!=0 */
14887 /* Codes needing carry flag. */
14888 case GEU
: /* CF=0 */
14889 case LTU
: /* CF=1 */
14890 /* Detect overflow checks. They need just the carry flag. */
14891 if (GET_CODE (op0
) == PLUS
14892 && (rtx_equal_p (op1
, XEXP (op0
, 0))
14893 || rtx_equal_p (op1
, XEXP (op0
, 1))))
14897 case GTU
: /* CF=0 & ZF=0 */
14898 case LEU
: /* CF=1 | ZF=1 */
14900 /* Codes possibly doable only with sign flag when
14901 comparing against zero. */
14902 case GE
: /* SF=OF or SF=0 */
14903 case LT
: /* SF<>OF or SF=1 */
14904 if (op1
== const0_rtx
)
14907 /* For other cases Carry flag is not required. */
14909 /* Codes doable only with sign flag when comparing
14910 against zero, but we miss jump instruction for it
14911 so we need to use relational tests against overflow
14912 that thus needs to be zero. */
14913 case GT
: /* ZF=0 & SF=OF */
14914 case LE
: /* ZF=1 | SF<>OF */
14915 if (op1
== const0_rtx
)
14919 /* strcmp pattern do (use flags) and combine may ask us for proper
14924 gcc_unreachable ();
14928 /* Return the fixed registers used for condition codes. */
14931 ix86_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
14934 *p2
= INVALID_REGNUM
;
14938 /* If two condition code modes are compatible, return a condition code
14939 mode which is compatible with both. Otherwise, return
14942 static machine_mode
14943 ix86_cc_modes_compatible (machine_mode m1
, machine_mode m2
)
14948 if (GET_MODE_CLASS (m1
) != MODE_CC
|| GET_MODE_CLASS (m2
) != MODE_CC
)
14951 if ((m1
== CCGCmode
&& m2
== CCGOCmode
)
14952 || (m1
== CCGOCmode
&& m2
== CCGCmode
))
14955 if ((m1
== CCNOmode
&& m2
== CCGOCmode
)
14956 || (m1
== CCGOCmode
&& m2
== CCNOmode
))
14960 && (m2
== CCGCmode
|| m2
== CCGOCmode
|| m2
== CCNOmode
))
14962 else if (m2
== CCZmode
14963 && (m1
== CCGCmode
|| m1
== CCGOCmode
|| m1
== CCNOmode
))
14969 gcc_unreachable ();
15000 /* These are only compatible with themselves, which we already
15006 /* Return strategy to use for floating-point. We assume that fcomi is always
15007 preferrable where available, since that is also true when looking at size
15008 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15010 enum ix86_fpcmp_strategy
15011 ix86_fp_comparison_strategy (enum rtx_code
)
15013 /* Do fcomi/sahf based test when profitable. */
15016 return IX86_FPCMP_COMI
;
15018 if (TARGET_SAHF
&& (TARGET_USE_SAHF
|| optimize_insn_for_size_p ()))
15019 return IX86_FPCMP_SAHF
;
15021 return IX86_FPCMP_ARITH
;
15024 /* Convert comparison codes we use to represent FP comparison to integer
15025 code that will result in proper branch. Return UNKNOWN if no such code
15029 ix86_fp_compare_code_to_integer (enum rtx_code code
)
15053 /* Zero extend possibly SImode EXP to Pmode register. */
15055 ix86_zero_extend_to_Pmode (rtx exp
)
15057 return force_reg (Pmode
, convert_to_mode (Pmode
, exp
, 1));
15060 /* Return true if the function being called was marked with attribute
15061 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
15062 to handle the non-PIC case in the backend because there is no easy
15063 interface for the front-end to force non-PLT calls to use the GOT.
15064 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15065 to call the function marked "noplt" indirectly. */
15068 ix86_nopic_noplt_attribute_p (rtx call_op
)
15070 if (flag_pic
|| ix86_cmodel
== CM_LARGE
15071 || !(TARGET_64BIT
|| HAVE_AS_IX86_GOT32X
)
15072 || TARGET_MACHO
|| TARGET_SEH
|| TARGET_PECOFF
15073 || SYMBOL_REF_LOCAL_P (call_op
))
15076 tree symbol_decl
= SYMBOL_REF_DECL (call_op
);
15079 || (symbol_decl
!= NULL_TREE
15080 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl
))))
15086 /* Helper to output the jmp/call. */
15088 ix86_output_jmp_thunk_or_indirect (const char *thunk_name
, const int regno
)
15090 if (thunk_name
!= NULL
)
15092 fprintf (asm_out_file
, "\tjmp\t");
15093 assemble_name (asm_out_file
, thunk_name
);
15094 putc ('\n', asm_out_file
);
15097 output_indirect_thunk (regno
);
15100 /* Output indirect branch via a call and return thunk. CALL_OP is a
15101 register which contains the branch target. XASM is the assembly
15102 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
15103 A normal call is converted to:
15105 call __x86_indirect_thunk_reg
15107 and a tail call is converted to:
15109 jmp __x86_indirect_thunk_reg
15113 ix86_output_indirect_branch_via_reg (rtx call_op
, bool sibcall_p
)
15115 char thunk_name_buf
[32];
15117 enum indirect_thunk_prefix need_prefix
15118 = indirect_thunk_need_prefix (current_output_insn
);
15119 int regno
= REGNO (call_op
);
15121 if (cfun
->machine
->indirect_branch_type
15122 != indirect_branch_thunk_inline
)
15124 if (cfun
->machine
->indirect_branch_type
== indirect_branch_thunk
)
15127 if (i
>= FIRST_REX_INT_REG
)
15128 i
-= (FIRST_REX_INT_REG
- LAST_INT_REG
- 1);
15129 indirect_thunks_used
|= 1 << i
;
15131 indirect_thunk_name (thunk_name_buf
, regno
, need_prefix
, false);
15132 thunk_name
= thunk_name_buf
;
15138 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15141 if (thunk_name
!= NULL
)
15143 fprintf (asm_out_file
, "\tcall\t");
15144 assemble_name (asm_out_file
, thunk_name
);
15145 putc ('\n', asm_out_file
);
15149 char indirectlabel1
[32];
15150 char indirectlabel2
[32];
15152 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
,
15154 indirectlabelno
++);
15155 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
,
15157 indirectlabelno
++);
15160 fputs ("\tjmp\t", asm_out_file
);
15161 assemble_name_raw (asm_out_file
, indirectlabel2
);
15162 fputc ('\n', asm_out_file
);
15164 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
15166 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15168 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
15171 fputs ("\tcall\t", asm_out_file
);
15172 assemble_name_raw (asm_out_file
, indirectlabel1
);
15173 fputc ('\n', asm_out_file
);
15177 /* Output indirect branch via a call and return thunk. CALL_OP is
15178 the branch target. XASM is the assembly template for CALL_OP.
15179 Branch is a tail call if SIBCALL_P is true. A normal call is
15185 jmp __x86_indirect_thunk
15189 and a tail call is converted to:
15192 jmp __x86_indirect_thunk
15196 ix86_output_indirect_branch_via_push (rtx call_op
, const char *xasm
,
15199 char thunk_name_buf
[32];
15202 enum indirect_thunk_prefix need_prefix
15203 = indirect_thunk_need_prefix (current_output_insn
);
15206 if (cfun
->machine
->indirect_branch_type
15207 != indirect_branch_thunk_inline
)
15209 if (cfun
->machine
->indirect_branch_type
== indirect_branch_thunk
)
15210 indirect_thunk_needed
= true;
15211 indirect_thunk_name (thunk_name_buf
, regno
, need_prefix
, false);
15212 thunk_name
= thunk_name_buf
;
15217 snprintf (push_buf
, sizeof (push_buf
), "push{%c}\t%s",
15218 TARGET_64BIT
? 'q' : 'l', xasm
);
15222 output_asm_insn (push_buf
, &call_op
);
15223 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15227 char indirectlabel1
[32];
15228 char indirectlabel2
[32];
15230 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
,
15232 indirectlabelno
++);
15233 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
,
15235 indirectlabelno
++);
15238 fputs ("\tjmp\t", asm_out_file
);
15239 assemble_name_raw (asm_out_file
, indirectlabel2
);
15240 fputc ('\n', asm_out_file
);
15242 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
15244 /* An external function may be called via GOT, instead of PLT. */
15245 if (MEM_P (call_op
))
15247 struct ix86_address parts
;
15248 rtx addr
= XEXP (call_op
, 0);
15249 if (ix86_decompose_address (addr
, &parts
)
15250 && parts
.base
== stack_pointer_rtx
)
15252 /* Since call will adjust stack by -UNITS_PER_WORD,
15253 we must convert "disp(stack, index, scale)" to
15254 "disp+UNITS_PER_WORD(stack, index, scale)". */
15257 addr
= gen_rtx_MULT (Pmode
, parts
.index
,
15258 GEN_INT (parts
.scale
));
15259 addr
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
15263 addr
= stack_pointer_rtx
;
15266 if (parts
.disp
!= NULL_RTX
)
15267 disp
= plus_constant (Pmode
, parts
.disp
,
15270 disp
= GEN_INT (UNITS_PER_WORD
);
15272 addr
= gen_rtx_PLUS (Pmode
, addr
, disp
);
15273 call_op
= gen_rtx_MEM (GET_MODE (call_op
), addr
);
15277 output_asm_insn (push_buf
, &call_op
);
15279 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15281 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
15284 fputs ("\tcall\t", asm_out_file
);
15285 assemble_name_raw (asm_out_file
, indirectlabel1
);
15286 fputc ('\n', asm_out_file
);
15290 /* Output indirect branch via a call and return thunk. CALL_OP is
15291 the branch target. XASM is the assembly template for CALL_OP.
15292 Branch is a tail call if SIBCALL_P is true. */
15295 ix86_output_indirect_branch (rtx call_op
, const char *xasm
,
15298 if (REG_P (call_op
))
15299 ix86_output_indirect_branch_via_reg (call_op
, sibcall_p
);
15301 ix86_output_indirect_branch_via_push (call_op
, xasm
, sibcall_p
);
15304 /* Output indirect jump. CALL_OP is the jump target. */
15307 ix86_output_indirect_jmp (rtx call_op
)
15309 if (cfun
->machine
->indirect_branch_type
!= indirect_branch_keep
)
15311 /* We can't have red-zone since "call" in the indirect thunk
15312 pushes the return address onto stack, destroying red-zone. */
15313 if (ix86_red_zone_size
!= 0)
15314 gcc_unreachable ();
15316 ix86_output_indirect_branch (call_op
, "%0", true);
15320 return "%!jmp\t%A0";
15323 /* Output return instrumentation for current function if needed. */
15326 output_return_instrumentation (void)
15328 if (ix86_instrument_return
!= instrument_return_none
15330 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun
->decl
))
15332 if (ix86_flag_record_return
)
15333 fprintf (asm_out_file
, "1:\n");
15334 switch (ix86_instrument_return
)
15336 case instrument_return_call
:
15337 fprintf (asm_out_file
, "\tcall\t__return__\n");
15339 case instrument_return_nop5
:
15340 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15341 fprintf (asm_out_file
, ASM_BYTE
"0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15343 case instrument_return_none
:
15347 if (ix86_flag_record_return
)
15349 fprintf (asm_out_file
, "\t.section __return_loc, \"a\",@progbits\n");
15350 fprintf (asm_out_file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
15351 fprintf (asm_out_file
, "\t.previous\n");
15356 /* Output function return. CALL_OP is the jump target. Add a REP
15357 prefix to RET if LONG_P is true and function return is kept. */
15360 ix86_output_function_return (bool long_p
)
15362 output_return_instrumentation ();
15364 if (cfun
->machine
->function_return_type
!= indirect_branch_keep
)
15366 char thunk_name
[32];
15367 enum indirect_thunk_prefix need_prefix
15368 = indirect_thunk_need_prefix (current_output_insn
);
15370 if (cfun
->machine
->function_return_type
15371 != indirect_branch_thunk_inline
)
15373 bool need_thunk
= (cfun
->machine
->function_return_type
15374 == indirect_branch_thunk
);
15375 indirect_thunk_name (thunk_name
, INVALID_REGNUM
, need_prefix
,
15377 indirect_return_needed
|= need_thunk
;
15378 fprintf (asm_out_file
, "\tjmp\t");
15379 assemble_name (asm_out_file
, thunk_name
);
15380 putc ('\n', asm_out_file
);
15383 output_indirect_thunk (INVALID_REGNUM
);
15391 return "rep%; ret";
15394 /* Output indirect function return. RET_OP is the function return
15398 ix86_output_indirect_function_return (rtx ret_op
)
15400 if (cfun
->machine
->function_return_type
!= indirect_branch_keep
)
15402 char thunk_name
[32];
15403 enum indirect_thunk_prefix need_prefix
15404 = indirect_thunk_need_prefix (current_output_insn
);
15405 unsigned int regno
= REGNO (ret_op
);
15406 gcc_assert (regno
== CX_REG
);
15408 if (cfun
->machine
->function_return_type
15409 != indirect_branch_thunk_inline
)
15411 bool need_thunk
= (cfun
->machine
->function_return_type
15412 == indirect_branch_thunk
);
15413 indirect_thunk_name (thunk_name
, regno
, need_prefix
, true);
15417 indirect_return_via_cx
= true;
15418 indirect_thunks_used
|= 1 << CX_REG
;
15420 fprintf (asm_out_file
, "\tjmp\t");
15421 assemble_name (asm_out_file
, thunk_name
);
15422 putc ('\n', asm_out_file
);
15425 output_indirect_thunk (regno
);
15430 return "%!jmp\t%A0";
15433 /* Output the assembly for a call instruction. */
15436 ix86_output_call_insn (rtx_insn
*insn
, rtx call_op
)
15438 bool direct_p
= constant_call_address_operand (call_op
, VOIDmode
);
15439 bool output_indirect_p
15441 && cfun
->machine
->indirect_branch_type
!= indirect_branch_keep
);
15442 bool seh_nop_p
= false;
15445 if (SIBLING_CALL_P (insn
))
15447 output_return_instrumentation ();
15450 if (ix86_nopic_noplt_attribute_p (call_op
))
15455 if (output_indirect_p
)
15456 xasm
= "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15458 xasm
= "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15462 if (output_indirect_p
)
15463 xasm
= "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15465 xasm
= "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15469 xasm
= "%!jmp\t%P0";
15471 /* SEH epilogue detection requires the indirect branch case
15472 to include REX.W. */
15473 else if (TARGET_SEH
)
15474 xasm
= "%!rex.W jmp\t%A0";
15477 if (output_indirect_p
)
15480 xasm
= "%!jmp\t%A0";
15483 if (output_indirect_p
&& !direct_p
)
15484 ix86_output_indirect_branch (call_op
, xasm
, true);
15486 output_asm_insn (xasm
, &call_op
);
15490 /* SEH unwinding can require an extra nop to be emitted in several
15491 circumstances. Determine if we have one of those. */
15496 for (i
= NEXT_INSN (insn
); i
; i
= NEXT_INSN (i
))
15498 /* Prevent a catch region from being adjacent to a jump that would
15499 be interpreted as an epilogue sequence by the unwinder. */
15500 if (JUMP_P(i
) && CROSSING_JUMP_P (i
))
15506 /* If we get to another real insn, we don't need the nop. */
15510 /* If we get to the epilogue note, prevent a catch region from
15511 being adjacent to the standard epilogue sequence. If non-
15512 call-exceptions, we'll have done this during epilogue emission. */
15513 if (NOTE_P (i
) && NOTE_KIND (i
) == NOTE_INSN_EPILOGUE_BEG
15514 && !flag_non_call_exceptions
15515 && !can_throw_internal (insn
))
15522 /* If we didn't find a real insn following the call, prevent the
15523 unwinder from looking into the next function. */
15530 if (ix86_nopic_noplt_attribute_p (call_op
))
15535 if (output_indirect_p
)
15536 xasm
= "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15538 xasm
= "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
15542 if (output_indirect_p
)
15543 xasm
= "{%p0@GOT|[DWORD PTR %p0@GOT]}";
15545 xasm
= "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
15549 xasm
= "%!call\t%P0";
15553 if (output_indirect_p
)
15556 xasm
= "%!call\t%A0";
15559 if (output_indirect_p
&& !direct_p
)
15560 ix86_output_indirect_branch (call_op
, xasm
, false);
15562 output_asm_insn (xasm
, &call_op
);
15570 /* Return a MEM corresponding to a stack slot with mode MODE.
15571 Allocate a new slot if necessary.
15573 The RTL for a function can have several slots available: N is
15574 which slot to use. */
15577 assign_386_stack_local (machine_mode mode
, enum ix86_stack_slot n
)
15579 struct stack_local_entry
*s
;
15581 gcc_assert (n
< MAX_386_STACK_LOCALS
);
15583 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15584 if (s
->mode
== mode
&& s
->n
== n
)
15585 return validize_mem (copy_rtx (s
->rtl
));
15587 s
= ggc_alloc
<stack_local_entry
> ();
15590 s
->rtl
= assign_stack_local (mode
, GET_MODE_SIZE (mode
), 0);
15592 s
->next
= ix86_stack_locals
;
15593 ix86_stack_locals
= s
;
15594 return validize_mem (copy_rtx (s
->rtl
));
15598 ix86_instantiate_decls (void)
15600 struct stack_local_entry
*s
;
15602 for (s
= ix86_stack_locals
; s
; s
= s
->next
)
15603 if (s
->rtl
!= NULL_RTX
)
15604 instantiate_decl_rtl (s
->rtl
);
15607 /* Check whether x86 address PARTS is a pc-relative address. */
15610 ix86_rip_relative_addr_p (struct ix86_address
*parts
)
15612 rtx base
, index
, disp
;
15614 base
= parts
->base
;
15615 index
= parts
->index
;
15616 disp
= parts
->disp
;
15618 if (disp
&& !base
&& !index
)
15624 if (GET_CODE (disp
) == CONST
)
15625 symbol
= XEXP (disp
, 0);
15626 if (GET_CODE (symbol
) == PLUS
15627 && CONST_INT_P (XEXP (symbol
, 1)))
15628 symbol
= XEXP (symbol
, 0);
15630 if (GET_CODE (symbol
) == LABEL_REF
15631 || (GET_CODE (symbol
) == SYMBOL_REF
15632 && SYMBOL_REF_TLS_MODEL (symbol
) == 0)
15633 || (GET_CODE (symbol
) == UNSPEC
15634 && (XINT (symbol
, 1) == UNSPEC_GOTPCREL
15635 || XINT (symbol
, 1) == UNSPEC_PCREL
15636 || XINT (symbol
, 1) == UNSPEC_GOTNTPOFF
)))
15643 /* Calculate the length of the memory address in the instruction encoding.
15644 Includes addr32 prefix, does not include the one-byte modrm, opcode,
15645 or other prefixes. We never generate addr32 prefix for LEA insn. */
15648 memory_address_length (rtx addr
, bool lea
)
15650 struct ix86_address parts
;
15651 rtx base
, index
, disp
;
15655 if (GET_CODE (addr
) == PRE_DEC
15656 || GET_CODE (addr
) == POST_INC
15657 || GET_CODE (addr
) == PRE_MODIFY
15658 || GET_CODE (addr
) == POST_MODIFY
)
15661 ok
= ix86_decompose_address (addr
, &parts
);
15664 len
= (parts
.seg
== ADDR_SPACE_GENERIC
) ? 0 : 1;
15666 /* If this is not LEA instruction, add the length of addr32 prefix. */
15667 if (TARGET_64BIT
&& !lea
15668 && (SImode_address_operand (addr
, VOIDmode
)
15669 || (parts
.base
&& GET_MODE (parts
.base
) == SImode
)
15670 || (parts
.index
&& GET_MODE (parts
.index
) == SImode
)))
15674 index
= parts
.index
;
15677 if (base
&& SUBREG_P (base
))
15678 base
= SUBREG_REG (base
);
15679 if (index
&& SUBREG_P (index
))
15680 index
= SUBREG_REG (index
);
15682 gcc_assert (base
== NULL_RTX
|| REG_P (base
));
15683 gcc_assert (index
== NULL_RTX
|| REG_P (index
));
15686 - esp as the base always wants an index,
15687 - ebp as the base always wants a displacement,
15688 - r12 as the base always wants an index,
15689 - r13 as the base always wants a displacement. */
15691 /* Register Indirect. */
15692 if (base
&& !index
&& !disp
)
15694 /* esp (for its index) and ebp (for its displacement) need
15695 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
15697 if (base
== arg_pointer_rtx
15698 || base
== frame_pointer_rtx
15699 || REGNO (base
) == SP_REG
15700 || REGNO (base
) == BP_REG
15701 || REGNO (base
) == R12_REG
15702 || REGNO (base
) == R13_REG
)
15706 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
15707 is not disp32, but disp32(%rip), so for disp32
15708 SIB byte is needed, unless print_operand_address
15709 optimizes it into disp32(%rip) or (%rip) is implied
15711 else if (disp
&& !base
&& !index
)
15714 if (!ix86_rip_relative_addr_p (&parts
))
15719 /* Find the length of the displacement constant. */
15722 if (base
&& satisfies_constraint_K (disp
))
15727 /* ebp always wants a displacement. Similarly r13. */
15728 else if (base
&& (REGNO (base
) == BP_REG
|| REGNO (base
) == R13_REG
))
15731 /* An index requires the two-byte modrm form.... */
15733 /* ...like esp (or r12), which always wants an index. */
15734 || base
== arg_pointer_rtx
15735 || base
== frame_pointer_rtx
15736 || (base
&& (REGNO (base
) == SP_REG
|| REGNO (base
) == R12_REG
)))
15743 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15744 is set, expect that insn have 8bit immediate alternative. */
15746 ix86_attr_length_immediate_default (rtx_insn
*insn
, bool shortform
)
15750 extract_insn_cached (insn
);
15751 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15752 if (CONSTANT_P (recog_data
.operand
[i
]))
15754 enum attr_mode mode
= get_attr_mode (insn
);
15757 if (shortform
&& CONST_INT_P (recog_data
.operand
[i
]))
15759 HOST_WIDE_INT ival
= INTVAL (recog_data
.operand
[i
]);
15766 ival
= trunc_int_for_mode (ival
, HImode
);
15769 ival
= trunc_int_for_mode (ival
, SImode
);
15774 if (IN_RANGE (ival
, -128, 127))
15791 /* Immediates for DImode instructions are encoded
15792 as 32bit sign extended values. */
15797 fatal_insn ("unknown insn mode", insn
);
15803 /* Compute default value for "length_address" attribute. */
15805 ix86_attr_length_address_default (rtx_insn
*insn
)
15809 if (get_attr_type (insn
) == TYPE_LEA
)
15811 rtx set
= PATTERN (insn
), addr
;
15813 if (GET_CODE (set
) == PARALLEL
)
15814 set
= XVECEXP (set
, 0, 0);
15816 gcc_assert (GET_CODE (set
) == SET
);
15818 addr
= SET_SRC (set
);
15820 return memory_address_length (addr
, true);
15823 extract_insn_cached (insn
);
15824 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15826 rtx op
= recog_data
.operand
[i
];
15829 constrain_operands_cached (insn
, reload_completed
);
15830 if (which_alternative
!= -1)
15832 const char *constraints
= recog_data
.constraints
[i
];
15833 int alt
= which_alternative
;
15835 while (*constraints
== '=' || *constraints
== '+')
15838 while (*constraints
++ != ',')
15840 /* Skip ignored operands. */
15841 if (*constraints
== 'X')
15845 int len
= memory_address_length (XEXP (op
, 0), false);
15847 /* Account for segment prefix for non-default addr spaces. */
15848 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op
)))
15857 /* Compute default value for "length_vex" attribute. It includes
15858 2 or 3 byte VEX prefix and 1 opcode byte. */
15861 ix86_attr_length_vex_default (rtx_insn
*insn
, bool has_0f_opcode
,
15866 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
15867 byte VEX prefix. */
15868 if (!has_0f_opcode
|| has_vex_w
)
15871 /* We can always use 2 byte VEX prefix in 32bit. */
15875 extract_insn_cached (insn
);
15877 for (i
= recog_data
.n_operands
- 1; i
>= 0; --i
)
15878 if (REG_P (recog_data
.operand
[i
]))
15880 /* REX.W bit uses 3 byte VEX prefix. */
15881 if (GET_MODE (recog_data
.operand
[i
]) == DImode
15882 && GENERAL_REG_P (recog_data
.operand
[i
]))
15887 /* REX.X or REX.B bits use 3 byte VEX prefix. */
15888 if (MEM_P (recog_data
.operand
[i
])
15889 && x86_extended_reg_mentioned_p (recog_data
.operand
[i
]))
15898 ix86_class_likely_spilled_p (reg_class_t
);
15900 /* Returns true if lhs of insn is HW function argument register and set up
15901 is_spilled to true if it is likely spilled HW register. */
15903 insn_is_function_arg (rtx insn
, bool* is_spilled
)
15907 if (!NONDEBUG_INSN_P (insn
))
15909 /* Call instructions are not movable, ignore it. */
15912 insn
= PATTERN (insn
);
15913 if (GET_CODE (insn
) == PARALLEL
)
15914 insn
= XVECEXP (insn
, 0, 0);
15915 if (GET_CODE (insn
) != SET
)
15917 dst
= SET_DEST (insn
);
15918 if (REG_P (dst
) && HARD_REGISTER_P (dst
)
15919 && ix86_function_arg_regno_p (REGNO (dst
)))
15921 /* Is it likely spilled HW register? */
15922 if (!TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (dst
))
15923 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst
))))
15924 *is_spilled
= true;
15930 /* Add output dependencies for chain of function adjacent arguments if only
15931 there is a move to likely spilled HW register. Return first argument
15932 if at least one dependence was added or NULL otherwise. */
15934 add_parameter_dependencies (rtx_insn
*call
, rtx_insn
*head
)
15937 rtx_insn
*last
= call
;
15938 rtx_insn
*first_arg
= NULL
;
15939 bool is_spilled
= false;
15941 head
= PREV_INSN (head
);
15943 /* Find nearest to call argument passing instruction. */
15946 last
= PREV_INSN (last
);
15949 if (!NONDEBUG_INSN_P (last
))
15951 if (insn_is_function_arg (last
, &is_spilled
))
15959 insn
= PREV_INSN (last
);
15960 if (!INSN_P (insn
))
15964 if (!NONDEBUG_INSN_P (insn
))
15969 if (insn_is_function_arg (insn
, &is_spilled
))
15971 /* Add output depdendence between two function arguments if chain
15972 of output arguments contains likely spilled HW registers. */
15974 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
15975 first_arg
= last
= insn
;
15985 /* Add output or anti dependency from insn to first_arg to restrict its code
15988 avoid_func_arg_motion (rtx_insn
*first_arg
, rtx_insn
*insn
)
15993 set
= single_set (insn
);
15996 tmp
= SET_DEST (set
);
15999 /* Add output dependency to the first function argument. */
16000 add_dependence (first_arg
, insn
, REG_DEP_OUTPUT
);
16003 /* Add anti dependency. */
16004 add_dependence (first_arg
, insn
, REG_DEP_ANTI
);
16007 /* Avoid cross block motion of function argument through adding dependency
16008 from the first non-jump instruction in bb. */
16010 add_dependee_for_func_arg (rtx_insn
*arg
, basic_block bb
)
16012 rtx_insn
*insn
= BB_END (bb
);
16016 if (NONDEBUG_INSN_P (insn
) && NONJUMP_INSN_P (insn
))
16018 rtx set
= single_set (insn
);
16021 avoid_func_arg_motion (arg
, insn
);
16025 if (insn
== BB_HEAD (bb
))
16027 insn
= PREV_INSN (insn
);
16031 /* Hook for pre-reload schedule - avoid motion of function arguments
16032 passed in likely spilled HW registers. */
16034 ix86_dependencies_evaluation_hook (rtx_insn
*head
, rtx_insn
*tail
)
16037 rtx_insn
*first_arg
= NULL
;
16038 if (reload_completed
)
16040 while (head
!= tail
&& DEBUG_INSN_P (head
))
16041 head
= NEXT_INSN (head
);
16042 for (insn
= tail
; insn
!= head
; insn
= PREV_INSN (insn
))
16043 if (INSN_P (insn
) && CALL_P (insn
))
16045 first_arg
= add_parameter_dependencies (insn
, head
);
16048 /* Add dependee for first argument to predecessors if only
16049 region contains more than one block. */
16050 basic_block bb
= BLOCK_FOR_INSN (insn
);
16051 int rgn
= CONTAINING_RGN (bb
->index
);
16052 int nr_blks
= RGN_NR_BLOCKS (rgn
);
16053 /* Skip trivial regions and region head blocks that can have
16054 predecessors outside of region. */
16055 if (nr_blks
> 1 && BLOCK_TO_BB (bb
->index
) != 0)
16060 /* Regions are SCCs with the exception of selective
16061 scheduling with pipelining of outer blocks enabled.
16062 So also check that immediate predecessors of a non-head
16063 block are in the same region. */
16064 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
16066 /* Avoid creating of loop-carried dependencies through
16067 using topological ordering in the region. */
16068 if (rgn
== CONTAINING_RGN (e
->src
->index
)
16069 && BLOCK_TO_BB (bb
->index
) > BLOCK_TO_BB (e
->src
->index
))
16070 add_dependee_for_func_arg (first_arg
, e
->src
);
16078 else if (first_arg
)
16079 avoid_func_arg_motion (first_arg
, insn
);
16082 /* Hook for pre-reload schedule - set priority of moves from likely spilled
16083 HW registers to maximum, to schedule them at soon as possible. These are
16084 moves from function argument registers at the top of the function entry
16085 and moves from function return value registers after call. */
16087 ix86_adjust_priority (rtx_insn
*insn
, int priority
)
16091 if (reload_completed
)
16094 if (!NONDEBUG_INSN_P (insn
))
16097 set
= single_set (insn
);
16100 rtx tmp
= SET_SRC (set
);
16102 && HARD_REGISTER_P (tmp
)
16103 && !TEST_HARD_REG_BIT (fixed_reg_set
, REGNO (tmp
))
16104 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp
))))
16105 return current_sched_info
->sched_max_insns_priority
;
16111 /* Prepare for scheduling pass. */
16113 ix86_sched_init_global (FILE *, int, int)
16115 /* Install scheduling hooks for current CPU. Some of these hooks are used
16116 in time-critical parts of the scheduler, so we only set them up when
16117 they are actually used. */
16120 case PROCESSOR_CORE2
:
16121 case PROCESSOR_NEHALEM
:
16122 case PROCESSOR_SANDYBRIDGE
:
16123 case PROCESSOR_HASWELL
:
16124 case PROCESSOR_GENERIC
:
16125 /* Do not perform multipass scheduling for pre-reload schedule
16126 to save compile time. */
16127 if (reload_completed
)
16129 ix86_core2i7_init_hooks ();
16132 /* Fall through. */
16134 targetm
.sched
.dfa_post_advance_cycle
= NULL
;
16135 targetm
.sched
.first_cycle_multipass_init
= NULL
;
16136 targetm
.sched
.first_cycle_multipass_begin
= NULL
;
16137 targetm
.sched
.first_cycle_multipass_issue
= NULL
;
16138 targetm
.sched
.first_cycle_multipass_backtrack
= NULL
;
16139 targetm
.sched
.first_cycle_multipass_end
= NULL
;
16140 targetm
.sched
.first_cycle_multipass_fini
= NULL
;
16146 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
16148 static HOST_WIDE_INT
16149 ix86_static_rtx_alignment (machine_mode mode
)
16151 if (mode
== DFmode
)
16153 if (ALIGN_MODE_128 (mode
))
16154 return MAX (128, GET_MODE_ALIGNMENT (mode
));
16155 return GET_MODE_ALIGNMENT (mode
);
16158 /* Implement TARGET_CONSTANT_ALIGNMENT. */
16160 static HOST_WIDE_INT
16161 ix86_constant_alignment (const_tree exp
, HOST_WIDE_INT align
)
16163 if (TREE_CODE (exp
) == REAL_CST
|| TREE_CODE (exp
) == VECTOR_CST
16164 || TREE_CODE (exp
) == INTEGER_CST
)
16166 machine_mode mode
= TYPE_MODE (TREE_TYPE (exp
));
16167 HOST_WIDE_INT mode_align
= ix86_static_rtx_alignment (mode
);
16168 return MAX (mode_align
, align
);
16170 else if (!optimize_size
&& TREE_CODE (exp
) == STRING_CST
16171 && TREE_STRING_LENGTH (exp
) >= 31 && align
< BITS_PER_WORD
)
16172 return BITS_PER_WORD
;
16177 /* Implement TARGET_EMPTY_RECORD_P. */
16180 ix86_is_empty_record (const_tree type
)
16184 return default_is_empty_record (type
);
16187 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
16190 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v
, tree type
)
16192 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
16194 if (!cum
->warn_empty
)
16197 if (!TYPE_EMPTY_P (type
))
16200 /* Don't warn if the function isn't visible outside of the TU. */
16201 if (cum
->decl
&& !TREE_PUBLIC (cum
->decl
))
16204 const_tree ctx
= get_ultimate_context (cum
->decl
);
16205 if (ctx
!= NULL_TREE
16206 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx
))
16209 /* If the actual size of the type is zero, then there is no change
16210 in how objects of this size are passed. */
16211 if (int_size_in_bytes (type
) == 0)
16214 warning (OPT_Wabi
, "empty class %qT parameter passing ABI "
16215 "changes in %<-fabi-version=12%> (GCC 8)", type
);
16217 /* Only warn once. */
16218 cum
->warn_empty
= false;
16221 /* This hook returns name of multilib ABI. */
16223 static const char *
16224 ix86_get_multilib_abi_name (void)
16226 if (!(TARGET_64BIT_P (ix86_isa_flags
)))
16228 else if (TARGET_X32_P (ix86_isa_flags
))
16234 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
16235 the data type, and ALIGN is the alignment that the object would
16236 ordinarily have. */
16239 iamcu_alignment (tree type
, int align
)
16243 if (align
< 32 || TYPE_USER_ALIGN (type
))
16246 /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
16248 mode
= TYPE_MODE (strip_array_types (type
));
16249 switch (GET_MODE_CLASS (mode
))
16252 case MODE_COMPLEX_INT
:
16253 case MODE_COMPLEX_FLOAT
:
16255 case MODE_DECIMAL_FLOAT
:
16262 /* Compute the alignment for a static variable.
16263 TYPE is the data type, and ALIGN is the alignment that
16264 the object would ordinarily have. The value of this function is used
16265 instead of that alignment to align the object. */
16268 ix86_data_alignment (tree type
, unsigned int align
, bool opt
)
16270 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16271 for symbols from other compilation units or symbols that don't need
16272 to bind locally. In order to preserve some ABI compatibility with
16273 those compilers, ensure we don't decrease alignment from what we
16276 unsigned int max_align_compat
= MIN (256, MAX_OFILE_ALIGNMENT
);
16278 /* A data structure, equal or greater than the size of a cache line
16279 (64 bytes in the Pentium 4 and other recent Intel processors, including
16280 processors based on Intel Core microarchitecture) should be aligned
16281 so that its base address is a multiple of a cache line size. */
16283 unsigned int max_align
16284 = MIN ((unsigned) ix86_tune_cost
->prefetch_block
* 8, MAX_OFILE_ALIGNMENT
);
16286 if (max_align
< BITS_PER_WORD
)
16287 max_align
= BITS_PER_WORD
;
16289 switch (ix86_align_data_type
)
16291 case ix86_align_data_type_abi
: opt
= false; break;
16292 case ix86_align_data_type_compat
: max_align
= BITS_PER_WORD
; break;
16293 case ix86_align_data_type_cacheline
: break;
16297 align
= iamcu_alignment (type
, align
);
16300 && AGGREGATE_TYPE_P (type
)
16301 && TYPE_SIZE (type
)
16302 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
)
16304 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), max_align_compat
)
16305 && align
< max_align_compat
)
16306 align
= max_align_compat
;
16307 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), max_align
)
16308 && align
< max_align
)
16312 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16313 to 16byte boundary. */
16316 if ((opt
? AGGREGATE_TYPE_P (type
) : TREE_CODE (type
) == ARRAY_TYPE
)
16317 && TYPE_SIZE (type
)
16318 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16319 && wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), 128)
16327 if (TREE_CODE (type
) == ARRAY_TYPE
)
16329 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16331 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16334 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16337 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16339 if ((TYPE_MODE (type
) == XCmode
16340 || TYPE_MODE (type
) == TCmode
) && align
< 128)
16343 else if ((TREE_CODE (type
) == RECORD_TYPE
16344 || TREE_CODE (type
) == UNION_TYPE
16345 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16346 && TYPE_FIELDS (type
))
16348 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16350 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16353 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16354 || TREE_CODE (type
) == INTEGER_TYPE
)
16356 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16358 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16365 /* Compute the alignment for a local variable or a stack slot. EXP is
16366 the data type or decl itself, MODE is the widest mode available and
16367 ALIGN is the alignment that the object would ordinarily have. The
16368 value of this macro is used instead of that alignment to align the
16372 ix86_local_alignment (tree exp
, machine_mode mode
,
16373 unsigned int align
)
16377 if (exp
&& DECL_P (exp
))
16379 type
= TREE_TYPE (exp
);
16388 /* Don't do dynamic stack realignment for long long objects with
16389 -mpreferred-stack-boundary=2. */
16392 && ix86_preferred_stack_boundary
< 64
16393 && (mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
16394 && (!type
|| !TYPE_USER_ALIGN (type
))
16395 && (!decl
|| !DECL_USER_ALIGN (decl
)))
16398 /* If TYPE is NULL, we are allocating a stack slot for caller-save
16399 register in MODE. We will return the largest alignment of XF
16403 if (mode
== XFmode
&& align
< GET_MODE_ALIGNMENT (DFmode
))
16404 align
= GET_MODE_ALIGNMENT (DFmode
);
16408 /* Don't increase alignment for Intel MCU psABI. */
16412 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16413 to 16byte boundary. Exact wording is:
16415 An array uses the same alignment as its elements, except that a local or
16416 global array variable of length at least 16 bytes or
16417 a C99 variable-length array variable always has alignment of at least 16 bytes.
16419 This was added to allow use of aligned SSE instructions at arrays. This
16420 rule is meant for static storage (where compiler cannot do the analysis
16421 by itself). We follow it for automatic variables only when convenient.
16422 We fully control everything in the function compiled and functions from
16423 other unit cannot rely on the alignment.
16425 Exclude va_list type. It is the common case of local array where
16426 we cannot benefit from the alignment.
16428 TODO: Probably one should optimize for size only when var is not escaping. */
16429 if (TARGET_64BIT
&& optimize_function_for_speed_p (cfun
)
16432 if (AGGREGATE_TYPE_P (type
)
16433 && (va_list_type_node
== NULL_TREE
16434 || (TYPE_MAIN_VARIANT (type
)
16435 != TYPE_MAIN_VARIANT (va_list_type_node
)))
16436 && TYPE_SIZE (type
)
16437 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
16438 && wi::geu_p (wi::to_wide (TYPE_SIZE (type
)), 128)
16442 if (TREE_CODE (type
) == ARRAY_TYPE
)
16444 if (TYPE_MODE (TREE_TYPE (type
)) == DFmode
&& align
< 64)
16446 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type
))) && align
< 128)
16449 else if (TREE_CODE (type
) == COMPLEX_TYPE
)
16451 if (TYPE_MODE (type
) == DCmode
&& align
< 64)
16453 if ((TYPE_MODE (type
) == XCmode
16454 || TYPE_MODE (type
) == TCmode
) && align
< 128)
16457 else if ((TREE_CODE (type
) == RECORD_TYPE
16458 || TREE_CODE (type
) == UNION_TYPE
16459 || TREE_CODE (type
) == QUAL_UNION_TYPE
)
16460 && TYPE_FIELDS (type
))
16462 if (DECL_MODE (TYPE_FIELDS (type
)) == DFmode
&& align
< 64)
16464 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type
))) && align
< 128)
16467 else if (TREE_CODE (type
) == REAL_TYPE
|| TREE_CODE (type
) == VECTOR_TYPE
16468 || TREE_CODE (type
) == INTEGER_TYPE
)
16471 if (TYPE_MODE (type
) == DFmode
&& align
< 64)
16473 if (ALIGN_MODE_128 (TYPE_MODE (type
)) && align
< 128)
16479 /* Compute the minimum required alignment for dynamic stack realignment
16480 purposes for a local variable, parameter or a stack slot. EXP is
16481 the data type or decl itself, MODE is its mode and ALIGN is the
16482 alignment that the object would ordinarily have. */
16485 ix86_minimum_alignment (tree exp
, machine_mode mode
,
16486 unsigned int align
)
16490 if (exp
&& DECL_P (exp
))
16492 type
= TREE_TYPE (exp
);
16501 if (TARGET_64BIT
|| align
!= 64 || ix86_preferred_stack_boundary
>= 64)
16504 /* Don't do dynamic stack realignment for long long objects with
16505 -mpreferred-stack-boundary=2. */
16506 if ((mode
== DImode
|| (type
&& TYPE_MODE (type
) == DImode
))
16507 && (!type
|| !TYPE_USER_ALIGN (type
))
16508 && (!decl
|| !DECL_USER_ALIGN (decl
)))
16510 gcc_checking_assert (!TARGET_STV
);
16517 /* Find a location for the static chain incoming to a nested function.
16518 This is a register, unless all free registers are used by arguments. */
16521 ix86_static_chain (const_tree fndecl_or_type
, bool incoming_p
)
16527 /* We always use R10 in 64-bit mode. */
16532 const_tree fntype
, fndecl
;
16535 /* By default in 32-bit mode we use ECX to pass the static chain. */
16538 if (TREE_CODE (fndecl_or_type
) == FUNCTION_DECL
)
16540 fntype
= TREE_TYPE (fndecl_or_type
);
16541 fndecl
= fndecl_or_type
;
16545 fntype
= fndecl_or_type
;
16549 ccvt
= ix86_get_callcvt (fntype
);
16550 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
16552 /* Fastcall functions use ecx/edx for arguments, which leaves
16553 us with EAX for the static chain.
16554 Thiscall functions use ecx for arguments, which also
16555 leaves us with EAX for the static chain. */
16558 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
16560 /* Thiscall functions use ecx for arguments, which leaves
16561 us with EAX and EDX for the static chain.
16562 We are using for abi-compatibility EAX. */
16565 else if (ix86_function_regparm (fntype
, fndecl
) == 3)
16567 /* For regparm 3, we have no free call-clobbered registers in
16568 which to store the static chain. In order to implement this,
16569 we have the trampoline push the static chain to the stack.
16570 However, we can't push a value below the return address when
16571 we call the nested function directly, so we have to use an
16572 alternate entry point. For this we use ESI, and have the
16573 alternate entry point push ESI, so that things appear the
16574 same once we're executing the nested function. */
16577 if (fndecl
== current_function_decl
16578 && !ix86_static_chain_on_stack
)
16580 gcc_assert (!reload_completed
);
16581 ix86_static_chain_on_stack
= true;
16583 return gen_frame_mem (SImode
,
16584 plus_constant (Pmode
,
16585 arg_pointer_rtx
, -8));
16591 return gen_rtx_REG (Pmode
, regno
);
16594 /* Emit RTL insns to initialize the variable parts of a trampoline.
16595 FNDECL is the decl of the target address; M_TRAMP is a MEM for
16596 the trampoline, and CHAIN_VALUE is an RTX for the static chain
16597 to be passed to the target function. */
16600 ix86_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
16605 bool need_endbr
= (flag_cf_protection
& CF_BRANCH
);
16607 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
16615 /* Insert ENDBR64. */
16616 mem
= adjust_address (m_tramp
, SImode
, offset
);
16617 emit_move_insn (mem
, gen_int_mode (0xfa1e0ff3, SImode
));
16621 /* Load the function address to r11. Try to load address using
16622 the shorter movl instead of movabs. We may want to support
16623 movq for kernel mode, but kernel does not use trampolines at
16624 the moment. FNADDR is a 32bit address and may not be in
16625 DImode when ptr_mode == SImode. Always use movl in this
16627 if (ptr_mode
== SImode
16628 || x86_64_zext_immediate_operand (fnaddr
, VOIDmode
))
16630 fnaddr
= copy_addr_to_reg (fnaddr
);
16632 mem
= adjust_address (m_tramp
, HImode
, offset
);
16633 emit_move_insn (mem
, gen_int_mode (0xbb41, HImode
));
16635 mem
= adjust_address (m_tramp
, SImode
, offset
+ 2);
16636 emit_move_insn (mem
, gen_lowpart (SImode
, fnaddr
));
16641 mem
= adjust_address (m_tramp
, HImode
, offset
);
16642 emit_move_insn (mem
, gen_int_mode (0xbb49, HImode
));
16644 mem
= adjust_address (m_tramp
, DImode
, offset
+ 2);
16645 emit_move_insn (mem
, fnaddr
);
16649 /* Load static chain using movabs to r10. Use the shorter movl
16650 instead of movabs when ptr_mode == SImode. */
16651 if (ptr_mode
== SImode
)
16662 mem
= adjust_address (m_tramp
, HImode
, offset
);
16663 emit_move_insn (mem
, gen_int_mode (opcode
, HImode
));
16665 mem
= adjust_address (m_tramp
, ptr_mode
, offset
+ 2);
16666 emit_move_insn (mem
, chain_value
);
16669 /* Jump to r11; the last (unused) byte is a nop, only there to
16670 pad the write out to a single 32-bit store. */
16671 mem
= adjust_address (m_tramp
, SImode
, offset
);
16672 emit_move_insn (mem
, gen_int_mode (0x90e3ff49, SImode
));
16679 /* Depending on the static chain location, either load a register
16680 with a constant, or push the constant to the stack. All of the
16681 instructions are the same size. */
16682 chain
= ix86_static_chain (fndecl
, true);
16685 switch (REGNO (chain
))
16688 opcode
= 0xb8; break;
16690 opcode
= 0xb9; break;
16692 gcc_unreachable ();
16700 /* Insert ENDBR32. */
16701 mem
= adjust_address (m_tramp
, SImode
, offset
);
16702 emit_move_insn (mem
, gen_int_mode (0xfb1e0ff3, SImode
));
16706 mem
= adjust_address (m_tramp
, QImode
, offset
);
16707 emit_move_insn (mem
, gen_int_mode (opcode
, QImode
));
16709 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
16710 emit_move_insn (mem
, chain_value
);
16713 mem
= adjust_address (m_tramp
, QImode
, offset
);
16714 emit_move_insn (mem
, gen_int_mode (0xe9, QImode
));
16716 mem
= adjust_address (m_tramp
, SImode
, offset
+ 1);
16718 /* Compute offset from the end of the jmp to the target function.
16719 In the case in which the trampoline stores the static chain on
16720 the stack, we need to skip the first insn which pushes the
16721 (call-saved) register static chain; this push is 1 byte. */
16723 disp
= expand_binop (SImode
, sub_optab
, fnaddr
,
16724 plus_constant (Pmode
, XEXP (m_tramp
, 0),
16725 offset
- (MEM_P (chain
) ? 1 : 0)),
16726 NULL_RTX
, 1, OPTAB_DIRECT
);
16727 emit_move_insn (mem
, disp
);
16730 gcc_assert (offset
<= TRAMPOLINE_SIZE
);
16732 #ifdef HAVE_ENABLE_EXECUTE_STACK
16733 #ifdef CHECK_EXECUTE_STACK_ENABLED
16734 if (CHECK_EXECUTE_STACK_ENABLED
)
16736 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__enable_execute_stack"),
16737 LCT_NORMAL
, VOIDmode
, XEXP (m_tramp
, 0), Pmode
);
/* NOTE(review): garbled extraction — the original return-type line and the
   surrounding braces are missing from this chunk.  Presumably implements the
   TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS target hook — confirm against the
   hook table in the original file.  */
/* Returns true unless the current function is "naked" (per
   ix86_function_naked); naked functions have no compiler-generated frame,
   so incoming arguments must not be spilled to stack slots.  */
16742 ix86_allocate_stack_slots_for_args (void)
16744 /* Naked functions should not allocate stack slots for arguments. */
16745 return !ix86_function_naked (current_function_decl
);
/* NOTE(review): garbled extraction — return type and braces are missing.
   Presumably implements the TARGET_WARN_FUNC_RETURN hook — confirm.
   DECL is the function declaration under compilation; a false result
   suppresses "missing return value" style warnings for it.  */
16749 ix86_warn_func_return (tree decl
)
16751 /* Naked functions are implemented entirely in assembly, including the
16752 return sequence, so suppress warnings about this. */
16753 return !ix86_function_naked (decl
);
/* NOTE(review): garbled extraction — interior lines (the end of the leading
   comment, the braces, and every return statement) are missing, so only the
   two recognized forms of ARG1 are visible below.  Judging from the callers
   elsewhere in this chunk, the function yields a tree holding the shift
   count on success — TODO confirm against the original source.  */
16756 /* Return the shift count of a vector by scalar shift builtin second argument
16759 ix86_vector_shift_count (tree arg1
)
/* Case 1: ARG1 is already an integer constant fitting an unsigned
   HOST_WIDE_INT.  (The return for this branch is on a missing line.)  */
16761 if (tree_fits_uhwi_p (arg1
))
/* Case 2: ARG1 is a vector constant; encode its bytes and reinterpret the
   low 64 bits as the count.  The CHAR_BIT == 8 guard keeps the 16-byte
   host buffer arithmetic meaningful.  */
16763 else if (TREE_CODE (arg1
) == VECTOR_CST
&& CHAR_BIT
== 8)
16765 /* The count argument is weird, passed in as various 128-bit
16766 (or 64-bit) vectors, the low 64 bits from it are the count. */
16767 unsigned char buf
[16];
16768 int len
= native_encode_expr (arg1
, buf
, 16);
16771 tree t
= native_interpret_expr (uint64_type_node
, buf
, len
);
/* Only succeed if reinterpretation produced a constant fitting an
   unsigned HOST_WIDE_INT; the success path itself is on missing lines.  */
16772 if (t
&& tree_fits_uhwi_p (t
))
16779 ix86_fold_builtin (tree fndecl
, int n_args
,
16780 tree
*args
, bool ignore ATTRIBUTE_UNUSED
)
16782 if (DECL_BUILT_IN_CLASS (fndecl
) == BUILT_IN_MD
)
16784 enum ix86_builtins fn_code
16785 = (enum ix86_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
16786 enum rtx_code rcode
;
16788 unsigned HOST_WIDE_INT mask
;
16792 case IX86_BUILTIN_CPU_IS
:
16793 case IX86_BUILTIN_CPU_SUPPORTS
:
16794 gcc_assert (n_args
== 1);
16795 return fold_builtin_cpu (fndecl
, args
);
16797 case IX86_BUILTIN_NANQ
:
16798 case IX86_BUILTIN_NANSQ
:
16800 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16801 const char *str
= c_getstr (*args
);
16802 int quiet
= fn_code
== IX86_BUILTIN_NANQ
;
16803 REAL_VALUE_TYPE real
;
16805 if (str
&& real_nan (&real
, str
, quiet
, TYPE_MODE (type
)))
16806 return build_real (type
, real
);
16810 case IX86_BUILTIN_INFQ
:
16811 case IX86_BUILTIN_HUGE_VALQ
:
16813 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16814 REAL_VALUE_TYPE inf
;
16816 return build_real (type
, inf
);
16819 case IX86_BUILTIN_TZCNT16
:
16820 case IX86_BUILTIN_CTZS
:
16821 case IX86_BUILTIN_TZCNT32
:
16822 case IX86_BUILTIN_TZCNT64
:
16823 gcc_assert (n_args
== 1);
16824 if (TREE_CODE (args
[0]) == INTEGER_CST
)
16826 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16827 tree arg
= args
[0];
16828 if (fn_code
== IX86_BUILTIN_TZCNT16
16829 || fn_code
== IX86_BUILTIN_CTZS
)
16830 arg
= fold_convert (short_unsigned_type_node
, arg
);
16831 if (integer_zerop (arg
))
16832 return build_int_cst (type
, TYPE_PRECISION (TREE_TYPE (arg
)));
16834 return fold_const_call (CFN_CTZ
, type
, arg
);
16838 case IX86_BUILTIN_LZCNT16
:
16839 case IX86_BUILTIN_CLZS
:
16840 case IX86_BUILTIN_LZCNT32
:
16841 case IX86_BUILTIN_LZCNT64
:
16842 gcc_assert (n_args
== 1);
16843 if (TREE_CODE (args
[0]) == INTEGER_CST
)
16845 tree type
= TREE_TYPE (TREE_TYPE (fndecl
));
16846 tree arg
= args
[0];
16847 if (fn_code
== IX86_BUILTIN_LZCNT16
16848 || fn_code
== IX86_BUILTIN_CLZS
)
16849 arg
= fold_convert (short_unsigned_type_node
, arg
);
16850 if (integer_zerop (arg
))
16851 return build_int_cst (type
, TYPE_PRECISION (TREE_TYPE (arg
)));
16853 return fold_const_call (CFN_CLZ
, type
, arg
);
16857 case IX86_BUILTIN_BEXTR32
:
16858 case IX86_BUILTIN_BEXTR64
:
16859 case IX86_BUILTIN_BEXTRI32
:
16860 case IX86_BUILTIN_BEXTRI64
:
16861 gcc_assert (n_args
== 2);
16862 if (tree_fits_uhwi_p (args
[1]))
16864 unsigned HOST_WIDE_INT res
= 0;
16865 unsigned int prec
= TYPE_PRECISION (TREE_TYPE (args
[0]));
16866 unsigned int start
= tree_to_uhwi (args
[1]);
16867 unsigned int len
= (start
& 0xff00) >> 8;
16869 if (start
>= prec
|| len
== 0)
16871 else if (!tree_fits_uhwi_p (args
[0]))
16874 res
= tree_to_uhwi (args
[0]) >> start
;
16877 if (len
< HOST_BITS_PER_WIDE_INT
)
16878 res
&= (HOST_WIDE_INT_1U
<< len
) - 1;
16879 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
16883 case IX86_BUILTIN_BZHI32
:
16884 case IX86_BUILTIN_BZHI64
:
16885 gcc_assert (n_args
== 2);
16886 if (tree_fits_uhwi_p (args
[1]))
16888 unsigned int idx
= tree_to_uhwi (args
[1]) & 0xff;
16889 if (idx
>= TYPE_PRECISION (TREE_TYPE (args
[0])))
16892 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl
)), 0);
16893 if (!tree_fits_uhwi_p (args
[0]))
16895 unsigned HOST_WIDE_INT res
= tree_to_uhwi (args
[0]);
16896 res
&= ~(HOST_WIDE_INT_M1U
<< idx
);
16897 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
16901 case IX86_BUILTIN_PDEP32
:
16902 case IX86_BUILTIN_PDEP64
:
16903 gcc_assert (n_args
== 2);
16904 if (tree_fits_uhwi_p (args
[0]) && tree_fits_uhwi_p (args
[1]))
16906 unsigned HOST_WIDE_INT src
= tree_to_uhwi (args
[0]);
16907 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (args
[1]);
16908 unsigned HOST_WIDE_INT res
= 0;
16909 unsigned HOST_WIDE_INT m
, k
= 1;
16910 for (m
= 1; m
; m
<<= 1)
16911 if ((mask
& m
) != 0)
16913 if ((src
& k
) != 0)
16917 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
16921 case IX86_BUILTIN_PEXT32
:
16922 case IX86_BUILTIN_PEXT64
:
16923 gcc_assert (n_args
== 2);
16924 if (tree_fits_uhwi_p (args
[0]) && tree_fits_uhwi_p (args
[1]))
16926 unsigned HOST_WIDE_INT src
= tree_to_uhwi (args
[0]);
16927 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (args
[1]);
16928 unsigned HOST_WIDE_INT res
= 0;
16929 unsigned HOST_WIDE_INT m
, k
= 1;
16930 for (m
= 1; m
; m
<<= 1)
16931 if ((mask
& m
) != 0)
16933 if ((src
& m
) != 0)
16937 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
16941 case IX86_BUILTIN_MOVMSKPS
:
16942 case IX86_BUILTIN_PMOVMSKB
:
16943 case IX86_BUILTIN_MOVMSKPD
:
16944 case IX86_BUILTIN_PMOVMSKB128
:
16945 case IX86_BUILTIN_MOVMSKPD256
:
16946 case IX86_BUILTIN_MOVMSKPS256
:
16947 case IX86_BUILTIN_PMOVMSKB256
:
16948 gcc_assert (n_args
== 1);
16949 if (TREE_CODE (args
[0]) == VECTOR_CST
)
16951 HOST_WIDE_INT res
= 0;
16952 for (unsigned i
= 0; i
< VECTOR_CST_NELTS (args
[0]); ++i
)
16954 tree e
= VECTOR_CST_ELT (args
[0], i
);
16955 if (TREE_CODE (e
) == INTEGER_CST
&& !TREE_OVERFLOW (e
))
16957 if (wi::neg_p (wi::to_wide (e
)))
16958 res
|= HOST_WIDE_INT_1
<< i
;
16960 else if (TREE_CODE (e
) == REAL_CST
&& !TREE_OVERFLOW (e
))
16962 if (TREE_REAL_CST (e
).sign
)
16963 res
|= HOST_WIDE_INT_1
<< i
;
16968 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl
)), res
);
16972 case IX86_BUILTIN_PSLLD
:
16973 case IX86_BUILTIN_PSLLD128
:
16974 case IX86_BUILTIN_PSLLD128_MASK
:
16975 case IX86_BUILTIN_PSLLD256
:
16976 case IX86_BUILTIN_PSLLD256_MASK
:
16977 case IX86_BUILTIN_PSLLD512
:
16978 case IX86_BUILTIN_PSLLDI
:
16979 case IX86_BUILTIN_PSLLDI128
:
16980 case IX86_BUILTIN_PSLLDI128_MASK
:
16981 case IX86_BUILTIN_PSLLDI256
:
16982 case IX86_BUILTIN_PSLLDI256_MASK
:
16983 case IX86_BUILTIN_PSLLDI512
:
16984 case IX86_BUILTIN_PSLLQ
:
16985 case IX86_BUILTIN_PSLLQ128
:
16986 case IX86_BUILTIN_PSLLQ128_MASK
:
16987 case IX86_BUILTIN_PSLLQ256
:
16988 case IX86_BUILTIN_PSLLQ256_MASK
:
16989 case IX86_BUILTIN_PSLLQ512
:
16990 case IX86_BUILTIN_PSLLQI
:
16991 case IX86_BUILTIN_PSLLQI128
:
16992 case IX86_BUILTIN_PSLLQI128_MASK
:
16993 case IX86_BUILTIN_PSLLQI256
:
16994 case IX86_BUILTIN_PSLLQI256_MASK
:
16995 case IX86_BUILTIN_PSLLQI512
:
16996 case IX86_BUILTIN_PSLLW
:
16997 case IX86_BUILTIN_PSLLW128
:
16998 case IX86_BUILTIN_PSLLW128_MASK
:
16999 case IX86_BUILTIN_PSLLW256
:
17000 case IX86_BUILTIN_PSLLW256_MASK
:
17001 case IX86_BUILTIN_PSLLW512_MASK
:
17002 case IX86_BUILTIN_PSLLWI
:
17003 case IX86_BUILTIN_PSLLWI128
:
17004 case IX86_BUILTIN_PSLLWI128_MASK
:
17005 case IX86_BUILTIN_PSLLWI256
:
17006 case IX86_BUILTIN_PSLLWI256_MASK
:
17007 case IX86_BUILTIN_PSLLWI512_MASK
:
17011 case IX86_BUILTIN_PSRAD
:
17012 case IX86_BUILTIN_PSRAD128
:
17013 case IX86_BUILTIN_PSRAD128_MASK
:
17014 case IX86_BUILTIN_PSRAD256
:
17015 case IX86_BUILTIN_PSRAD256_MASK
:
17016 case IX86_BUILTIN_PSRAD512
:
17017 case IX86_BUILTIN_PSRADI
:
17018 case IX86_BUILTIN_PSRADI128
:
17019 case IX86_BUILTIN_PSRADI128_MASK
:
17020 case IX86_BUILTIN_PSRADI256
:
17021 case IX86_BUILTIN_PSRADI256_MASK
:
17022 case IX86_BUILTIN_PSRADI512
:
17023 case IX86_BUILTIN_PSRAQ128_MASK
:
17024 case IX86_BUILTIN_PSRAQ256_MASK
:
17025 case IX86_BUILTIN_PSRAQ512
:
17026 case IX86_BUILTIN_PSRAQI128_MASK
:
17027 case IX86_BUILTIN_PSRAQI256_MASK
:
17028 case IX86_BUILTIN_PSRAQI512
:
17029 case IX86_BUILTIN_PSRAW
:
17030 case IX86_BUILTIN_PSRAW128
:
17031 case IX86_BUILTIN_PSRAW128_MASK
:
17032 case IX86_BUILTIN_PSRAW256
:
17033 case IX86_BUILTIN_PSRAW256_MASK
:
17034 case IX86_BUILTIN_PSRAW512
:
17035 case IX86_BUILTIN_PSRAWI
:
17036 case IX86_BUILTIN_PSRAWI128
:
17037 case IX86_BUILTIN_PSRAWI128_MASK
:
17038 case IX86_BUILTIN_PSRAWI256
:
17039 case IX86_BUILTIN_PSRAWI256_MASK
:
17040 case IX86_BUILTIN_PSRAWI512
:
17044 case IX86_BUILTIN_PSRLD
:
17045 case IX86_BUILTIN_PSRLD128
:
17046 case IX86_BUILTIN_PSRLD128_MASK
:
17047 case IX86_BUILTIN_PSRLD256
:
17048 case IX86_BUILTIN_PSRLD256_MASK
:
17049 case IX86_BUILTIN_PSRLD512
:
17050 case IX86_BUILTIN_PSRLDI
:
17051 case IX86_BUILTIN_PSRLDI128
:
17052 case IX86_BUILTIN_PSRLDI128_MASK
:
17053 case IX86_BUILTIN_PSRLDI256
:
17054 case IX86_BUILTIN_PSRLDI256_MASK
:
17055 case IX86_BUILTIN_PSRLDI512
:
17056 case IX86_BUILTIN_PSRLQ
:
17057 case IX86_BUILTIN_PSRLQ128
:
17058 case IX86_BUILTIN_PSRLQ128_MASK
:
17059 case IX86_BUILTIN_PSRLQ256
:
17060 case IX86_BUILTIN_PSRLQ256_MASK
:
17061 case IX86_BUILTIN_PSRLQ512
:
17062 case IX86_BUILTIN_PSRLQI
:
17063 case IX86_BUILTIN_PSRLQI128
:
17064 case IX86_BUILTIN_PSRLQI128_MASK
:
17065 case IX86_BUILTIN_PSRLQI256
:
17066 case IX86_BUILTIN_PSRLQI256_MASK
:
17067 case IX86_BUILTIN_PSRLQI512
:
17068 case IX86_BUILTIN_PSRLW
:
17069 case IX86_BUILTIN_PSRLW128
:
17070 case IX86_BUILTIN_PSRLW128_MASK
:
17071 case IX86_BUILTIN_PSRLW256
:
17072 case IX86_BUILTIN_PSRLW256_MASK
:
17073 case IX86_BUILTIN_PSRLW512
:
17074 case IX86_BUILTIN_PSRLWI
:
17075 case IX86_BUILTIN_PSRLWI128
:
17076 case IX86_BUILTIN_PSRLWI128_MASK
:
17077 case IX86_BUILTIN_PSRLWI256
:
17078 case IX86_BUILTIN_PSRLWI256_MASK
:
17079 case IX86_BUILTIN_PSRLWI512
:
17083 case IX86_BUILTIN_PSLLVV16HI
:
17084 case IX86_BUILTIN_PSLLVV16SI
:
17085 case IX86_BUILTIN_PSLLVV2DI
:
17086 case IX86_BUILTIN_PSLLVV2DI_MASK
:
17087 case IX86_BUILTIN_PSLLVV32HI
:
17088 case IX86_BUILTIN_PSLLVV4DI
:
17089 case IX86_BUILTIN_PSLLVV4DI_MASK
:
17090 case IX86_BUILTIN_PSLLVV4SI
:
17091 case IX86_BUILTIN_PSLLVV4SI_MASK
:
17092 case IX86_BUILTIN_PSLLVV8DI
:
17093 case IX86_BUILTIN_PSLLVV8HI
:
17094 case IX86_BUILTIN_PSLLVV8SI
:
17095 case IX86_BUILTIN_PSLLVV8SI_MASK
:
17099 case IX86_BUILTIN_PSRAVQ128
:
17100 case IX86_BUILTIN_PSRAVQ256
:
17101 case IX86_BUILTIN_PSRAVV16HI
:
17102 case IX86_BUILTIN_PSRAVV16SI
:
17103 case IX86_BUILTIN_PSRAVV32HI
:
17104 case IX86_BUILTIN_PSRAVV4SI
:
17105 case IX86_BUILTIN_PSRAVV4SI_MASK
:
17106 case IX86_BUILTIN_PSRAVV8DI
:
17107 case IX86_BUILTIN_PSRAVV8HI
:
17108 case IX86_BUILTIN_PSRAVV8SI
:
17109 case IX86_BUILTIN_PSRAVV8SI_MASK
:
17113 case IX86_BUILTIN_PSRLVV16HI
:
17114 case IX86_BUILTIN_PSRLVV16SI
:
17115 case IX86_BUILTIN_PSRLVV2DI
:
17116 case IX86_BUILTIN_PSRLVV2DI_MASK
:
17117 case IX86_BUILTIN_PSRLVV32HI
:
17118 case IX86_BUILTIN_PSRLVV4DI
:
17119 case IX86_BUILTIN_PSRLVV4DI_MASK
:
17120 case IX86_BUILTIN_PSRLVV4SI
:
17121 case IX86_BUILTIN_PSRLVV4SI_MASK
:
17122 case IX86_BUILTIN_PSRLVV8DI
:
17123 case IX86_BUILTIN_PSRLVV8HI
:
17124 case IX86_BUILTIN_PSRLVV8SI
:
17125 case IX86_BUILTIN_PSRLVV8SI_MASK
:
17131 gcc_assert (n_args
>= 2);
17132 if (TREE_CODE (args
[0]) != VECTOR_CST
)
17134 mask
= HOST_WIDE_INT_M1U
;
17137 /* This is masked shift. */
17138 if (!tree_fits_uhwi_p (args
[n_args
- 1])
17139 || TREE_SIDE_EFFECTS (args
[n_args
- 2]))
17141 mask
= tree_to_uhwi (args
[n_args
- 1]);
17142 unsigned elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (args
[0]));
17143 mask
|= HOST_WIDE_INT_M1U
<< elems
;
17144 if (mask
!= HOST_WIDE_INT_M1U
17145 && TREE_CODE (args
[n_args
- 2]) != VECTOR_CST
)
17147 if (mask
== (HOST_WIDE_INT_M1U
<< elems
))
17148 return args
[n_args
- 2];
17150 if (is_vshift
&& TREE_CODE (args
[1]) != VECTOR_CST
)
17152 if (tree tem
= (is_vshift
? integer_one_node
17153 : ix86_vector_shift_count (args
[1])))
17155 unsigned HOST_WIDE_INT count
= tree_to_uhwi (tem
);
17156 unsigned HOST_WIDE_INT prec
17157 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args
[0])));
17158 if (count
== 0 && mask
== HOST_WIDE_INT_M1U
)
17162 if (rcode
== ASHIFTRT
)
17164 else if (mask
== HOST_WIDE_INT_M1U
)
17165 return build_zero_cst (TREE_TYPE (args
[0]));
17167 tree countt
= NULL_TREE
;
17171 countt
= integer_zero_node
;
17173 countt
= build_int_cst (integer_type_node
, count
);
17175 tree_vector_builder builder
;
17176 builder
.new_unary_operation (TREE_TYPE (args
[0]), args
[0],
17178 unsigned int cnt
= builder
.encoded_nelts ();
17179 for (unsigned int i
= 0; i
< cnt
; ++i
)
17181 tree elt
= VECTOR_CST_ELT (args
[0], i
);
17182 if (TREE_CODE (elt
) != INTEGER_CST
|| TREE_OVERFLOW (elt
))
17184 tree type
= TREE_TYPE (elt
);
17185 if (rcode
== LSHIFTRT
)
17186 elt
= fold_convert (unsigned_type_for (type
), elt
);
17189 countt
= VECTOR_CST_ELT (args
[1], i
);
17190 if (TREE_CODE (countt
) != INTEGER_CST
17191 || TREE_OVERFLOW (countt
))
17193 if (wi::neg_p (wi::to_wide (countt
))
17194 || wi::to_widest (countt
) >= prec
)
17196 if (rcode
== ASHIFTRT
)
17197 countt
= build_int_cst (TREE_TYPE (countt
),
17201 elt
= build_zero_cst (TREE_TYPE (elt
));
17202 countt
= build_zero_cst (TREE_TYPE (countt
));
17206 else if (count
>= prec
)
17207 elt
= build_zero_cst (TREE_TYPE (elt
));
17208 elt
= const_binop (rcode
== ASHIFT
17209 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
17210 TREE_TYPE (elt
), elt
, countt
);
17211 if (!elt
|| TREE_CODE (elt
) != INTEGER_CST
)
17213 if (rcode
== LSHIFTRT
)
17214 elt
= fold_convert (type
, elt
);
17215 if ((mask
& (HOST_WIDE_INT_1U
<< i
)) == 0)
17217 elt
= VECTOR_CST_ELT (args
[n_args
- 2], i
);
17218 if (TREE_CODE (elt
) != INTEGER_CST
17219 || TREE_OVERFLOW (elt
))
17222 builder
.quick_push (elt
);
17224 return builder
.build ();
17233 #ifdef SUBTARGET_FOLD_BUILTIN
17234 return SUBTARGET_FOLD_BUILTIN (fndecl
, n_args
, args
, ignore
);
17240 /* Fold a MD builtin (use ix86_fold_builtin for folding into
17241 constant) in GIMPLE. */
17244 ix86_gimple_fold_builtin (gimple_stmt_iterator
*gsi
)
17246 gimple
*stmt
= gsi_stmt (*gsi
);
17247 tree fndecl
= gimple_call_fndecl (stmt
);
17248 gcc_checking_assert (fndecl
&& fndecl_built_in_p (fndecl
, BUILT_IN_MD
));
17249 int n_args
= gimple_call_num_args (stmt
);
17250 enum ix86_builtins fn_code
17251 = (enum ix86_builtins
) DECL_MD_FUNCTION_CODE (fndecl
);
17252 tree decl
= NULL_TREE
;
17253 tree arg0
, arg1
, arg2
;
17254 enum rtx_code rcode
;
17255 unsigned HOST_WIDE_INT count
;
17260 case IX86_BUILTIN_TZCNT32
:
17261 decl
= builtin_decl_implicit (BUILT_IN_CTZ
);
17262 goto fold_tzcnt_lzcnt
;
17264 case IX86_BUILTIN_TZCNT64
:
17265 decl
= builtin_decl_implicit (BUILT_IN_CTZLL
);
17266 goto fold_tzcnt_lzcnt
;
17268 case IX86_BUILTIN_LZCNT32
:
17269 decl
= builtin_decl_implicit (BUILT_IN_CLZ
);
17270 goto fold_tzcnt_lzcnt
;
17272 case IX86_BUILTIN_LZCNT64
:
17273 decl
= builtin_decl_implicit (BUILT_IN_CLZLL
);
17274 goto fold_tzcnt_lzcnt
;
17277 gcc_assert (n_args
== 1);
17278 arg0
= gimple_call_arg (stmt
, 0);
17279 if (TREE_CODE (arg0
) == SSA_NAME
&& decl
&& gimple_call_lhs (stmt
))
17281 int prec
= TYPE_PRECISION (TREE_TYPE (arg0
));
17282 /* If arg0 is provably non-zero, optimize into generic
17283 __builtin_c[tl]z{,ll} function the middle-end handles
17285 if (!expr_not_equal_to (arg0
, wi::zero (prec
)))
17288 location_t loc
= gimple_location (stmt
);
17289 gimple
*g
= gimple_build_call (decl
, 1, arg0
);
17290 gimple_set_location (g
, loc
);
17291 tree lhs
= make_ssa_name (integer_type_node
);
17292 gimple_call_set_lhs (g
, lhs
);
17293 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
17294 g
= gimple_build_assign (gimple_call_lhs (stmt
), NOP_EXPR
, lhs
);
17295 gimple_set_location (g
, loc
);
17296 gsi_replace (gsi
, g
, false);
17301 case IX86_BUILTIN_BZHI32
:
17302 case IX86_BUILTIN_BZHI64
:
17303 gcc_assert (n_args
== 2);
17304 arg1
= gimple_call_arg (stmt
, 1);
17305 if (tree_fits_uhwi_p (arg1
) && gimple_call_lhs (stmt
))
17307 unsigned int idx
= tree_to_uhwi (arg1
) & 0xff;
17308 arg0
= gimple_call_arg (stmt
, 0);
17309 if (idx
< TYPE_PRECISION (TREE_TYPE (arg0
)))
17311 location_t loc
= gimple_location (stmt
);
17312 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17313 gimple_set_location (g
, loc
);
17314 gsi_replace (gsi
, g
, false);
17319 case IX86_BUILTIN_PDEP32
:
17320 case IX86_BUILTIN_PDEP64
:
17321 case IX86_BUILTIN_PEXT32
:
17322 case IX86_BUILTIN_PEXT64
:
17323 gcc_assert (n_args
== 2);
17324 arg1
= gimple_call_arg (stmt
, 1);
17325 if (integer_all_onesp (arg1
) && gimple_call_lhs (stmt
))
17327 location_t loc
= gimple_location (stmt
);
17328 arg0
= gimple_call_arg (stmt
, 0);
17329 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17330 gimple_set_location (g
, loc
);
17331 gsi_replace (gsi
, g
, false);
17336 case IX86_BUILTIN_PSLLD
:
17337 case IX86_BUILTIN_PSLLD128
:
17338 case IX86_BUILTIN_PSLLD128_MASK
:
17339 case IX86_BUILTIN_PSLLD256
:
17340 case IX86_BUILTIN_PSLLD256_MASK
:
17341 case IX86_BUILTIN_PSLLD512
:
17342 case IX86_BUILTIN_PSLLDI
:
17343 case IX86_BUILTIN_PSLLDI128
:
17344 case IX86_BUILTIN_PSLLDI128_MASK
:
17345 case IX86_BUILTIN_PSLLDI256
:
17346 case IX86_BUILTIN_PSLLDI256_MASK
:
17347 case IX86_BUILTIN_PSLLDI512
:
17348 case IX86_BUILTIN_PSLLQ
:
17349 case IX86_BUILTIN_PSLLQ128
:
17350 case IX86_BUILTIN_PSLLQ128_MASK
:
17351 case IX86_BUILTIN_PSLLQ256
:
17352 case IX86_BUILTIN_PSLLQ256_MASK
:
17353 case IX86_BUILTIN_PSLLQ512
:
17354 case IX86_BUILTIN_PSLLQI
:
17355 case IX86_BUILTIN_PSLLQI128
:
17356 case IX86_BUILTIN_PSLLQI128_MASK
:
17357 case IX86_BUILTIN_PSLLQI256
:
17358 case IX86_BUILTIN_PSLLQI256_MASK
:
17359 case IX86_BUILTIN_PSLLQI512
:
17360 case IX86_BUILTIN_PSLLW
:
17361 case IX86_BUILTIN_PSLLW128
:
17362 case IX86_BUILTIN_PSLLW128_MASK
:
17363 case IX86_BUILTIN_PSLLW256
:
17364 case IX86_BUILTIN_PSLLW256_MASK
:
17365 case IX86_BUILTIN_PSLLW512_MASK
:
17366 case IX86_BUILTIN_PSLLWI
:
17367 case IX86_BUILTIN_PSLLWI128
:
17368 case IX86_BUILTIN_PSLLWI128_MASK
:
17369 case IX86_BUILTIN_PSLLWI256
:
17370 case IX86_BUILTIN_PSLLWI256_MASK
:
17371 case IX86_BUILTIN_PSLLWI512_MASK
:
17375 case IX86_BUILTIN_PSRAD
:
17376 case IX86_BUILTIN_PSRAD128
:
17377 case IX86_BUILTIN_PSRAD128_MASK
:
17378 case IX86_BUILTIN_PSRAD256
:
17379 case IX86_BUILTIN_PSRAD256_MASK
:
17380 case IX86_BUILTIN_PSRAD512
:
17381 case IX86_BUILTIN_PSRADI
:
17382 case IX86_BUILTIN_PSRADI128
:
17383 case IX86_BUILTIN_PSRADI128_MASK
:
17384 case IX86_BUILTIN_PSRADI256
:
17385 case IX86_BUILTIN_PSRADI256_MASK
:
17386 case IX86_BUILTIN_PSRADI512
:
17387 case IX86_BUILTIN_PSRAQ128_MASK
:
17388 case IX86_BUILTIN_PSRAQ256_MASK
:
17389 case IX86_BUILTIN_PSRAQ512
:
17390 case IX86_BUILTIN_PSRAQI128_MASK
:
17391 case IX86_BUILTIN_PSRAQI256_MASK
:
17392 case IX86_BUILTIN_PSRAQI512
:
17393 case IX86_BUILTIN_PSRAW
:
17394 case IX86_BUILTIN_PSRAW128
:
17395 case IX86_BUILTIN_PSRAW128_MASK
:
17396 case IX86_BUILTIN_PSRAW256
:
17397 case IX86_BUILTIN_PSRAW256_MASK
:
17398 case IX86_BUILTIN_PSRAW512
:
17399 case IX86_BUILTIN_PSRAWI
:
17400 case IX86_BUILTIN_PSRAWI128
:
17401 case IX86_BUILTIN_PSRAWI128_MASK
:
17402 case IX86_BUILTIN_PSRAWI256
:
17403 case IX86_BUILTIN_PSRAWI256_MASK
:
17404 case IX86_BUILTIN_PSRAWI512
:
17408 case IX86_BUILTIN_PSRLD
:
17409 case IX86_BUILTIN_PSRLD128
:
17410 case IX86_BUILTIN_PSRLD128_MASK
:
17411 case IX86_BUILTIN_PSRLD256
:
17412 case IX86_BUILTIN_PSRLD256_MASK
:
17413 case IX86_BUILTIN_PSRLD512
:
17414 case IX86_BUILTIN_PSRLDI
:
17415 case IX86_BUILTIN_PSRLDI128
:
17416 case IX86_BUILTIN_PSRLDI128_MASK
:
17417 case IX86_BUILTIN_PSRLDI256
:
17418 case IX86_BUILTIN_PSRLDI256_MASK
:
17419 case IX86_BUILTIN_PSRLDI512
:
17420 case IX86_BUILTIN_PSRLQ
:
17421 case IX86_BUILTIN_PSRLQ128
:
17422 case IX86_BUILTIN_PSRLQ128_MASK
:
17423 case IX86_BUILTIN_PSRLQ256
:
17424 case IX86_BUILTIN_PSRLQ256_MASK
:
17425 case IX86_BUILTIN_PSRLQ512
:
17426 case IX86_BUILTIN_PSRLQI
:
17427 case IX86_BUILTIN_PSRLQI128
:
17428 case IX86_BUILTIN_PSRLQI128_MASK
:
17429 case IX86_BUILTIN_PSRLQI256
:
17430 case IX86_BUILTIN_PSRLQI256_MASK
:
17431 case IX86_BUILTIN_PSRLQI512
:
17432 case IX86_BUILTIN_PSRLW
:
17433 case IX86_BUILTIN_PSRLW128
:
17434 case IX86_BUILTIN_PSRLW128_MASK
:
17435 case IX86_BUILTIN_PSRLW256
:
17436 case IX86_BUILTIN_PSRLW256_MASK
:
17437 case IX86_BUILTIN_PSRLW512
:
17438 case IX86_BUILTIN_PSRLWI
:
17439 case IX86_BUILTIN_PSRLWI128
:
17440 case IX86_BUILTIN_PSRLWI128_MASK
:
17441 case IX86_BUILTIN_PSRLWI256
:
17442 case IX86_BUILTIN_PSRLWI256_MASK
:
17443 case IX86_BUILTIN_PSRLWI512
:
17447 case IX86_BUILTIN_PSLLVV16HI
:
17448 case IX86_BUILTIN_PSLLVV16SI
:
17449 case IX86_BUILTIN_PSLLVV2DI
:
17450 case IX86_BUILTIN_PSLLVV2DI_MASK
:
17451 case IX86_BUILTIN_PSLLVV32HI
:
17452 case IX86_BUILTIN_PSLLVV4DI
:
17453 case IX86_BUILTIN_PSLLVV4DI_MASK
:
17454 case IX86_BUILTIN_PSLLVV4SI
:
17455 case IX86_BUILTIN_PSLLVV4SI_MASK
:
17456 case IX86_BUILTIN_PSLLVV8DI
:
17457 case IX86_BUILTIN_PSLLVV8HI
:
17458 case IX86_BUILTIN_PSLLVV8SI
:
17459 case IX86_BUILTIN_PSLLVV8SI_MASK
:
17463 case IX86_BUILTIN_PSRAVQ128
:
17464 case IX86_BUILTIN_PSRAVQ256
:
17465 case IX86_BUILTIN_PSRAVV16HI
:
17466 case IX86_BUILTIN_PSRAVV16SI
:
17467 case IX86_BUILTIN_PSRAVV32HI
:
17468 case IX86_BUILTIN_PSRAVV4SI
:
17469 case IX86_BUILTIN_PSRAVV4SI_MASK
:
17470 case IX86_BUILTIN_PSRAVV8DI
:
17471 case IX86_BUILTIN_PSRAVV8HI
:
17472 case IX86_BUILTIN_PSRAVV8SI
:
17473 case IX86_BUILTIN_PSRAVV8SI_MASK
:
17477 case IX86_BUILTIN_PSRLVV16HI
:
17478 case IX86_BUILTIN_PSRLVV16SI
:
17479 case IX86_BUILTIN_PSRLVV2DI
:
17480 case IX86_BUILTIN_PSRLVV2DI_MASK
:
17481 case IX86_BUILTIN_PSRLVV32HI
:
17482 case IX86_BUILTIN_PSRLVV4DI
:
17483 case IX86_BUILTIN_PSRLVV4DI_MASK
:
17484 case IX86_BUILTIN_PSRLVV4SI
:
17485 case IX86_BUILTIN_PSRLVV4SI_MASK
:
17486 case IX86_BUILTIN_PSRLVV8DI
:
17487 case IX86_BUILTIN_PSRLVV8HI
:
17488 case IX86_BUILTIN_PSRLVV8SI
:
17489 case IX86_BUILTIN_PSRLVV8SI_MASK
:
17495 gcc_assert (n_args
>= 2);
17496 arg0
= gimple_call_arg (stmt
, 0);
17497 arg1
= gimple_call_arg (stmt
, 1);
17500 /* This is masked shift. Only optimize if the mask is all ones. */
17501 tree argl
= gimple_call_arg (stmt
, n_args
- 1);
17502 if (!tree_fits_uhwi_p (argl
))
17504 unsigned HOST_WIDE_INT mask
= tree_to_uhwi (argl
);
17505 unsigned elems
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0
));
17506 if ((mask
| (HOST_WIDE_INT_M1U
<< elems
)) != HOST_WIDE_INT_M1U
)
17511 if (TREE_CODE (arg1
) != VECTOR_CST
)
17513 count
= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0
)));
17514 if (integer_zerop (arg1
))
17516 else if (rcode
== ASHIFTRT
)
17519 for (unsigned int i
= 0; i
< VECTOR_CST_NELTS (arg1
); ++i
)
17521 tree elt
= VECTOR_CST_ELT (arg1
, i
);
17522 if (!wi::neg_p (wi::to_wide (elt
))
17523 && wi::to_widest (elt
) < count
)
17529 arg1
= ix86_vector_shift_count (arg1
);
17532 count
= tree_to_uhwi (arg1
);
17536 /* Just return the first argument for shift by 0. */
17537 location_t loc
= gimple_location (stmt
);
17538 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17539 gimple_set_location (g
, loc
);
17540 gsi_replace (gsi
, g
, false);
17543 if (rcode
!= ASHIFTRT
17544 && count
>= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0
))))
17546 /* For shift counts equal or greater than precision, except for
17547 arithmetic right shift the result is zero. */
17548 location_t loc
= gimple_location (stmt
);
17549 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
),
17550 build_zero_cst (TREE_TYPE (arg0
)));
17551 gimple_set_location (g
, loc
);
17552 gsi_replace (gsi
, g
, false);
17557 case IX86_BUILTIN_SHUFPD
:
17558 arg2
= gimple_call_arg (stmt
, 2);
17559 if (TREE_CODE (arg2
) == INTEGER_CST
)
17561 location_t loc
= gimple_location (stmt
);
17562 unsigned HOST_WIDE_INT imask
= TREE_INT_CST_LOW (arg2
);
17563 arg0
= gimple_call_arg (stmt
, 0);
17564 arg1
= gimple_call_arg (stmt
, 1);
17565 tree itype
= long_long_integer_type_node
;
17566 tree vtype
= build_vector_type (itype
, 2); /* V2DI */
17567 tree_vector_builder
elts (vtype
, 2, 1);
17568 /* Ignore bits other than the lowest 2. */
17569 elts
.quick_push (build_int_cst (itype
, imask
& 1));
17571 elts
.quick_push (build_int_cst (itype
, 2 + (imask
& 1)));
17572 tree omask
= elts
.build ();
17573 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
),
17575 arg0
, arg1
, omask
);
17576 gimple_set_location (g
, loc
);
17577 gsi_replace (gsi
, g
, false);
17580 // Do not error yet, the constant could be propagated later?
17590 /* Handler for an SVML-style interface to
17591 a library with vectorized intrinsics. */
17594 ix86_veclibabi_svml (combined_fn fn
, tree type_out
, tree type_in
)
17597 tree fntype
, new_fndecl
, args
;
17600 machine_mode el_mode
, in_mode
;
17603 /* The SVML is suitable for unsafe math only. */
17604 if (!flag_unsafe_math_optimizations
)
17607 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
17608 n
= TYPE_VECTOR_SUBPARTS (type_out
);
17609 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
17610 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
17611 if (el_mode
!= in_mode
17635 if ((el_mode
!= DFmode
|| n
!= 2)
17636 && (el_mode
!= SFmode
|| n
!= 4))
17644 tree fndecl
= mathfn_built_in (TREE_TYPE (type_in
), fn
);
17645 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
17647 if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOGF
)
17648 strcpy (name
, "vmlsLn4");
17649 else if (DECL_FUNCTION_CODE (fndecl
) == BUILT_IN_LOG
)
17650 strcpy (name
, "vmldLn2");
17653 sprintf (name
, "vmls%s", bname
+10);
17654 name
[strlen (name
)-1] = '4';
17657 sprintf (name
, "vmld%s2", bname
+10);
17659 /* Convert to uppercase. */
17663 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
17667 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
17669 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
17671 /* Build a function declaration for the vectorized function. */
17672 new_fndecl
= build_decl (BUILTINS_LOCATION
,
17673 FUNCTION_DECL
, get_identifier (name
), fntype
);
17674 TREE_PUBLIC (new_fndecl
) = 1;
17675 DECL_EXTERNAL (new_fndecl
) = 1;
17676 DECL_IS_NOVOPS (new_fndecl
) = 1;
17677 TREE_READONLY (new_fndecl
) = 1;
17682 /* Handler for an ACML-style interface to
17683 a library with vectorized intrinsics. */
17686 ix86_veclibabi_acml (combined_fn fn
, tree type_out
, tree type_in
)
17688 char name
[20] = "__vr.._";
17689 tree fntype
, new_fndecl
, args
;
17692 machine_mode el_mode
, in_mode
;
17695 /* The ACML is 64bits only and suitable for unsafe math only as
17696 it does not correctly support parts of IEEE with the required
17697 precision such as denormals. */
17699 || !flag_unsafe_math_optimizations
)
17702 el_mode
= TYPE_MODE (TREE_TYPE (type_out
));
17703 n
= TYPE_VECTOR_SUBPARTS (type_out
);
17704 in_mode
= TYPE_MODE (TREE_TYPE (type_in
));
17705 in_n
= TYPE_VECTOR_SUBPARTS (type_in
);
17706 if (el_mode
!= in_mode
17718 if (el_mode
== DFmode
&& n
== 2)
17723 else if (el_mode
== SFmode
&& n
== 4)
17736 tree fndecl
= mathfn_built_in (TREE_TYPE (type_in
), fn
);
17737 bname
= IDENTIFIER_POINTER (DECL_NAME (fndecl
));
17738 sprintf (name
+ 7, "%s", bname
+10);
17741 for (args
= DECL_ARGUMENTS (fndecl
); args
; args
= TREE_CHAIN (args
))
17745 fntype
= build_function_type_list (type_out
, type_in
, NULL
);
17747 fntype
= build_function_type_list (type_out
, type_in
, type_in
, NULL
);
17749 /* Build a function declaration for the vectorized function. */
17750 new_fndecl
= build_decl (BUILTINS_LOCATION
,
17751 FUNCTION_DECL
, get_identifier (name
), fntype
);
17752 TREE_PUBLIC (new_fndecl
) = 1;
17753 DECL_EXTERNAL (new_fndecl
) = 1;
17754 DECL_IS_NOVOPS (new_fndecl
) = 1;
17755 TREE_READONLY (new_fndecl
) = 1;
17760 /* Returns a decl of a function that implements scatter store with
17761 register type VECTYPE and index type INDEX_TYPE and SCALE.
17762 Return NULL_TREE if it is not available. */
17765 ix86_vectorize_builtin_scatter (const_tree vectype
,
17766 const_tree index_type
, int scale
)
17769 enum ix86_builtins code
;
17771 if (!TARGET_AVX512F
)
17774 if ((TREE_CODE (index_type
) != INTEGER_TYPE
17775 && !POINTER_TYPE_P (index_type
))
17776 || (TYPE_MODE (index_type
) != SImode
17777 && TYPE_MODE (index_type
) != DImode
))
17780 if (TYPE_PRECISION (index_type
) > POINTER_SIZE
)
17783 /* v*scatter* insn sign extends index to pointer mode. */
17784 if (TYPE_PRECISION (index_type
) < POINTER_SIZE
17785 && TYPE_UNSIGNED (index_type
))
17788 /* Scale can be 1, 2, 4 or 8. */
17791 || (scale
& (scale
- 1)) != 0)
17794 si
= TYPE_MODE (index_type
) == SImode
;
17795 switch (TYPE_MODE (vectype
))
17798 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DF
: IX86_BUILTIN_SCATTERDIV8DF
;
17801 code
= si
? IX86_BUILTIN_SCATTERALTSIV8DI
: IX86_BUILTIN_SCATTERDIV8DI
;
17804 code
= si
? IX86_BUILTIN_SCATTERSIV16SF
: IX86_BUILTIN_SCATTERALTDIV16SF
;
17807 code
= si
? IX86_BUILTIN_SCATTERSIV16SI
: IX86_BUILTIN_SCATTERALTDIV16SI
;
17810 if (TARGET_AVX512VL
)
17811 code
= si
? IX86_BUILTIN_SCATTERALTSIV4DF
: IX86_BUILTIN_SCATTERDIV4DF
;
17816 if (TARGET_AVX512VL
)
17817 code
= si
? IX86_BUILTIN_SCATTERALTSIV4DI
: IX86_BUILTIN_SCATTERDIV4DI
;
17822 if (TARGET_AVX512VL
)
17823 code
= si
? IX86_BUILTIN_SCATTERSIV8SF
: IX86_BUILTIN_SCATTERALTDIV8SF
;
17828 if (TARGET_AVX512VL
)
17829 code
= si
? IX86_BUILTIN_SCATTERSIV8SI
: IX86_BUILTIN_SCATTERALTDIV8SI
;
17834 if (TARGET_AVX512VL
)
17835 code
= si
? IX86_BUILTIN_SCATTERALTSIV2DF
: IX86_BUILTIN_SCATTERDIV2DF
;
17840 if (TARGET_AVX512VL
)
17841 code
= si
? IX86_BUILTIN_SCATTERALTSIV2DI
: IX86_BUILTIN_SCATTERDIV2DI
;
17846 if (TARGET_AVX512VL
)
17847 code
= si
? IX86_BUILTIN_SCATTERSIV4SF
: IX86_BUILTIN_SCATTERALTDIV4SF
;
17852 if (TARGET_AVX512VL
)
17853 code
= si
? IX86_BUILTIN_SCATTERSIV4SI
: IX86_BUILTIN_SCATTERALTDIV4SI
;
17861 return get_ix86_builtin (code
);
17864 /* Return true if it is safe to use the rsqrt optabs to optimize
17870 return (TARGET_SSE
&& TARGET_SSE_MATH
17871 && flag_finite_math_only
17872 && !flag_trapping_math
17873 && flag_unsafe_math_optimizations
);
17876 /* Helper for avx_vpermilps256_operand et al. This is also used by
17877 the expansion functions to turn the parallel back into a mask.
17878 The return value is 0 for no match and the imm8+1 for a match. */
17881 avx_vpermilp_parallel (rtx par
, machine_mode mode
)
17883 unsigned i
, nelt
= GET_MODE_NUNITS (mode
);
17885 unsigned char ipar
[16] = {}; /* Silence -Wuninitialized warning. */
17887 if (XVECLEN (par
, 0) != (int) nelt
)
17890 /* Validate that all of the elements are constants, and not totally
17891 out of range. Copy the data into an integral array to make the
17892 subsequent checks easier. */
17893 for (i
= 0; i
< nelt
; ++i
)
17895 rtx er
= XVECEXP (par
, 0, i
);
17896 unsigned HOST_WIDE_INT ei
;
17898 if (!CONST_INT_P (er
))
17909 /* In the 512-bit DFmode case, we can only move elements within
17910 a 128-bit lane. First fill the second part of the mask,
17912 for (i
= 4; i
< 6; ++i
)
17914 if (ipar
[i
] < 4 || ipar
[i
] >= 6)
17916 mask
|= (ipar
[i
] - 4) << i
;
17918 for (i
= 6; i
< 8; ++i
)
17922 mask
|= (ipar
[i
] - 6) << i
;
17927 /* In the 256-bit DFmode case, we can only move elements within
17929 for (i
= 0; i
< 2; ++i
)
17933 mask
|= ipar
[i
] << i
;
17935 for (i
= 2; i
< 4; ++i
)
17939 mask
|= (ipar
[i
] - 2) << i
;
17944 /* In 512 bit SFmode case, permutation in the upper 256 bits
17945 must mirror the permutation in the lower 256-bits. */
17946 for (i
= 0; i
< 8; ++i
)
17947 if (ipar
[i
] + 8 != ipar
[i
+ 8])
17952 /* In 256 bit SFmode case, we have full freedom of
17953 movement within the low 128-bit lane, but the high 128-bit
17954 lane must mirror the exact same pattern. */
17955 for (i
= 0; i
< 4; ++i
)
17956 if (ipar
[i
] + 4 != ipar
[i
+ 4])
17963 /* In the 128-bit case, we've full freedom in the placement of
17964 the elements from the source operand. */
17965 for (i
= 0; i
< nelt
; ++i
)
17966 mask
|= ipar
[i
] << (i
* (nelt
/ 2));
17970 gcc_unreachable ();
17973 /* Make sure success has a non-zero value by adding one. */
17977 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
17978 the expansion functions to turn the parallel back into a mask.
17979 The return value is 0 for no match and the imm8+1 for a match. */
17982 avx_vperm2f128_parallel (rtx par
, machine_mode mode
)
17984 unsigned i
, nelt
= GET_MODE_NUNITS (mode
), nelt2
= nelt
/ 2;
17986 unsigned char ipar
[8] = {}; /* Silence -Wuninitialized warning. */
17988 if (XVECLEN (par
, 0) != (int) nelt
)
17991 /* Validate that all of the elements are constants, and not totally
17992 out of range. Copy the data into an integral array to make the
17993 subsequent checks easier. */
17994 for (i
= 0; i
< nelt
; ++i
)
17996 rtx er
= XVECEXP (par
, 0, i
);
17997 unsigned HOST_WIDE_INT ei
;
17999 if (!CONST_INT_P (er
))
18002 if (ei
>= 2 * nelt
)
18007 /* Validate that the halves of the permute are halves. */
18008 for (i
= 0; i
< nelt2
- 1; ++i
)
18009 if (ipar
[i
] + 1 != ipar
[i
+ 1])
18011 for (i
= nelt2
; i
< nelt
- 1; ++i
)
18012 if (ipar
[i
] + 1 != ipar
[i
+ 1])
18015 /* Reconstruct the mask. */
18016 for (i
= 0; i
< 2; ++i
)
18018 unsigned e
= ipar
[i
* nelt2
];
18022 mask
|= e
<< (i
* 4);
18025 /* Make sure success has a non-zero value by adding one. */
18029 /* Return a register priority for hard reg REGNO. */
18031 ix86_register_priority (int hard_regno
)
18033 /* ebp and r13 as the base always wants a displacement, r12 as the
18034 base always wants an index. So discourage their usage in an
18036 if (hard_regno
== R12_REG
|| hard_regno
== R13_REG
)
18038 if (hard_regno
== BP_REG
)
18040 /* New x86-64 int registers result in bigger code size. Discourage
18042 if (IN_RANGE (hard_regno
, FIRST_REX_INT_REG
, LAST_REX_INT_REG
))
18044 /* New x86-64 SSE registers result in bigger code size. Discourage
18046 if (IN_RANGE (hard_regno
, FIRST_REX_SSE_REG
, LAST_REX_SSE_REG
))
18048 if (IN_RANGE (hard_regno
, FIRST_EXT_REX_SSE_REG
, LAST_EXT_REX_SSE_REG
))
18050 /* Usage of AX register results in smaller code. Prefer it. */
18051 if (hard_regno
== AX_REG
)
18056 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18058 Put float CONST_DOUBLE in the constant pool instead of fp regs.
18059 QImode must go into class Q_REGS.
18060 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18061 movdf to do mem-to-mem moves through integer regs. */
18064 ix86_preferred_reload_class (rtx x
, reg_class_t regclass
)
18066 machine_mode mode
= GET_MODE (x
);
18068 /* We're only allowed to return a subclass of CLASS. Many of the
18069 following checks fail for NO_REGS, so eliminate that early. */
18070 if (regclass
== NO_REGS
)
18073 /* All classes can load zeros. */
18074 if (x
== CONST0_RTX (mode
))
18077 /* Force constants into memory if we are loading a (nonzero) constant into
18078 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
18079 instructions to load from a constant. */
18081 && (MAYBE_MMX_CLASS_P (regclass
)
18082 || MAYBE_SSE_CLASS_P (regclass
)
18083 || MAYBE_MASK_CLASS_P (regclass
)))
18086 /* Floating-point constants need more complex checks. */
18087 if (CONST_DOUBLE_P (x
))
18089 /* General regs can load everything. */
18090 if (INTEGER_CLASS_P (regclass
))
18093 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18094 zero above. We only want to wind up preferring 80387 registers if
18095 we plan on doing computation with them. */
18096 if (IS_STACK_MODE (mode
)
18097 && standard_80387_constant_p (x
) > 0)
18099 /* Limit class to FP regs. */
18100 if (FLOAT_CLASS_P (regclass
))
18107 /* Prefer SSE regs only, if we can use them for math. */
18108 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
18109 return SSE_CLASS_P (regclass
) ? regclass
: NO_REGS
;
18111 /* Generally when we see PLUS here, it's the function invariant
18112 (plus soft-fp const_int). Which can only be computed into general
18114 if (GET_CODE (x
) == PLUS
)
18115 return INTEGER_CLASS_P (regclass
) ? regclass
: NO_REGS
;
18117 /* QImode constants are easy to load, but non-constant QImode data
18118 must go into Q_REGS. */
18119 if (GET_MODE (x
) == QImode
&& !CONSTANT_P (x
))
18121 if (Q_CLASS_P (regclass
))
18123 else if (reg_class_subset_p (Q_REGS
, regclass
))
18132 /* Discourage putting floating-point values in SSE registers unless
18133 SSE math is being used, and likewise for the 387 registers. */
18135 ix86_preferred_output_reload_class (rtx x
, reg_class_t regclass
)
18137 /* Restrict the output reload class to the register bank that we are doing
18138 math on. If we would like not to return a subset of CLASS, reject this
18139 alternative: if reload cannot do this, it will still use its choice. */
18140 machine_mode mode
= GET_MODE (x
);
18141 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
18142 return MAYBE_SSE_CLASS_P (regclass
) ? ALL_SSE_REGS
: NO_REGS
;
18144 if (IS_STACK_MODE (mode
))
18145 return FLOAT_CLASS_P (regclass
) ? regclass
: NO_REGS
;
18151 ix86_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass
,
18152 machine_mode mode
, secondary_reload_info
*sri
)
18154 /* Double-word spills from general registers to non-offsettable memory
18155 references (zero-extended addresses) require special handling. */
18158 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
18159 && INTEGER_CLASS_P (rclass
)
18160 && !offsettable_memref_p (x
))
18163 ? CODE_FOR_reload_noff_load
18164 : CODE_FOR_reload_noff_store
);
18165 /* Add the cost of moving address to a temporary. */
18166 sri
->extra_cost
= 1;
18171 /* QImode spills from non-QI registers require
18172 intermediate register on 32bit targets. */
18174 && ((!TARGET_64BIT
&& !in_p
18175 && INTEGER_CLASS_P (rclass
)
18176 && MAYBE_NON_Q_CLASS_P (rclass
))
18177 || (!TARGET_AVX512DQ
18178 && MAYBE_MASK_CLASS_P (rclass
))))
18180 int regno
= true_regnum (x
);
18182 /* Return Q_REGS if the operand is in memory. */
18189 /* This condition handles corner case where an expression involving
18190 pointers gets vectorized. We're trying to use the address of a
18191 stack slot as a vector initializer.
18193 (set (reg:V2DI 74 [ vect_cst_.2 ])
18194 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18196 Eventually frame gets turned into sp+offset like this:
18198 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18199 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18200 (const_int 392 [0x188]))))
18202 That later gets turned into:
18204 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18205 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18206 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18208 We'll have the following reload recorded:
18210 Reload 0: reload_in (DI) =
18211 (plus:DI (reg/f:DI 7 sp)
18212 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18213 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18214 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18215 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18216 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18217 reload_reg_rtx: (reg:V2DI 22 xmm1)
18219 Which isn't going to work since SSE instructions can't handle scalar
18220 additions. Returning GENERAL_REGS forces the addition into integer
18221 register and reload can handle subsequent reloads without problems. */
18223 if (in_p
&& GET_CODE (x
) == PLUS
18224 && SSE_CLASS_P (rclass
)
18225 && SCALAR_INT_MODE_P (mode
))
18226 return GENERAL_REGS
;
18231 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
18234 ix86_class_likely_spilled_p (reg_class_t rclass
)
18245 case SSE_FIRST_REG
:
18247 case FP_SECOND_REG
:
18257 /* If we are copying between registers from different register sets
18258 (e.g. FP and integer), we may need a memory location.
18260 The function can't work reliably when one of the CLASSES is a class
18261 containing registers from multiple sets. We avoid this by never combining
18262 different sets in a single alternative in the machine description.
18263 Ensure that this constraint holds to avoid unexpected surprises.
18265 When STRICT is false, we are being called from REGISTER_MOVE_COST,
18266 so do not enforce these sanity checks.
18268 To optimize register_move_cost performance, define inline variant. */
18271 inline_secondary_memory_needed (machine_mode mode
, reg_class_t class1
,
18272 reg_class_t class2
, int strict
)
18274 if (lra_in_progress
&& (class1
== NO_REGS
|| class2
== NO_REGS
))
18277 if (MAYBE_FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class1
)
18278 || MAYBE_FLOAT_CLASS_P (class2
) != FLOAT_CLASS_P (class2
)
18279 || MAYBE_SSE_CLASS_P (class1
) != SSE_CLASS_P (class1
)
18280 || MAYBE_SSE_CLASS_P (class2
) != SSE_CLASS_P (class2
)
18281 || MAYBE_MMX_CLASS_P (class1
) != MMX_CLASS_P (class1
)
18282 || MAYBE_MMX_CLASS_P (class2
) != MMX_CLASS_P (class2
)
18283 || MAYBE_MASK_CLASS_P (class1
) != MASK_CLASS_P (class1
)
18284 || MAYBE_MASK_CLASS_P (class2
) != MASK_CLASS_P (class2
))
18286 gcc_assert (!strict
|| lra_in_progress
);
18290 if (FLOAT_CLASS_P (class1
) != FLOAT_CLASS_P (class2
))
18293 /* Between mask and general, we have moves no larger than word size. */
18294 if ((MASK_CLASS_P (class1
) != MASK_CLASS_P (class2
))
18295 && (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
))
18298 /* ??? This is a lie. We do have moves between mmx/general, and for
18299 mmx/sse2. But by saying we need secondary memory we discourage the
18300 register allocator from using the mmx registers unless needed. */
18301 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18304 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18306 /* SSE1 doesn't have any direct moves from other classes. */
18310 /* If the target says that inter-unit moves are more expensive
18311 than moving through memory, then don't generate them. */
18312 if ((SSE_CLASS_P (class1
) && !TARGET_INTER_UNIT_MOVES_FROM_VEC
)
18313 || (SSE_CLASS_P (class2
) && !TARGET_INTER_UNIT_MOVES_TO_VEC
))
18316 /* Between SSE and general, we have moves no larger than word size. */
18317 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
18324 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
18327 ix86_secondary_memory_needed (machine_mode mode
, reg_class_t class1
,
18328 reg_class_t class2
)
18330 return inline_secondary_memory_needed (mode
, class1
, class2
, true);
18333 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18335 get_secondary_mem widens integral modes to BITS_PER_WORD.
18336 There is no need to emit full 64 bit move on 64 bit targets
18337 for integral modes that can be moved using 32 bit move. */
18339 static machine_mode
18340 ix86_secondary_memory_needed_mode (machine_mode mode
)
18342 if (GET_MODE_BITSIZE (mode
) < 32 && INTEGRAL_MODE_P (mode
))
18343 return mode_for_size (32, GET_MODE_CLASS (mode
), 0).require ();
18347 /* Implement the TARGET_CLASS_MAX_NREGS hook.
18349 On the 80386, this is the size of MODE in words,
18350 except in the FP regs, where a single reg is always enough. */
18352 static unsigned char
18353 ix86_class_max_nregs (reg_class_t rclass
, machine_mode mode
)
18355 if (MAYBE_INTEGER_CLASS_P (rclass
))
18357 if (mode
== XFmode
)
18358 return (TARGET_64BIT
? 2 : 3);
18359 else if (mode
== XCmode
)
18360 return (TARGET_64BIT
? 4 : 6);
18362 return CEIL (GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
18366 if (COMPLEX_MODE_P (mode
))
18373 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
18376 ix86_can_change_mode_class (machine_mode from
, machine_mode to
,
18377 reg_class_t regclass
)
18382 /* x87 registers can't do subreg at all, as all values are reformatted
18383 to extended precision. */
18384 if (MAYBE_FLOAT_CLASS_P (regclass
))
18387 if (MAYBE_SSE_CLASS_P (regclass
) || MAYBE_MMX_CLASS_P (regclass
))
18389 /* Vector registers do not support QI or HImode loads. If we don't
18390 disallow a change to these modes, reload will assume it's ok to
18391 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18392 the vec_dupv4hi pattern. */
18393 if (GET_MODE_SIZE (from
) < 4)
18400 /* Return index of MODE in the sse load/store tables. */
18403 sse_store_index (machine_mode mode
)
18405 switch (GET_MODE_SIZE (mode
))
18422 /* Return the cost of moving data of mode M between a
18423 register and memory. A value of 2 is the default; this cost is
18424 relative to those in `REGISTER_MOVE_COST'.
18426 This function is used extensively by register_move_cost that is used to
18427 build tables at startup. Make it inline in this case.
18428 When IN is 2, return maximum of in and out move cost.
18430 If moving between registers and memory is more expensive than
18431 between two registers, you should define this macro to express the
18434 Model also increased moving costs of QImode registers in non
18438 inline_memory_move_cost (machine_mode mode
, enum reg_class regclass
, int in
)
18441 if (FLOAT_CLASS_P (regclass
))
18459 return MAX (ix86_cost
->hard_register
.fp_load
[index
],
18460 ix86_cost
->hard_register
.fp_store
[index
]);
18461 return in
? ix86_cost
->hard_register
.fp_load
[index
]
18462 : ix86_cost
->hard_register
.fp_store
[index
];
18464 if (SSE_CLASS_P (regclass
))
18466 int index
= sse_store_index (mode
);
18470 return MAX (ix86_cost
->hard_register
.sse_load
[index
],
18471 ix86_cost
->hard_register
.sse_store
[index
]);
18472 return in
? ix86_cost
->hard_register
.sse_load
[index
]
18473 : ix86_cost
->hard_register
.sse_store
[index
];
18475 if (MMX_CLASS_P (regclass
))
18478 switch (GET_MODE_SIZE (mode
))
18490 return MAX (ix86_cost
->hard_register
.mmx_load
[index
],
18491 ix86_cost
->hard_register
.mmx_store
[index
]);
18492 return in
? ix86_cost
->hard_register
.mmx_load
[index
]
18493 : ix86_cost
->hard_register
.mmx_store
[index
];
18495 switch (GET_MODE_SIZE (mode
))
18498 if (Q_CLASS_P (regclass
) || TARGET_64BIT
)
18501 return ix86_cost
->hard_register
.int_store
[0];
18502 if (TARGET_PARTIAL_REG_DEPENDENCY
18503 && optimize_function_for_speed_p (cfun
))
18504 cost
= ix86_cost
->hard_register
.movzbl_load
;
18506 cost
= ix86_cost
->hard_register
.int_load
[0];
18508 return MAX (cost
, ix86_cost
->hard_register
.int_store
[0]);
18514 return MAX (ix86_cost
->hard_register
.movzbl_load
,
18515 ix86_cost
->hard_register
.int_store
[0] + 4);
18517 return ix86_cost
->hard_register
.movzbl_load
;
18519 return ix86_cost
->hard_register
.int_store
[0] + 4;
18524 return MAX (ix86_cost
->hard_register
.int_load
[1],
18525 ix86_cost
->hard_register
.int_store
[1]);
18526 return in
? ix86_cost
->hard_register
.int_load
[1]
18527 : ix86_cost
->hard_register
.int_store
[1];
18530 cost
= MAX (ix86_cost
->hard_register
.int_load
[2],
18531 ix86_cost
->hard_register
.int_store
[2]);
18533 cost
= ix86_cost
->hard_register
.int_load
[2];
18535 cost
= ix86_cost
->hard_register
.int_store
[2];
18536 /* Multiply with the number of GPR moves needed. */
18537 return cost
* CEIL ((int) GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
18542 ix86_memory_move_cost (machine_mode mode
, reg_class_t regclass
, bool in
)
18544 return inline_memory_move_cost (mode
, (enum reg_class
) regclass
, in
? 1 : 0);
18548 /* Return the cost of moving data from a register in class CLASS1 to
18549 one in class CLASS2.
18551 It is not required that the cost always equal 2 when FROM is the same as TO;
18552 on some machines it is expensive to move between registers if they are not
18553 general registers. */
18556 ix86_register_move_cost (machine_mode mode
, reg_class_t class1_i
,
18557 reg_class_t class2_i
)
18559 enum reg_class class1
= (enum reg_class
) class1_i
;
18560 enum reg_class class2
= (enum reg_class
) class2_i
;
18562 /* In case we require secondary memory, compute cost of the store followed
18563 by load. In order to avoid bad register allocation choices, we need
18564 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18566 if (inline_secondary_memory_needed (mode
, class1
, class2
, false))
18570 cost
+= inline_memory_move_cost (mode
, class1
, 2);
18571 cost
+= inline_memory_move_cost (mode
, class2
, 2);
18573 /* In case of copying from general_purpose_register we may emit multiple
18574 stores followed by single load causing memory size mismatch stall.
18575 Count this as arbitrarily high cost of 20. */
18576 if (GET_MODE_BITSIZE (mode
) > BITS_PER_WORD
18577 && TARGET_MEMORY_MISMATCH_STALL
18578 && targetm
.class_max_nregs (class1
, mode
)
18579 > targetm
.class_max_nregs (class2
, mode
))
18582 /* In the case of FP/MMX moves, the registers actually overlap, and we
18583 have to switch modes in order to treat them differently. */
18584 if ((MMX_CLASS_P (class1
) && MAYBE_FLOAT_CLASS_P (class2
))
18585 || (MMX_CLASS_P (class2
) && MAYBE_FLOAT_CLASS_P (class1
)))
18591 /* Moves between MMX and non-MMX units require secondary memory. */
18592 if (MMX_CLASS_P (class1
) != MMX_CLASS_P (class2
))
18593 gcc_unreachable ();
18595 /* Moves between SSE and integer units are expensive. */
18596 if (SSE_CLASS_P (class1
) != SSE_CLASS_P (class2
))
18598 /* ??? By keeping returned value relatively high, we limit the number
18599 of moves between integer and SSE registers for all targets.
18600 Additionally, high value prevents problem with x86_modes_tieable_p(),
18601 where integer modes in SSE registers are not tieable
18602 because of missing QImode and HImode moves to, from or between
18603 MMX/SSE registers. */
18604 return MAX (8, SSE_CLASS_P (class1
)
18605 ? ix86_cost
->hard_register
.sse_to_integer
18606 : ix86_cost
->hard_register
.integer_to_sse
);
18608 if (MAYBE_FLOAT_CLASS_P (class1
))
18609 return ix86_cost
->hard_register
.fp_move
;
18610 if (MAYBE_SSE_CLASS_P (class1
))
18612 if (GET_MODE_BITSIZE (mode
) <= 128)
18613 return ix86_cost
->hard_register
.xmm_move
;
18614 if (GET_MODE_BITSIZE (mode
) <= 256)
18615 return ix86_cost
->hard_register
.ymm_move
;
18616 return ix86_cost
->hard_register
.zmm_move
;
18618 if (MAYBE_MMX_CLASS_P (class1
))
18619 return ix86_cost
->hard_register
.mmx_move
;
18623 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
18624 words of a value of mode MODE but can be less for certain modes in
18625 special long registers.
18627 Actually there are no two word move instructions for consecutive
18628 registers. And only registers 0-3 may have mov byte instructions
18629 applied to them. */
18631 static unsigned int
18632 ix86_hard_regno_nregs (unsigned int regno
, machine_mode mode
)
18634 if (GENERAL_REGNO_P (regno
))
18636 if (mode
== XFmode
)
18637 return TARGET_64BIT
? 2 : 3;
18638 if (mode
== XCmode
)
18639 return TARGET_64BIT
? 4 : 6;
18640 return CEIL (GET_MODE_SIZE (mode
), UNITS_PER_WORD
);
18642 if (COMPLEX_MODE_P (mode
))
18644 /* Register pair for mask registers. */
18645 if (mode
== P2QImode
|| mode
== P2HImode
)
18647 if (mode
== V64SFmode
|| mode
== V64SImode
)
18652 /* Implement REGMODE_NATURAL_SIZE(MODE). */
18654 ix86_regmode_natural_size (machine_mode mode
)
18656 if (mode
== P2HImode
|| mode
== P2QImode
)
18657 return GET_MODE_SIZE (mode
) / 2;
18658 return UNITS_PER_WORD
;
18661 /* Implement TARGET_HARD_REGNO_MODE_OK. */
18664 ix86_hard_regno_mode_ok (unsigned int regno
, machine_mode mode
)
18666 /* Flags and only flags can only hold CCmode values. */
18667 if (CC_REGNO_P (regno
))
18668 return GET_MODE_CLASS (mode
) == MODE_CC
;
18669 if (GET_MODE_CLASS (mode
) == MODE_CC
18670 || GET_MODE_CLASS (mode
) == MODE_RANDOM
)
18672 if (STACK_REGNO_P (regno
))
18673 return VALID_FP_MODE_P (mode
);
18674 if (MASK_REGNO_P (regno
))
18676 /* Register pair only starts at even register number. */
18677 if ((mode
== P2QImode
|| mode
== P2HImode
))
18678 return MASK_PAIR_REGNO_P(regno
);
18680 return (VALID_MASK_REG_MODE (mode
)
18681 || (TARGET_AVX512BW
18682 && VALID_MASK_AVX512BW_MODE (mode
)));
18685 if (GET_MODE_CLASS (mode
) == MODE_PARTIAL_INT
)
18688 if (SSE_REGNO_P (regno
))
18690 /* We implement the move patterns for all vector modes into and
18691 out of SSE registers, even when no operation instructions
18694 /* For AVX-512 we allow, regardless of regno:
18696 - any of 512-bit wide vector mode
18697 - any scalar mode. */
18700 || VALID_AVX512F_REG_MODE (mode
)
18701 || VALID_AVX512F_SCALAR_MODE (mode
)))
18704 /* For AVX-5124FMAPS or AVX-5124VNNIW
18705 allow V64SF and V64SI modes for special regnos. */
18706 if ((TARGET_AVX5124FMAPS
|| TARGET_AVX5124VNNIW
)
18707 && (mode
== V64SFmode
|| mode
== V64SImode
)
18708 && MOD4_SSE_REGNO_P (regno
))
18711 /* TODO check for QI/HI scalars. */
18712 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
18713 if (TARGET_AVX512VL
18716 || VALID_AVX256_REG_MODE (mode
)
18717 || VALID_AVX512VL_128_REG_MODE (mode
)))
18720 /* xmm16-xmm31 are only available for AVX-512. */
18721 if (EXT_REX_SSE_REGNO_P (regno
))
18724 /* OImode and AVX modes are available only when AVX is enabled. */
18725 return ((TARGET_AVX
18726 && VALID_AVX256_REG_OR_OI_MODE (mode
))
18727 || VALID_SSE_REG_MODE (mode
)
18728 || VALID_SSE2_REG_MODE (mode
)
18729 || VALID_MMX_REG_MODE (mode
)
18730 || VALID_MMX_REG_MODE_3DNOW (mode
));
18732 if (MMX_REGNO_P (regno
))
18734 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18735 so if the register is available at all, then we can move data of
18736 the given mode into or out of it. */
18737 return (VALID_MMX_REG_MODE (mode
)
18738 || VALID_MMX_REG_MODE_3DNOW (mode
));
18741 if (mode
== QImode
)
18743 /* Take care for QImode values - they can be in non-QI regs,
18744 but then they do cause partial register stalls. */
18745 if (ANY_QI_REGNO_P (regno
))
18747 if (!TARGET_PARTIAL_REG_STALL
)
18749 /* LRA checks if the hard register is OK for the given mode.
18750 QImode values can live in non-QI regs, so we allow all
18752 if (lra_in_progress
)
18754 return !can_create_pseudo_p ();
18756 /* We handle both integer and floats in the general purpose registers. */
18757 else if (VALID_INT_MODE_P (mode
))
18759 else if (VALID_FP_MODE_P (mode
))
18761 else if (VALID_DFP_MODE_P (mode
))
18763 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18764 on to use that value in smaller contexts, this can easily force a
18765 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18766 supporting DImode, allow it. */
18767 else if (VALID_MMX_REG_MODE_3DNOW (mode
) || VALID_MMX_REG_MODE (mode
))
18773 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
18774 saves SSE registers across calls is Win64 (thus no need to check the
18775 current ABI here), and with AVX enabled Win64 only guarantees that
18776 the low 16 bytes are saved. */
18779 ix86_hard_regno_call_part_clobbered (rtx_insn
*insn ATTRIBUTE_UNUSED
,
18780 unsigned int regno
, machine_mode mode
)
18782 return SSE_REGNO_P (regno
) && GET_MODE_SIZE (mode
) > 16;
18785 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18786 tieable integer mode. */
18789 ix86_tieable_integer_mode_p (machine_mode mode
)
18798 return TARGET_64BIT
|| !TARGET_PARTIAL_REG_STALL
;
18801 return TARGET_64BIT
;
18808 /* Implement TARGET_MODES_TIEABLE_P.
18810 Return true if MODE1 is accessible in a register that can hold MODE2
18811 without copying. That is, all register classes that can hold MODE2
18812 can also hold MODE1. */
18815 ix86_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
18817 if (mode1
== mode2
)
18820 if (ix86_tieable_integer_mode_p (mode1
)
18821 && ix86_tieable_integer_mode_p (mode2
))
18824 /* MODE2 being XFmode implies fp stack or general regs, which means we
18825 can tie any smaller floating point modes to it. Note that we do not
18826 tie this with TFmode. */
18827 if (mode2
== XFmode
)
18828 return mode1
== SFmode
|| mode1
== DFmode
;
18830 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18831 that we can tie it with SFmode. */
18832 if (mode2
== DFmode
)
18833 return mode1
== SFmode
;
18835 /* If MODE2 is only appropriate for an SSE register, then tie with
18836 any other mode acceptable to SSE registers. */
18837 if (GET_MODE_SIZE (mode2
) == 64
18838 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18839 return (GET_MODE_SIZE (mode1
) == 64
18840 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18841 if (GET_MODE_SIZE (mode2
) == 32
18842 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18843 return (GET_MODE_SIZE (mode1
) == 32
18844 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18845 if (GET_MODE_SIZE (mode2
) == 16
18846 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode2
))
18847 return (GET_MODE_SIZE (mode1
) == 16
18848 && ix86_hard_regno_mode_ok (FIRST_SSE_REG
, mode1
));
18850 /* If MODE2 is appropriate for an MMX register, then tie
18851 with any other mode acceptable to MMX registers. */
18852 if (GET_MODE_SIZE (mode2
) == 8
18853 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode2
))
18854 return (GET_MODE_SIZE (mode1
) == 8
18855 && ix86_hard_regno_mode_ok (FIRST_MMX_REG
, mode1
));
18860 /* Return the cost of moving between two registers of mode MODE. */
18863 ix86_set_reg_reg_cost (machine_mode mode
)
18865 unsigned int units
= UNITS_PER_WORD
;
18867 switch (GET_MODE_CLASS (mode
))
18873 units
= GET_MODE_SIZE (CCmode
);
18877 if ((TARGET_SSE
&& mode
== TFmode
)
18878 || (TARGET_80387
&& mode
== XFmode
)
18879 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DFmode
)
18880 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SFmode
))
18881 units
= GET_MODE_SIZE (mode
);
18884 case MODE_COMPLEX_FLOAT
:
18885 if ((TARGET_SSE
&& mode
== TCmode
)
18886 || (TARGET_80387
&& mode
== XCmode
)
18887 || ((TARGET_80387
|| TARGET_SSE2
) && mode
== DCmode
)
18888 || ((TARGET_80387
|| TARGET_SSE
) && mode
== SCmode
))
18889 units
= GET_MODE_SIZE (mode
);
18892 case MODE_VECTOR_INT
:
18893 case MODE_VECTOR_FLOAT
:
18894 if ((TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
18895 || (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
18896 || (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
18897 || (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
18898 || ((TARGET_MMX
|| TARGET_MMX_WITH_SSE
)
18899 && VALID_MMX_REG_MODE (mode
)))
18900 units
= GET_MODE_SIZE (mode
);
18903 /* Return the cost of moving between two registers of mode MODE,
18904 assuming that the move will be in pieces of at most UNITS bytes. */
18905 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode
), units
));
18908 /* Return cost of vector operation in MODE given that scalar version has
18912 ix86_vec_cost (machine_mode mode
, int cost
)
18914 if (!VECTOR_MODE_P (mode
))
18917 if (GET_MODE_BITSIZE (mode
) == 128
18918 && TARGET_SSE_SPLIT_REGS
)
18920 if (GET_MODE_BITSIZE (mode
) > 128
18921 && TARGET_AVX128_OPTIMAL
)
18922 return cost
* GET_MODE_BITSIZE (mode
) / 128;
18926 /* Return cost of multiplication in MODE. */
18929 ix86_multiplication_cost (const struct processor_costs
*cost
,
18930 enum machine_mode mode
)
18932 machine_mode inner_mode
= mode
;
18933 if (VECTOR_MODE_P (mode
))
18934 inner_mode
= GET_MODE_INNER (mode
);
18936 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
18937 return inner_mode
== DFmode
? cost
->mulsd
: cost
->mulss
;
18938 else if (X87_FLOAT_MODE_P (mode
))
18940 else if (FLOAT_MODE_P (mode
))
18941 return ix86_vec_cost (mode
,
18942 inner_mode
== DFmode
? cost
->mulsd
: cost
->mulss
);
18943 else if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
18945 /* vpmullq is used in this case. No emulation is needed. */
18946 if (TARGET_AVX512DQ
)
18947 return ix86_vec_cost (mode
, cost
->mulss
);
18949 /* V*QImode is emulated with 7-13 insns. */
18950 if (mode
== V16QImode
|| mode
== V32QImode
)
18953 if (TARGET_XOP
&& mode
== V16QImode
)
18955 else if (TARGET_SSSE3
)
18957 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* extra
);
18959 /* V*DImode is emulated with 5-8 insns. */
18960 else if (mode
== V2DImode
|| mode
== V4DImode
)
18962 if (TARGET_XOP
&& mode
== V2DImode
)
18963 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* 3);
18965 return ix86_vec_cost (mode
, cost
->mulss
* 3 + cost
->sse_op
* 5);
18967 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
18968 insns, including two PMULUDQ. */
18969 else if (mode
== V4SImode
&& !(TARGET_SSE4_1
|| TARGET_AVX
))
18970 return ix86_vec_cost (mode
, cost
->mulss
* 2 + cost
->sse_op
* 5);
18972 return ix86_vec_cost (mode
, cost
->mulss
);
18975 return (cost
->mult_init
[MODE_INDEX (mode
)] + cost
->mult_bit
* 7);
18978 /* Return cost of multiplication in MODE. */
18981 ix86_division_cost (const struct processor_costs
*cost
,
18982 enum machine_mode mode
)
18984 machine_mode inner_mode
= mode
;
18985 if (VECTOR_MODE_P (mode
))
18986 inner_mode
= GET_MODE_INNER (mode
);
18988 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
18989 return inner_mode
== DFmode
? cost
->divsd
: cost
->divss
;
18990 else if (X87_FLOAT_MODE_P (mode
))
18992 else if (FLOAT_MODE_P (mode
))
18993 return ix86_vec_cost (mode
,
18994 inner_mode
== DFmode
? cost
->divsd
: cost
->divss
);
18996 return cost
->divide
[MODE_INDEX (mode
)];
18999 #define COSTS_N_BYTES(N) ((N) * 2)
19001 /* Return cost of shift in MODE.
19002 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19003 AND_IN_OP1 specify in op1 is result of and and SHIFT_AND_TRUNCATE
19004 if op1 is a result of subreg.
19006 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
/* NOTE(review): this block is an extraction artifact -- each logical line
   is split across several physical lines and several lines were dropped
   (see gaps in the embedded numbering, e.g. 19011 -> 19014: the SPEED and
   AND_IN_OP1 parameters, the return type, braces and some conditions are
   missing).  Code kept byte-identical; restore missing lines from upstream
   gcc/config/i386/i386.c before compiling.  */
19009 ix86_shift_rotate_cost (const struct processor_costs
*cost
,
19010 enum machine_mode mode
, bool constant_op1
,
19011 HOST_WIDE_INT op1_val
,
19014 bool shift_and_truncate
,
19015 bool *skip_op0
, bool *skip_op1
)
/* Default: caller should cost both operands normally.  */
19018 *skip_op0
= *skip_op1
= false;
19019 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
19021 /* V*QImode is emulated with 1-11 insns. */
19022 if (mode
== V16QImode
|| mode
== V32QImode
)
19025 if (TARGET_XOP
&& mode
== V16QImode
)
19027 /* For XOP we use vpshab, which requires a broadcast of the
19028 value to the variable shift insn. For constants this
19029 means a V16Q const in mem; even when we can perform the
19030 shift with one insn set the cost to prefer paddb. */
19035 return ix86_vec_cost (mode
,
19040 (GET_MODE_UNIT_SIZE (mode
))));
19044 else if (TARGET_SSSE3
)
/* NOTE(review): 'count' is declared in a dropped line -- confirm upstream.  */
19046 return ix86_vec_cost (mode
, cost
->sse_op
* count
);
19049 return ix86_vec_cost (mode
, cost
->sse_op
);
/* Scalar shifts wider than a word (double-word shifts) below.  */
19051 if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19056 return cost
->shift_const
+ COSTS_N_INSNS (2);
19058 return cost
->shift_const
* 2;
19063 return cost
->shift_var
* 2;
19065 return cost
->shift_var
* 6 + COSTS_N_INSNS (2);
19071 return cost
->shift_const
;
19072 else if (shift_and_truncate
)
/* Shift count comes from an AND-masked subreg; the mask is free.  */
19075 *skip_op0
= *skip_op1
= true;
19076 /* Return the cost after shift-and truncation. */
19077 return cost
->shift_var
;
19080 return cost
->shift_var
;
19082 return cost
->shift_const
;
19085 /* Compute a (partial) cost for rtx X. Return true if the complete
19086 cost has been computed, and false if subexpressions should be
19087 scanned. In either case, *TOTAL contains the cost result. */
/* NOTE(review): extraction artifact -- logical lines are split across
   physical lines, stray original line numbers are embedded, and most of
   the switch's "case" labels, braces, break statements and the final
   return were dropped (visible as gaps in the embedded numbering).
   Code kept byte-identical; restore from upstream i386.c.  */
19090 ix86_rtx_costs (rtx x
, machine_mode mode
, int outer_code_i
, int opno
,
19091 int *total
, bool speed
)
19094 enum rtx_code code
= GET_CODE (x
);
19095 enum rtx_code outer_code
= (enum rtx_code
) outer_code_i
;
/* Size-optimizing compilations use the size cost table instead of the
   tuned per-CPU table.  */
19096 const struct processor_costs
*cost
19097 = speed
? ix86_tune_cost
: &ix86_size_cost
;
/* NOTE(review): presumably inside "case SET:" -- label dropped; confirm.  */
19103 if (register_operand (SET_DEST (x
), VOIDmode
)
19104 && register_operand (SET_SRC (x
), VOIDmode
))
19106 *total
= ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x
)));
19110 if (register_operand (SET_SRC (x
), VOIDmode
))
19111 /* Avoid potentially incorrect high cost from rtx_costs
19112 for non-tieable SUBREGs. */
19116 src_cost
= rtx_cost (SET_SRC (x
), mode
, SET
, 1, speed
);
19118 if (CONSTANT_P (SET_SRC (x
)))
19119 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19120 a small value, possibly zero for cheap constants. */
19121 src_cost
+= COSTS_N_INSNS (1);
19124 *total
= src_cost
+ rtx_cost (SET_DEST (x
), mode
, SET
, 0, speed
);
/* NOTE(review): presumably the CONST_INT/CONST/LABEL_REF/SYMBOL_REF
   constant cases -- labels dropped; confirm upstream.  */
19131 if (x86_64_immediate_operand (x
, VOIDmode
))
19138 if (IS_STACK_MODE (mode
))
19139 switch (standard_80387_constant_p (x
))
19147 default: /* Other constants */
19154 switch (standard_sse_constant_p (x
, mode
))
19158 case 1: /* 0: xor eliminates false dependency */
19161 default: /* -1: cmp contains false dependency */
19167 case CONST_WIDE_INT
:
19168 /* Fall back to (MEM (SYMBOL_REF)), since that's where
19169 it'll probably end up. Add a penalty for size. */
19170 *total
= (COSTS_N_INSNS (1)
19171 + (!TARGET_64BIT
&& flag_pic
)
19172 + (GET_MODE_SIZE (mode
) <= 4
19173 ? 0 : GET_MODE_SIZE (mode
) <= 8 ? 1 : 2));
/* NOTE(review): presumably "case ZERO_EXTEND:".  */
19177 /* The zero extensions is often completely free on x86_64, so make
19178 it as cheap as possible. */
19179 if (TARGET_64BIT
&& mode
== DImode
19180 && GET_MODE (XEXP (x
, 0)) == SImode
)
19182 else if (TARGET_ZERO_EXTEND_WITH_AND
)
19183 *total
= cost
->add
;
19185 *total
= cost
->movzx
;
/* NOTE(review): presumably "case SIGN_EXTEND:".  */
19189 *total
= cost
->movsx
;
/* NOTE(review): presumably "case ASHIFT:" -- cheap shifts become add/lea.  */
19193 if (SCALAR_INT_MODE_P (mode
)
19194 && GET_MODE_SIZE (mode
) < UNITS_PER_WORD
19195 && CONST_INT_P (XEXP (x
, 1)))
19197 HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19200 *total
= cost
->add
;
19203 if ((value
== 2 || value
== 3)
19204 && cost
->lea
<= cost
->shift_const
)
19206 *total
= cost
->lea
;
/* NOTE(review): presumably the ROTATE/ASHIFTRT/LSHIFTRT/ROTATERT group,
   delegating to ix86_shift_rotate_cost.  */
19216 bool skip_op0
, skip_op1
;
19217 *total
= ix86_shift_rotate_cost (cost
, mode
, CONSTANT_P (XEXP (x
, 1)),
19218 CONST_INT_P (XEXP (x
, 1))
19219 ? INTVAL (XEXP (x
, 1)) : -1,
19221 GET_CODE (XEXP (x
, 1)) == AND
,
19222 SUBREG_P (XEXP (x
, 1))
19223 && GET_CODE (XEXP (XEXP (x
, 1), 0)) == AND
,
19224 &skip_op0
, &skip_op1
);
19225 if (skip_op0
|| skip_op1
)
19228 *total
+= rtx_cost (XEXP (x
, 0), mode
, code
, 0, speed
);
19230 *total
+= rtx_cost (XEXP (x
, 1), mode
, code
, 0, speed
);
/* NOTE(review): presumably "case FMA:".  */
19239 gcc_assert (FLOAT_MODE_P (mode
));
19240 gcc_assert (TARGET_FMA
|| TARGET_FMA4
|| TARGET_AVX512F
);
19242 *total
= ix86_vec_cost (mode
,
19243 GET_MODE_INNER (mode
) == SFmode
19244 ? cost
->fmass
: cost
->fmasd
);
19245 *total
+= rtx_cost (XEXP (x
, 1), mode
, FMA
, 1, speed
);
19247 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
19249 if (GET_CODE (sub
) == NEG
)
19250 sub
= XEXP (sub
, 0);
19251 *total
+= rtx_cost (sub
, mode
, FMA
, 0, speed
);
19254 if (GET_CODE (sub
) == NEG
)
19255 sub
= XEXP (sub
, 0);
19256 *total
+= rtx_cost (sub
, mode
, FMA
, 2, speed
);
/* NOTE(review): presumably "case MULT:".  */
19261 if (!FLOAT_MODE_P (mode
) && !VECTOR_MODE_P (mode
))
19263 rtx op0
= XEXP (x
, 0);
19264 rtx op1
= XEXP (x
, 1);
19266 if (CONST_INT_P (XEXP (x
, 1)))
/* Count the set bits of the constant multiplier (Kernighan trick);
   each costs one mult_bit step below.  */
19268 unsigned HOST_WIDE_INT value
= INTVAL (XEXP (x
, 1));
19269 for (nbits
= 0; value
!= 0; value
&= value
- 1)
19273 /* This is arbitrary. */
19276 /* Compute costs correctly for widening multiplication. */
19277 if ((GET_CODE (op0
) == SIGN_EXTEND
|| GET_CODE (op0
) == ZERO_EXTEND
)
19278 && GET_MODE_SIZE (GET_MODE (XEXP (op0
, 0))) * 2
19279 == GET_MODE_SIZE (mode
))
19281 int is_mulwiden
= 0;
19282 machine_mode inner_mode
= GET_MODE (op0
);
19284 if (GET_CODE (op0
) == GET_CODE (op1
))
19285 is_mulwiden
= 1, op1
= XEXP (op1
, 0);
19286 else if (CONST_INT_P (op1
))
19288 if (GET_CODE (op0
) == SIGN_EXTEND
)
19289 is_mulwiden
= trunc_int_for_mode (INTVAL (op1
), inner_mode
)
19292 is_mulwiden
= !(INTVAL (op1
) & ~GET_MODE_MASK (inner_mode
));
19296 op0
= XEXP (op0
, 0), mode
= GET_MODE (op0
);
19299 *total
= (cost
->mult_init
[MODE_INDEX (mode
)]
19300 + nbits
* cost
->mult_bit
19301 + rtx_cost (op0
, mode
, outer_code
, opno
, speed
)
19302 + rtx_cost (op1
, mode
, outer_code
, opno
, speed
));
19306 *total
= ix86_multiplication_cost (cost
, mode
);
/* NOTE(review): presumably the DIV/UDIV/MOD/UMOD cases.  */
19313 *total
= ix86_division_cost (cost
, mode
);
/* NOTE(review): presumably "case PLUS:" -- lea-formable additions.  */
19317 if (GET_MODE_CLASS (mode
) == MODE_INT
19318 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
)
19320 if (GET_CODE (XEXP (x
, 0)) == PLUS
19321 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
19322 && CONST_INT_P (XEXP (XEXP (XEXP (x
, 0), 0), 1))
19323 && CONSTANT_P (XEXP (x
, 1)))
19325 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1));
19326 if (val
== 2 || val
== 4 || val
== 8)
19328 *total
= cost
->lea
;
19329 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
19330 outer_code
, opno
, speed
);
19331 *total
+= rtx_cost (XEXP (XEXP (XEXP (x
, 0), 0), 0), mode
,
19332 outer_code
, opno
, speed
);
19333 *total
+= rtx_cost (XEXP (x
, 1), mode
,
19334 outer_code
, opno
, speed
);
19338 else if (GET_CODE (XEXP (x
, 0)) == MULT
19339 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
19341 HOST_WIDE_INT val
= INTVAL (XEXP (XEXP (x
, 0), 1));
19342 if (val
== 2 || val
== 4 || val
== 8)
19344 *total
= cost
->lea
;
19345 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
19346 outer_code
, opno
, speed
);
19347 *total
+= rtx_cost (XEXP (x
, 1), mode
,
19348 outer_code
, opno
, speed
);
19352 else if (GET_CODE (XEXP (x
, 0)) == PLUS
)
19354 /* Add with carry, ignore the cost of adding a carry flag. */
19355 if (ix86_carry_flag_operator (XEXP (XEXP (x
, 0), 0), mode
))
19356 *total
= cost
->add
;
19359 *total
= cost
->lea
;
19360 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
19361 outer_code
, opno
, speed
);
19364 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 1), mode
,
19365 outer_code
, opno
, speed
);
19366 *total
+= rtx_cost (XEXP (x
, 1), mode
,
19367 outer_code
, opno
, speed
);
/* NOTE(review): presumably "case MINUS:" falling through to shared
   add/sub float handling.  */
19374 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
19375 if (GET_MODE_CLASS (mode
) == MODE_INT
19376 && GET_MODE_SIZE (mode
) <= UNITS_PER_WORD
19377 && GET_CODE (XEXP (x
, 0)) == MINUS
19378 && ix86_carry_flag_operator (XEXP (XEXP (x
, 0), 1), mode
))
19380 *total
= cost
->add
;
19381 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), mode
,
19382 outer_code
, opno
, speed
);
19383 *total
+= rtx_cost (XEXP (x
, 1), mode
,
19384 outer_code
, opno
, speed
);
19388 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19390 *total
= cost
->addss
;
19393 else if (X87_FLOAT_MODE_P (mode
))
19395 *total
= cost
->fadd
;
19398 else if (FLOAT_MODE_P (mode
))
19400 *total
= ix86_vec_cost (mode
, cost
->addss
);
/* NOTE(review): presumably the AND/IOR/XOR logic-op cases.  */
19408 if (GET_MODE_CLASS (mode
) == MODE_INT
19409 && GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19411 *total
= (cost
->add
* 2
19412 + (rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
)
19413 << (GET_MODE (XEXP (x
, 0)) != DImode
))
19414 + (rtx_cost (XEXP (x
, 1), mode
, outer_code
, opno
, speed
)
19415 << (GET_MODE (XEXP (x
, 1)) != DImode
)));
/* NOTE(review): presumably "case NEG:".  */
19421 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19423 *total
= cost
->sse_op
;
19426 else if (X87_FLOAT_MODE_P (mode
))
19428 *total
= cost
->fchs
;
19431 else if (FLOAT_MODE_P (mode
))
19433 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
/* NOTE(review): presumably "case NOT:".  */
19439 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
19440 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
19441 else if (GET_MODE_SIZE (mode
) > UNITS_PER_WORD
)
19442 *total
= cost
->add
* 2;
19444 *total
= cost
->add
;
/* NOTE(review): presumably "case COMPARE:".  */
19448 if (GET_CODE (XEXP (x
, 0)) == ZERO_EXTRACT
19449 && XEXP (XEXP (x
, 0), 1) == const1_rtx
19450 && CONST_INT_P (XEXP (XEXP (x
, 0), 2))
19451 && XEXP (x
, 1) == const0_rtx
)
19453 /* This kind of construct is implemented using test[bwl].
19454 Treat it as if we had an AND. */
19455 mode
= GET_MODE (XEXP (XEXP (x
, 0), 0));
19456 *total
= (cost
->add
19457 + rtx_cost (XEXP (XEXP (x
, 0), 0), mode
, outer_code
,
19459 + rtx_cost (const1_rtx
, mode
, outer_code
, opno
, speed
));
19463 /* The embedded comparison operand is completely free. */
19464 if (!general_operand (XEXP (x
, 0), GET_MODE (XEXP (x
, 0)))
19465 && XEXP (x
, 1) == const0_rtx
)
/* NOTE(review): presumably FLOAT/UNSIGNED_FLOAT conversion cases.  */
19471 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
19474 *total
= ix86_vec_cost (mode
, cost
->addss
);
19477 case FLOAT_TRUNCATE
:
19478 if (!(SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
))
19479 *total
= cost
->fadd
;
19481 *total
= ix86_vec_cost (mode
, cost
->addss
);
/* NOTE(review): presumably ABS/NEG-style cases needing a constant mask.  */
19485 /* SSE requires memory load for the constant operand. It may make
19486 sense to account for this. Of course the constant operand may or
19487 may not be reused. */
19488 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19489 *total
= cost
->sse_op
;
19490 else if (X87_FLOAT_MODE_P (mode
))
19491 *total
= cost
->fabs
;
19492 else if (FLOAT_MODE_P (mode
))
19493 *total
= ix86_vec_cost (mode
, cost
->sse_op
);
/* NOTE(review): presumably "case SQRT:".  */
19497 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
19498 *total
= mode
== SFmode
? cost
->sqrtss
: cost
->sqrtsd
;
19499 else if (X87_FLOAT_MODE_P (mode
))
19500 *total
= cost
->fsqrt
;
19501 else if (FLOAT_MODE_P (mode
))
19502 *total
= ix86_vec_cost (mode
,
19503 mode
== SFmode
? cost
->sqrtss
: cost
->sqrtsd
);
/* NOTE(review): presumably "case UNSPEC:" -- thread pointer is free.  */
19507 if (XINT (x
, 1) == UNSPEC_TP
)
19513 case VEC_DUPLICATE
:
19514 /* ??? Assume all of these vector manipulation patterns are
19515 recognizable. In which case they all pretty much have the
19517 *total
= cost
->sse_op
;
/* NOTE(review): presumably "case VEC_MERGE:".  */
19520 mask
= XEXP (x
, 2);
19521 /* This is masked instruction, assume the same cost,
19522 as nonmasked variant. */
19523 if (TARGET_AVX512F
&& register_operand (mask
, GET_MODE (mask
)))
19524 *total
= rtx_cost (XEXP (x
, 0), mode
, outer_code
, opno
, speed
);
19526 *total
= cost
->sse_op
;
/* NOTE(review): Darwin (Mach-O) only -- the opening "#if TARGET_MACHO"
   guard was dropped by the extraction (only the closing #endif below
   survived).  Code kept byte-identical; restore from upstream i386.c.  */
/* Counter used to generate unique lazy-pointer labels (L<n>$lz).  */
19536 static int current_machopic_label_num
;
19538 /* Given a symbol name and its associated stub, write out the
19539 definition of the stub. */
19542 machopic_output_stub (FILE *file
, const char *symb
, const char *stub
)
19544 unsigned int length
;
19545 char *binder_name
, *symbol_name
, lazy_ptr_name
[32];
19546 int label
= ++current_machopic_label_num
;
19548 /* For 64-bit we shouldn't get here. */
19549 gcc_assert (!TARGET_64BIT
);
19551 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19552 symb
= targetm
.strip_name_encoding (symb
);
19554 length
= strlen (stub
);
19555 binder_name
= XALLOCAVEC (char, length
+ 32);
19556 GEN_BINDER_NAME_FOR_STUB (binder_name
, stub
, length
);
19558 length
= strlen (symb
);
19559 symbol_name
= XALLOCAVEC (char, length
+ 32);
19560 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name
, symb
, length
);
19562 sprintf (lazy_ptr_name
, "L%d$lz", label
);
/* Pick the stub section matching the PIC flavour in effect.  */
19564 if (MACHOPIC_ATT_STUB
)
19565 switch_to_section (darwin_sections
[machopic_picsymbol_stub3_section
]);
19566 else if (MACHOPIC_PURE
)
19567 switch_to_section (darwin_sections
[machopic_picsymbol_stub2_section
]);
19569 switch_to_section (darwin_sections
[machopic_symbol_stub_section
]);
19571 fprintf (file
, "%s:\n", stub
);
19572 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19574 if (MACHOPIC_ATT_STUB
)
19576 fprintf (file
, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
19578 else if (MACHOPIC_PURE
)
19581 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19582 rtx tmp
= gen_rtx_REG (SImode
, 2 /* ECX */);
19583 output_set_got (tmp
, NULL_RTX
); /* "CALL ___<cpu>.get_pc_thunk.cx". */
19584 fprintf (file
, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
19585 label
, lazy_ptr_name
, label
);
19586 fprintf (file
, "\tjmp\t*%%ecx\n");
19589 fprintf (file
, "\tjmp\t*%s\n", lazy_ptr_name
);
19591 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
19592 it needs no stub-binding-helper. */
19593 if (MACHOPIC_ATT_STUB
)
19596 fprintf (file
, "%s:\n", binder_name
);
19600 fprintf (file
, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name
, binder_name
);
19601 fprintf (file
, "\tpushl\t%%ecx\n");
19604 fprintf (file
, "\tpushl\t$%s\n", lazy_ptr_name
);
19606 fputs ("\tjmp\tdyld_stub_binding_helper\n", file
);
19608 /* N.B. Keep the correspondence of these
19609 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
19610 old-pic/new-pic/non-pic stubs; altering this will break
19611 compatibility with existing dylibs. */
19614 /* 25-byte PIC stub using "CALL get_pc_thunk". */
19615 switch_to_section (darwin_sections
[machopic_lazy_symbol_ptr2_section
]);
19618 /* 16-byte -mdynamic-no-pic stub. */
19619 switch_to_section(darwin_sections
[machopic_lazy_symbol_ptr3_section
]);
19621 fprintf (file
, "%s:\n", lazy_ptr_name
);
19622 fprintf (file
, "\t.indirect_symbol %s\n", symbol_name
);
19623 fprintf (file
, ASM_LONG
"%s\n", binder_name
);
19625 #endif /* TARGET_MACHO */
19627 /* Order the registers for register allocator. */
19630 x86_order_regs_for_local_alloc (void)
19635 /* First allocate the local general purpose registers. */
19636 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19637 if (GENERAL_REGNO_P (i
) && call_used_regs
[i
])
19638 reg_alloc_order
[pos
++] = i
;
19640 /* Global general purpose registers. */
19641 for (i
= 0; i
< FIRST_PSEUDO_REGISTER
; i
++)
19642 if (GENERAL_REGNO_P (i
) && !call_used_regs
[i
])
19643 reg_alloc_order
[pos
++] = i
;
19645 /* x87 registers come first in case we are doing FP math
19647 if (!TARGET_SSE_MATH
)
19648 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19649 reg_alloc_order
[pos
++] = i
;
19651 /* SSE registers. */
19652 for (i
= FIRST_SSE_REG
; i
<= LAST_SSE_REG
; i
++)
19653 reg_alloc_order
[pos
++] = i
;
19654 for (i
= FIRST_REX_SSE_REG
; i
<= LAST_REX_SSE_REG
; i
++)
19655 reg_alloc_order
[pos
++] = i
;
19657 /* Extended REX SSE registers. */
19658 for (i
= FIRST_EXT_REX_SSE_REG
; i
<= LAST_EXT_REX_SSE_REG
; i
++)
19659 reg_alloc_order
[pos
++] = i
;
19661 /* Mask register. */
19662 for (i
= FIRST_MASK_REG
; i
<= LAST_MASK_REG
; i
++)
19663 reg_alloc_order
[pos
++] = i
;
19665 /* x87 registers. */
19666 if (TARGET_SSE_MATH
)
19667 for (i
= FIRST_STACK_REG
; i
<= LAST_STACK_REG
; i
++)
19668 reg_alloc_order
[pos
++] = i
;
19670 for (i
= FIRST_MMX_REG
; i
<= LAST_MMX_REG
; i
++)
19671 reg_alloc_order
[pos
++] = i
;
19673 /* Initialize the rest of array as we do not allocate some registers
19675 while (pos
< FIRST_PSEUDO_REGISTER
)
19676 reg_alloc_order
[pos
++] = 0;
19680 ix86_ms_bitfield_layout_p (const_tree record_type
)
19682 return ((TARGET_MS_BITFIELD_LAYOUT
19683 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type
)))
19684 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type
)));
19687 /* Returns an expression indicating where the this parameter is
19688 located on entry to the FUNCTION. */
/* NOTE(review): extraction artifact -- the return type, the
   TARGET_64BIT branch condition, braces and the trailing constant of
   the final gen_rtx_MEM were dropped (gaps in embedded numbering,
   e.g. 19694 -> 19699).  Code kept byte-identical; restore from
   upstream i386.c.  */
19691 x86_this_parameter (tree function
)
19693 tree type
= TREE_TYPE (function
);
/* Aggregate returns pass a hidden return-slot pointer first, pushing
   "this" to the next slot/register.  */
19694 bool aggr
= aggregate_value_p (TREE_TYPE (type
), type
) != 0;
/* NOTE(review): presumably inside "if (TARGET_64BIT)" -- 64-bit passes
   "this" in the first (or second, for AGGR) integer parameter register.  */
19699 const int *parm_regs
;
19701 if (ix86_function_type_abi (type
) == MS_ABI
)
19702 parm_regs
= x86_64_ms_abi_int_parameter_registers
;
19704 parm_regs
= x86_64_int_parameter_registers
;
19705 return gen_rtx_REG (Pmode
, parm_regs
[aggr
]);
/* 32-bit: register passing only with regparm and non-stdarg types.  */
19708 nregs
= ix86_function_regparm (type
, function
);
19710 if (nregs
> 0 && !stdarg_p (type
))
19713 unsigned int ccvt
= ix86_get_callcvt (type
);
19715 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
19716 regno
= aggr
? DX_REG
: CX_REG
;
19717 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
19721 return gen_rtx_MEM (SImode
,
19722 plus_constant (Pmode
, stack_pointer_rtx
, 4));
19731 return gen_rtx_MEM (SImode
,
19732 plus_constant (Pmode
,
19733 stack_pointer_rtx
, 4));
19736 return gen_rtx_REG (SImode
, regno
);
/* Fallback: "this" lives on the stack just above the return address.  */
19739 return gen_rtx_MEM (SImode
, plus_constant (Pmode
, stack_pointer_rtx
,
19743 /* Determine whether x86_output_mi_thunk can succeed. */
19746 x86_can_output_mi_thunk (const_tree
, HOST_WIDE_INT
, HOST_WIDE_INT vcall_offset
,
19747 const_tree function
)
19749 /* 64-bit can handle anything. */
19753 /* For 32-bit, everything's fine if we have one free register. */
19754 if (ix86_function_regparm (TREE_TYPE (function
), function
) < 3)
19757 /* Need a free register for vcall_offset. */
19761 /* Need a free register for GOT references. */
19762 if (flag_pic
&& !targetm
.binds_local_p (function
))
19765 /* Otherwise ok. */
19769 /* Output the assembler code for a thunk function. THUNK_DECL is the
19770 declaration for the thunk function itself, FUNCTION is the decl for
19771 the target function. DELTA is an immediate constant offset to be
19772 added to THIS. If VCALL_OFFSET is nonzero, the word at
19773 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): extraction artifact -- logical lines split across
   physical lines, stray line numbers embedded, and many lines dropped
   (return type, braces, several conditions; see gaps such as
   19782 -> 19786).  Code kept byte-identical; restore from upstream
   i386.c.  */
19776 x86_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
19777 HOST_WIDE_INT vcall_offset
, tree function
)
19779 const char *fnname
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl
));
19780 rtx this_param
= x86_this_parameter (function
);
19781 rtx this_reg
, tmp
, fnaddr
;
19782 unsigned int tmp_regno
;
/* Choose a scratch register that does not clash with parameter
   passing: r10 on 64-bit, else per the calling convention.  */
19786 tmp_regno
= R10_REG
;
19789 unsigned int ccvt
= ix86_get_callcvt (TREE_TYPE (function
));
19790 if ((ccvt
& IX86_CALLCVT_FASTCALL
) != 0)
19791 tmp_regno
= AX_REG
;
19792 else if ((ccvt
& IX86_CALLCVT_THISCALL
) != 0)
19793 tmp_regno
= DX_REG
;
19795 tmp_regno
= CX_REG
;
19798 emit_note (NOTE_INSN_PROLOGUE_END
);
19800 /* CET is enabled, insert EB instruction. */
19801 if ((flag_cf_protection
& CF_BRANCH
))
19802 emit_insn (gen_nop_endbr ());
19804 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19805 pull it in now and let DELTA benefit. */
19806 if (REG_P (this_param
))
19807 this_reg
= this_param
;
19808 else if (vcall_offset
)
19810 /* Put the this parameter into %eax. */
19811 this_reg
= gen_rtx_REG (Pmode
, AX_REG
);
19812 emit_move_insn (this_reg
, this_param
);
19815 this_reg
= NULL_RTX
;
19817 /* Adjust the this parameter by a fixed constant. */
19820 rtx delta_rtx
= GEN_INT (delta
);
19821 rtx delta_dst
= this_reg
? this_reg
: this_param
;
/* Large 64-bit deltas need to be materialized in a register first.  */
19825 if (!x86_64_general_operand (delta_rtx
, Pmode
))
19827 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
19828 emit_move_insn (tmp
, delta_rtx
);
19833 ix86_emit_binop (PLUS
, Pmode
, delta_dst
, delta_rtx
);
19836 /* Adjust the this parameter by a value stored in the vtable. */
19839 rtx vcall_addr
, vcall_mem
, this_mem
;
19841 tmp
= gen_rtx_REG (Pmode
, tmp_regno
);
19843 this_mem
= gen_rtx_MEM (ptr_mode
, this_reg
);
19844 if (Pmode
!= ptr_mode
)
19845 this_mem
= gen_rtx_ZERO_EXTEND (Pmode
, this_mem
);
/* tmp = vtable pointer = *this.  */
19846 emit_move_insn (tmp
, this_mem
);
19848 /* Adjust the this parameter. */
19849 vcall_addr
= plus_constant (Pmode
, tmp
, vcall_offset
);
19851 && !ix86_legitimate_address_p (ptr_mode
, vcall_addr
, true))
19853 rtx tmp2
= gen_rtx_REG (Pmode
, R11_REG
);
19854 emit_move_insn (tmp2
, GEN_INT (vcall_offset
));
19855 vcall_addr
= gen_rtx_PLUS (Pmode
, tmp
, tmp2
);
19858 vcall_mem
= gen_rtx_MEM (ptr_mode
, vcall_addr
);
19859 if (Pmode
!= ptr_mode
)
19860 emit_insn (gen_addsi_1_zext (this_reg
,
19861 gen_rtx_REG (ptr_mode
,
19865 ix86_emit_binop (PLUS
, Pmode
, this_reg
, vcall_mem
);
19868 /* If necessary, drop THIS back to its stack slot. */
19869 if (this_reg
&& this_reg
!= this_param
)
19870 emit_move_insn (this_param
, this_reg
);
19872 fnaddr
= XEXP (DECL_RTL (function
), 0);
/* 64-bit: call through the GOT unless the target binds locally.  */
19875 if (!flag_pic
|| targetm
.binds_local_p (function
)
19880 tmp
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOTPCREL
);
19881 tmp
= gen_rtx_CONST (Pmode
, tmp
);
19882 fnaddr
= gen_const_mem (Pmode
, tmp
);
19887 if (!flag_pic
|| targetm
.binds_local_p (function
))
19890 else if (TARGET_MACHO
)
19892 fnaddr
= machopic_indirect_call_target (DECL_RTL (function
));
19893 fnaddr
= XEXP (fnaddr
, 0);
19895 #endif /* TARGET_MACHO */
/* 32-bit PIC: set up %ecx as PIC base and go through the GOT.  */
19898 tmp
= gen_rtx_REG (Pmode
, CX_REG
);
19899 output_set_got (tmp
, NULL_RTX
);
19901 fnaddr
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, fnaddr
), UNSPEC_GOT
);
19902 fnaddr
= gen_rtx_CONST (Pmode
, fnaddr
);
19903 fnaddr
= gen_rtx_PLUS (Pmode
, tmp
, fnaddr
);
19904 fnaddr
= gen_const_mem (Pmode
, fnaddr
);
19908 /* Our sibling call patterns do not allow memories, because we have no
19909 predicate that can distinguish between frame and non-frame memory.
19910 For our purposes here, we can get away with (ab)using a jump pattern,
19911 because we're going to do no optimization. */
19912 if (MEM_P (fnaddr
))
19914 if (sibcall_insn_operand (fnaddr
, word_mode
))
19916 fnaddr
= XEXP (DECL_RTL (function
), 0);
19917 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
19918 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
19919 tmp
= emit_call_insn (tmp
);
19920 SIBLING_CALL_P (tmp
) = 1;
19923 emit_jump_insn (gen_indirect_jump (fnaddr
));
19927 if (ix86_cmodel
== CM_LARGE_PIC
&& SYMBOLIC_CONST (fnaddr
))
19929 // CM_LARGE_PIC always uses pseudo PIC register which is
19930 // uninitialized. Since FUNCTION is local and calling it
19931 // doesn't go through PLT, we use scratch register %r11 as
19932 // PIC register and initialize it here.
19933 pic_offset_table_rtx
= gen_rtx_REG (Pmode
, R11_REG
);
19934 ix86_init_large_pic_reg (tmp_regno
);
19935 fnaddr
= legitimize_pic_address (fnaddr
,
19936 gen_rtx_REG (Pmode
, tmp_regno
));
19939 if (!sibcall_insn_operand (fnaddr
, word_mode
))
19941 tmp
= gen_rtx_REG (word_mode
, tmp_regno
);
19942 if (GET_MODE (fnaddr
) != word_mode
)
19943 fnaddr
= gen_rtx_ZERO_EXTEND (word_mode
, fnaddr
);
19944 emit_move_insn (tmp
, fnaddr
);
19948 tmp
= gen_rtx_MEM (QImode
, fnaddr
);
19949 tmp
= gen_rtx_CALL (VOIDmode
, tmp
, const0_rtx
);
19950 tmp
= emit_call_insn (tmp
);
19951 SIBLING_CALL_P (tmp
) = 1;
19955 /* Emit just enough of rest_of_compilation to get the insns emitted. */
19956 insn
= get_insns ();
19957 shorten_branches (insn
);
19958 assemble_start_function (thunk_fndecl
, fnname
);
19959 final_start_function (insn
, file
, 1);
19960 final (insn
, file
, 1);
19961 final_end_function ();
19962 assemble_end_function (thunk_fndecl
, fnname
);
/* Emit target-specific directives at the start of the assembly file.
   NOTE(review): extraction artifact -- the return type and several
   guards were dropped (e.g. the ".code16gcc" line is presumably guarded
   by TARGET_16BIT, and darwin_file_start presumably sits inside
   "#if TARGET_MACHO"); confirm against upstream i386.c.  */
19966 x86_file_start (void)
19968 default_file_start ();
19970 fputs ("\t.code16gcc\n", asm_out_file
);
19972 darwin_file_start ();
19974 if (X86_FILE_START_VERSION_DIRECTIVE
)
19975 fputs ("\t.version\t\"01.01\"\n", asm_out_file
);
19976 if (X86_FILE_START_FLTUSED
)
19977 fputs ("\t.global\t__fltused\n", asm_out_file
);
19978 if (ix86_asm_dialect
== ASM_INTEL
)
19979 fputs ("\t.intel_syntax noprefix\n", asm_out_file
);
19983 x86_field_alignment (tree type
, int computed
)
19987 if (TARGET_64BIT
|| TARGET_ALIGN_DOUBLE
)
19990 return iamcu_alignment (type
, computed
);
19991 mode
= TYPE_MODE (strip_array_types (type
));
19992 if (mode
== DFmode
|| mode
== DCmode
19993 || GET_MODE_CLASS (mode
) == MODE_INT
19994 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
19995 return MIN (32, computed
);
19999 /* Print call to TARGET to FILE. */
20002 x86_print_call_or_nop (FILE *file
, const char *target
)
20004 if (flag_nop_mcount
|| !strcmp (target
, "nop"))
20005 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20006 fprintf (file
, "1:" ASM_BYTE
"0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20008 fprintf (file
, "1:\tcall\t%s\n", target
);
20012 current_fentry_name (const char **name
)
20014 tree attr
= lookup_attribute ("fentry_name",
20015 DECL_ATTRIBUTES (current_function_decl
));
20018 *name
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
20023 current_fentry_section (const char **name
)
20025 tree attr
= lookup_attribute ("fentry_section",
20026 DECL_ATTRIBUTES (current_function_decl
));
20029 *name
= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr
)));
20033 /* Output assembler code to FILE to increment profiler label # LABELNO
20034 for profiling a function entry. */
/* NOTE(review): extraction artifact -- the return type, braces, the
   TARGET_64BIT/else structure and several #else/#endif preprocessor
   lines were dropped (gaps in embedded numbering).  Code kept
   byte-identical; restore from upstream i386.c.  */
20036 x86_function_profiler (FILE *file
, int labelno ATTRIBUTE_UNUSED
)
/* Emit the ENDBR deferred from the prologue so the mcount call site
   stays first.  */
20038 if (cfun
->machine
->endbr_queued_at_entrance
)
20039 fprintf (file
, "\t%s\n", TARGET_64BIT
? "endbr64" : "endbr32");
20041 const char *mcount_name
= MCOUNT_NAME
;
/* Attribute overrides -fentry-name, which overrides -mfentry.  */
20043 if (current_fentry_name (&mcount_name
))
20045 else if (fentry_name
)
20046 mcount_name
= fentry_name
;
20047 else if (flag_fentry
)
20048 mcount_name
= MCOUNT_NAME_BEFORE_PROLOGUE
;
20052 #ifndef NO_PROFILE_COUNTERS
20053 fprintf (file
, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX
, labelno
);
20056 if (!TARGET_PECOFF
&& flag_pic
)
20057 fprintf (file
, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name
);
20059 x86_print_call_or_nop (file
, mcount_name
);
/* 32-bit PIC variant.  */
20063 #ifndef NO_PROFILE_COUNTERS
20064 fprintf (file
, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER
"\n",
20067 fprintf (file
, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name
);
/* 32-bit non-PIC variant.  */
20071 #ifndef NO_PROFILE_COUNTERS
20072 fprintf (file
, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER
"\n",
20075 x86_print_call_or_nop (file
, mcount_name
);
/* Record the call site address in a __mcount_loc-style section for
   -mrecord-mcount / the "fentry_section" attribute.  */
20078 if (flag_record_mcount
20079 || lookup_attribute ("fentry_section",
20080 DECL_ATTRIBUTES (current_function_decl
)))
20082 const char *sname
= "__mcount_loc";
20084 if (current_fentry_section (&sname
))
20086 else if (fentry_section
)
20087 sname
= fentry_section
;
20089 fprintf (file
, "\t.section %s, \"a\",@progbits\n", sname
);
20090 fprintf (file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
20091 fprintf (file
, "\t.previous\n");
20095 /* We don't have exact information about the insn sizes, but we may assume
20096 quite safely that we are informed about all 1 byte insns and memory
20097 address sizes. This is enough to eliminate unnecessary padding in
/* NOTE(review): extraction artifact -- the return type, several early
   returns, the CALL_P condition and the switch over insn types were
   dropped (gaps in embedded numbering, e.g. 20117 -> 20119).  Code kept
   byte-identical; restore from upstream i386.c.  */
20101 ix86_min_insn_size (rtx_insn
*insn
)
20105 if (!INSN_P (insn
) || !active_insn_p (insn
))
20108 /* Discard alignments we've emit and jump instructions. */
20109 if (GET_CODE (PATTERN (insn
)) == UNSPEC_VOLATILE
20110 && XINT (PATTERN (insn
), 1) == UNSPECV_ALIGN
)
20113 /* Important case - calls are always 5 bytes.
20114 It is common to have many calls in the row. */
20116 && symbolic_reference_mentioned_p (PATTERN (insn
))
20117 && !SIBLING_CALL_P (insn
))
20119 len
= get_attr_length (insn
);
20123 /* For normal instructions we rely on get_attr_length being exact,
20124 with a few exceptions. */
20125 if (!JUMP_P (insn
))
20127 enum attr_type type
= get_attr_type (insn
);
/* Inline asm length is unknown; treat it as minimal.  */
20132 if (GET_CODE (PATTERN (insn
)) == ASM_INPUT
20133 || asm_noperands (PATTERN (insn
)) >= 0)
20140 /* Otherwise trust get_attr_length. */
20144 l
= get_attr_length_address (insn
);
20145 if (l
< 4 && symbolic_reference_mentioned_p (PATTERN (insn
)))
20154 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20156 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* NOTE(review): extraction artifact -- the rest of the header comment,
   the return type, braces and several conditions (e.g. the JUMP_TABLE
   handling and min_size declaration) were dropped.  Code kept
   byte-identical; restore from upstream i386.c.  */
20160 ix86_avoid_jump_mispredicts (void)
20162 rtx_insn
*insn
, *start
= get_insns ();
20163 int nbytes
= 0, njumps
= 0;
20164 bool isjump
= false;
20166 /* Look for all minimal intervals of instructions containing 4 jumps.
20167 The intervals are bounded by START and INSN. NBYTES is the total
20168 size of instructions in the interval including INSN and not including
20169 START. When the NBYTES is smaller than 16 bytes, it is possible
20170 that the end of START and INSN ends up in the same 16byte page.
20172 The smallest offset in the page INSN can start is the case where START
20173 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20174 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
20176 Don't consider asm goto as jump, while it can contain a jump, it doesn't
20177 have to, control transfer to label(s) can be performed through other
20178 means, and also we estimate minimum length of all asm stmts as 0. */
20179 for (insn
= start
; insn
; insn
= NEXT_INSN (insn
))
20183 if (LABEL_P (insn
))
20185 align_flags alignment
= label_to_alignment (insn
);
20186 int align
= alignment
.levels
[0].log
;
20187 int max_skip
= alignment
.levels
[0].maxskip
;
20191 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20192 already in the current 16 byte page, because otherwise
20193 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20194 bytes to reach 16 byte boundary. */
20196 || (align
<= 3 && max_skip
!= (1 << align
) - 1))
20199 fprintf (dump_file
, "Label %i with max_skip %i\n",
20200 INSN_UID (insn
), max_skip
);
/* Shrink the interval from the front until an aligned label can no
   longer share the 16-byte window.  */
20203 while (nbytes
+ max_skip
>= 16)
20205 start
= NEXT_INSN (start
);
20206 if ((JUMP_P (start
) && asm_noperands (PATTERN (start
)) < 0)
20208 njumps
--, isjump
= true;
20211 nbytes
-= ix86_min_insn_size (start
);
20217 min_size
= ix86_min_insn_size (insn
);
20218 nbytes
+= min_size
;
20220 fprintf (dump_file
, "Insn %i estimated to %i bytes\n",
20221 INSN_UID (insn
), min_size
);
20222 if ((JUMP_P (insn
) && asm_noperands (PATTERN (insn
)) < 0)
/* More than 3 jumps in the window: drop insns from the front.  */
20230 start
= NEXT_INSN (start
);
20231 if ((JUMP_P (start
) && asm_noperands (PATTERN (start
)) < 0)
20233 njumps
--, isjump
= true;
20236 nbytes
-= ix86_min_insn_size (start
);
20238 gcc_assert (njumps
>= 0);
20240 fprintf (dump_file
, "Interval %i to %i has %i bytes\n",
20241 INSN_UID (start
), INSN_UID (insn
), nbytes
);
20243 if (njumps
== 3 && isjump
&& nbytes
< 16)
/* Pad so the fourth jump lands in the next 16-byte window.  */
20245 int padsize
= 15 - nbytes
+ ix86_min_insn_size (insn
);
20248 fprintf (dump_file
, "Padding insn %i by %i bytes!\n",
20249 INSN_UID (insn
), padsize
);
20250 emit_insn_before (gen_pad (GEN_INT (padsize
)), insn
);
20256 /* AMD Athlon works faster
20257 when RET is not destination of conditional jump or directly preceded
20258 by other jump instruction. We avoid the penalty by inserting NOP just
20259 before the RET instructions in such cases. */
/* NOTE(review): extraction artifact -- the return type, braces, the
   declarations of E/EI/PREV, the replace-decision logic and the actual
   NOP/return replacement were dropped (gaps in embedded numbering).
   Code kept byte-identical; restore from upstream i386.c.  */
20261 ix86_pad_returns (void)
20266 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20268 basic_block bb
= e
->src
;
20269 rtx_insn
*ret
= BB_END (bb
);
20271 bool replace
= false;
20273 if (!JUMP_P (ret
) || !ANY_RETURN_P (PATTERN (ret
))
20274 || optimize_bb_for_size_p (bb
))
/* Walk back to the nearest active insn or label before the return.  */
20276 for (prev
= PREV_INSN (ret
); prev
; prev
= PREV_INSN (prev
))
20277 if (active_insn_p (prev
) || LABEL_P (prev
))
20279 if (prev
&& LABEL_P (prev
))
/* Return preceded by a label: penalized if reached by a jump edge.  */
20284 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
20285 if (EDGE_FREQUENCY (e
) && e
->src
->index
>= 0
20286 && !(e
->flags
& EDGE_FALLTHRU
))
20294 prev
= prev_active_insn (ret
);
20296 && ((JUMP_P (prev
) && any_condjump_p (prev
))
20299 /* Empty functions get branch mispredict even when
20300 the jump destination is not visible to us. */
20301 if (!prev
&& !optimize_function_for_size_p (cfun
))
20306 emit_jump_insn_before (gen_simple_return_internal_long (), ret
);
20312 /* Count the minimum number of instructions in BB. Return 4 if the
20313 number of instructions >= 4. */
20316 ix86_count_insn_bb (basic_block bb
)
20319 int insn_count
= 0;
20321 /* Count number of instructions in this block. Return 4 if the number
20322 of instructions >= 4. */
20323 FOR_BB_INSNS (bb
, insn
)
20325 /* Only happen in exit blocks. */
20327 && ANY_RETURN_P (PATTERN (insn
)))
20330 if (NONDEBUG_INSN_P (insn
)
20331 && GET_CODE (PATTERN (insn
)) != USE
20332 && GET_CODE (PATTERN (insn
)) != CLOBBER
)
20335 if (insn_count
>= 4)
20344 /* Count the minimum number of instructions in code path in BB.
20345 Return 4 if the number of instructions >= 4. */
20348 ix86_count_insn (basic_block bb
)
20352 int min_prev_count
;
20354 /* Only bother counting instructions along paths with no
20355 more than 2 basic blocks between entry and exit. Given
20356 that BB has an edge to exit, determine if a predecessor
20357 of BB has an edge from entry. If so, compute the number
20358 of instructions in the predecessor block. If there
20359 happen to be multiple such blocks, compute the minimum. */
20360 min_prev_count
= 4;
20361 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
20364 edge_iterator prev_ei
;
20366 if (e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
20368 min_prev_count
= 0;
20371 FOR_EACH_EDGE (prev_e
, prev_ei
, e
->src
->preds
)
20373 if (prev_e
->src
== ENTRY_BLOCK_PTR_FOR_FN (cfun
))
20375 int count
= ix86_count_insn_bb (e
->src
);
20376 if (count
< min_prev_count
)
20377 min_prev_count
= count
;
20383 if (min_prev_count
< 4)
20384 min_prev_count
+= ix86_count_insn_bb (bb
);
20386 return min_prev_count
;
20389 /* Pad short function to 4 instructions. */
20392 ix86_pad_short_function (void)
20397 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20399 rtx_insn
*ret
= BB_END (e
->src
);
20400 if (JUMP_P (ret
) && ANY_RETURN_P (PATTERN (ret
)))
20402 int insn_count
= ix86_count_insn (e
->src
);
20404 /* Pad short function. */
20405 if (insn_count
< 4)
20407 rtx_insn
*insn
= ret
;
20409 /* Find epilogue. */
20412 || NOTE_KIND (insn
) != NOTE_INSN_EPILOGUE_BEG
))
20413 insn
= PREV_INSN (insn
);
20418 /* Two NOPs count as one instruction. */
20419 insn_count
= 2 * (4 - insn_count
);
20420 emit_insn_before (gen_nops (GEN_INT (insn_count
)), insn
);
20426 /* Fix up a Windows system unwinder issue. If an EH region falls through into
20427 the epilogue, the Windows system unwinder will apply epilogue logic and
20428 produce incorrect offsets. This can be avoided by adding a nop between
20429 the last insn that can throw and the first insn of the epilogue. */
20432 ix86_seh_fixup_eh_fallthru (void)
20437 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
20439 rtx_insn
*insn
, *next
;
20441 /* Find the beginning of the epilogue. */
20442 for (insn
= BB_END (e
->src
); insn
!= NULL
; insn
= PREV_INSN (insn
))
20443 if (NOTE_P (insn
) && NOTE_KIND (insn
) == NOTE_INSN_EPILOGUE_BEG
)
20448 /* We only care about preceding insns that can throw. */
20449 insn
= prev_active_insn (insn
);
20450 if (insn
== NULL
|| !can_throw_internal (insn
))
20453 /* Do not separate calls from their debug information. */
20454 for (next
= NEXT_INSN (insn
); next
!= NULL
; next
= NEXT_INSN (next
))
20455 if (NOTE_P (next
) && NOTE_KIND (next
) == NOTE_INSN_VAR_LOCATION
)
20460 emit_insn_after (gen_nops (const1_rtx
), insn
);
20464 /* Implement machine specific optimizations. We implement padding of returns
20465 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
20469 /* We are freeing block_for_insn in the toplev to keep compatibility
20470 with old MDEP_REORGS that are not CFG based. Recompute it now. */
20471 compute_bb_for_insn ();
20473 if (TARGET_SEH
&& current_function_has_exception_handlers ())
20474 ix86_seh_fixup_eh_fallthru ();
20476 if (optimize
&& optimize_function_for_speed_p (cfun
))
20478 if (TARGET_PAD_SHORT_FUNCTION
)
20479 ix86_pad_short_function ();
20480 else if (TARGET_PAD_RETURNS
)
20481 ix86_pad_returns ();
20482 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20483 if (TARGET_FOUR_JUMP_LIMIT
)
20484 ix86_avoid_jump_mispredicts ();
20489 /* Return nonzero when QImode register that must be represented via REX prefix
20492 x86_extended_QIreg_mentioned_p (rtx_insn
*insn
)
20495 extract_insn_cached (insn
);
20496 for (i
= 0; i
< recog_data
.n_operands
; i
++)
20497 if (GENERAL_REG_P (recog_data
.operand
[i
])
20498 && !QI_REGNO_P (REGNO (recog_data
.operand
[i
])))
20503 /* Return true when INSN mentions register that must be encoded using REX
20506 x86_extended_reg_mentioned_p (rtx insn
)
20508 subrtx_iterator::array_type array
;
20509 FOR_EACH_SUBRTX (iter
, array
, INSN_P (insn
) ? PATTERN (insn
) : insn
, NONCONST
)
20511 const_rtx x
= *iter
;
20513 && (REX_INT_REGNO_P (REGNO (x
)) || REX_SSE_REGNO_P (REGNO (x
))))
20519 /* If profitable, negate (without causing overflow) integer constant
20520 of mode MODE at location LOC. Return true in this case. */
20522 x86_maybe_negate_const_int (rtx
*loc
, machine_mode mode
)
20526 if (!CONST_INT_P (*loc
))
20532 /* DImode x86_64 constants must fit in 32 bits. */
20533 gcc_assert (x86_64_immediate_operand (*loc
, mode
));
20544 gcc_unreachable ();
20547 /* Avoid overflows. */
20548 if (mode_signbit_p (mode
, *loc
))
20551 val
= INTVAL (*loc
);
20553 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
20554 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
20555 if ((val
< 0 && val
!= -128)
20558 *loc
= GEN_INT (-val
);
20565 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20566 optabs would emit if we didn't have TFmode patterns. */
20569 x86_emit_floatuns (rtx operands
[2])
20571 rtx_code_label
*neglab
, *donelab
;
20572 rtx i0
, i1
, f0
, in
, out
;
20573 machine_mode mode
, inmode
;
20575 inmode
= GET_MODE (operands
[1]);
20576 gcc_assert (inmode
== SImode
|| inmode
== DImode
);
20579 in
= force_reg (inmode
, operands
[1]);
20580 mode
= GET_MODE (out
);
20581 neglab
= gen_label_rtx ();
20582 donelab
= gen_label_rtx ();
20583 f0
= gen_reg_rtx (mode
);
20585 emit_cmp_and_jump_insns (in
, const0_rtx
, LT
, const0_rtx
, inmode
, 0, neglab
);
20587 expand_float (out
, in
, 0);
20589 emit_jump_insn (gen_jump (donelab
));
20592 emit_label (neglab
);
20594 i0
= expand_simple_binop (inmode
, LSHIFTRT
, in
, const1_rtx
, NULL
,
20596 i1
= expand_simple_binop (inmode
, AND
, in
, const1_rtx
, NULL
,
20598 i0
= expand_simple_binop (inmode
, IOR
, i0
, i1
, i0
, 1, OPTAB_DIRECT
);
20600 expand_float (f0
, i0
, 0);
20602 emit_insn (gen_rtx_SET (out
, gen_rtx_PLUS (mode
, f0
, f0
)));
20604 emit_label (donelab
);
20607 /* Target hook for scalar_mode_supported_p. */
20609 ix86_scalar_mode_supported_p (scalar_mode mode
)
20611 if (DECIMAL_FLOAT_MODE_P (mode
))
20612 return default_decimal_float_supported_p ();
20613 else if (mode
== TFmode
)
20616 return default_scalar_mode_supported_p (mode
);
20619 /* Implements target hook vector_mode_supported_p. */
20621 ix86_vector_mode_supported_p (machine_mode mode
)
20623 if (TARGET_SSE
&& VALID_SSE_REG_MODE (mode
))
20625 if (TARGET_SSE2
&& VALID_SSE2_REG_MODE (mode
))
20627 if (TARGET_AVX
&& VALID_AVX256_REG_MODE (mode
))
20629 if (TARGET_AVX512F
&& VALID_AVX512F_REG_MODE (mode
))
20631 if ((TARGET_MMX
|| TARGET_MMX_WITH_SSE
) && VALID_MMX_REG_MODE (mode
))
20633 if (TARGET_3DNOW
&& VALID_MMX_REG_MODE_3DNOW (mode
))
20638 /* Target hook for c_mode_for_suffix. */
20639 static machine_mode
20640 ix86_c_mode_for_suffix (char suffix
)
20650 /* Worker function for TARGET_MD_ASM_ADJUST.
20652 We implement asm flag outputs, and maintain source compatibility
20653 with the old cc0-based compiler. */
20656 ix86_md_asm_adjust (vec
<rtx
> &outputs
, vec
<rtx
> &/*inputs*/,
20657 vec
<const char *> &constraints
,
20658 vec
<rtx
> &clobbers
, HARD_REG_SET
&clobbered_regs
)
20660 bool saw_asm_flag
= false;
20663 for (unsigned i
= 0, n
= outputs
.length (); i
< n
; ++i
)
20665 const char *con
= constraints
[i
];
20666 if (strncmp (con
, "=@cc", 4) != 0)
20669 if (strchr (con
, ',') != NULL
)
20671 error ("alternatives not allowed in %<asm%> flag output");
20675 bool invert
= false;
20677 invert
= true, con
++;
20679 machine_mode mode
= CCmode
;
20680 rtx_code code
= UNKNOWN
;
20686 mode
= CCAmode
, code
= EQ
;
20687 else if (con
[1] == 'e' && con
[2] == 0)
20688 mode
= CCCmode
, code
= NE
;
20692 mode
= CCCmode
, code
= EQ
;
20693 else if (con
[1] == 'e' && con
[2] == 0)
20694 mode
= CCAmode
, code
= NE
;
20698 mode
= CCCmode
, code
= EQ
;
20702 mode
= CCZmode
, code
= EQ
;
20706 mode
= CCGCmode
, code
= GT
;
20707 else if (con
[1] == 'e' && con
[2] == 0)
20708 mode
= CCGCmode
, code
= GE
;
20712 mode
= CCGCmode
, code
= LT
;
20713 else if (con
[1] == 'e' && con
[2] == 0)
20714 mode
= CCGCmode
, code
= LE
;
20718 mode
= CCOmode
, code
= EQ
;
20722 mode
= CCPmode
, code
= EQ
;
20726 mode
= CCSmode
, code
= EQ
;
20730 mode
= CCZmode
, code
= EQ
;
20733 if (code
== UNKNOWN
)
20735 error ("unknown %<asm%> flag output %qs", constraints
[i
]);
20739 code
= reverse_condition (code
);
20741 rtx dest
= outputs
[i
];
20744 /* This is the first asm flag output. Here we put the flags
20745 register in as the real output and adjust the condition to
20747 constraints
[i
] = "=Bf";
20748 outputs
[i
] = gen_rtx_REG (CCmode
, FLAGS_REG
);
20749 saw_asm_flag
= true;
20753 /* We don't need the flags register as output twice. */
20754 constraints
[i
] = "=X";
20755 outputs
[i
] = gen_rtx_SCRATCH (SImode
);
20758 rtx x
= gen_rtx_REG (mode
, FLAGS_REG
);
20759 x
= gen_rtx_fmt_ee (code
, QImode
, x
, const0_rtx
);
20761 machine_mode dest_mode
= GET_MODE (dest
);
20762 if (!SCALAR_INT_MODE_P (dest_mode
))
20764 error ("invalid type for %<asm%> flag output");
20768 if (dest_mode
== DImode
&& !TARGET_64BIT
)
20769 dest_mode
= SImode
;
20771 if (dest_mode
!= QImode
)
20773 rtx destqi
= gen_reg_rtx (QImode
);
20774 emit_insn (gen_rtx_SET (destqi
, x
));
20776 if (TARGET_ZERO_EXTEND_WITH_AND
20777 && optimize_function_for_speed_p (cfun
))
20779 x
= force_reg (dest_mode
, const0_rtx
);
20781 emit_insn (gen_movstrictqi
20782 (gen_lowpart (QImode
, x
), destqi
));
20785 x
= gen_rtx_ZERO_EXTEND (dest_mode
, destqi
);
20788 if (dest_mode
!= GET_MODE (dest
))
20790 rtx tmp
= gen_reg_rtx (SImode
);
20792 emit_insn (gen_rtx_SET (tmp
, x
));
20793 emit_insn (gen_zero_extendsidi2 (dest
, tmp
));
20796 emit_insn (gen_rtx_SET (dest
, x
));
20798 rtx_insn
*seq
= get_insns ();
20805 /* If we had no asm flag outputs, clobber the flags. */
20806 clobbers
.safe_push (gen_rtx_REG (CCmode
, FLAGS_REG
));
20807 SET_HARD_REG_BIT (clobbered_regs
, FLAGS_REG
);
20812 /* Implements target vector targetm.asm.encode_section_info. */
20814 static void ATTRIBUTE_UNUSED
20815 ix86_encode_section_info (tree decl
, rtx rtl
, int first
)
20817 default_encode_section_info (decl
, rtl
, first
);
20819 if (ix86_in_large_data_p (decl
))
20820 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_FAR_ADDR
;
20823 /* Worker function for REVERSE_CONDITION. */
20826 ix86_reverse_condition (enum rtx_code code
, machine_mode mode
)
20828 return (mode
== CCFPmode
20829 ? reverse_condition_maybe_unordered (code
)
20830 : reverse_condition (code
));
20833 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20837 output_387_reg_move (rtx_insn
*insn
, rtx
*operands
)
20839 if (REG_P (operands
[0]))
20841 if (REG_P (operands
[1])
20842 && find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20844 if (REGNO (operands
[0]) == FIRST_STACK_REG
)
20845 return output_387_ffreep (operands
, 0);
20846 return "fstp\t%y0";
20848 if (STACK_TOP_P (operands
[0]))
20849 return "fld%Z1\t%y1";
20852 else if (MEM_P (operands
[0]))
20854 gcc_assert (REG_P (operands
[1]));
20855 if (find_regno_note (insn
, REG_DEAD
, REGNO (operands
[1])))
20856 return "fstp%Z0\t%y0";
20859 /* There is no non-popping store to memory for XFmode.
20860 So if we need one, follow the store with a load. */
20861 if (GET_MODE (operands
[0]) == XFmode
)
20862 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
20864 return "fst%Z0\t%y0";
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifdef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
20909 /* Return the mangling of TYPE if it is an extended fundamental type. */
20911 static const char *
20912 ix86_mangle_type (const_tree type
)
20914 type
= TYPE_MAIN_VARIANT (type
);
20916 if (TREE_CODE (type
) != VOID_TYPE
&& TREE_CODE (type
) != BOOLEAN_TYPE
20917 && TREE_CODE (type
) != INTEGER_TYPE
&& TREE_CODE (type
) != REAL_TYPE
)
20920 switch (TYPE_MODE (type
))
20923 /* __float128 is "g". */
20926 /* "long double" or __float80 is "e". */
20933 static GTY(()) tree ix86_tls_stack_chk_guard_decl
;
20936 ix86_stack_protect_guard (void)
20938 if (TARGET_SSP_TLS_GUARD
)
20940 tree type_node
= lang_hooks
.types
.type_for_mode (ptr_mode
, 1);
20941 int qual
= ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg
);
20942 tree type
= build_qualified_type (type_node
, qual
);
20945 if (global_options_set
.x_ix86_stack_protector_guard_symbol_str
)
20947 t
= ix86_tls_stack_chk_guard_decl
;
20954 (UNKNOWN_LOCATION
, VAR_DECL
,
20955 get_identifier (ix86_stack_protector_guard_symbol_str
),
20957 TREE_STATIC (t
) = 1;
20958 TREE_PUBLIC (t
) = 1;
20959 DECL_EXTERNAL (t
) = 1;
20961 TREE_THIS_VOLATILE (t
) = 1;
20962 DECL_ARTIFICIAL (t
) = 1;
20963 DECL_IGNORED_P (t
) = 1;
20965 /* Do not share RTL as the declaration is visible outside of
20966 current function. */
20968 RTX_FLAG (x
, used
) = 1;
20970 ix86_tls_stack_chk_guard_decl
= t
;
20975 tree asptrtype
= build_pointer_type (type
);
20977 t
= build_int_cst (asptrtype
, ix86_stack_protector_guard_offset
);
20978 t
= build2 (MEM_REF
, asptrtype
, t
,
20979 build_int_cst (asptrtype
, 0));
20980 TREE_THIS_VOLATILE (t
) = 1;
20986 return default_stack_protect_guard ();
20989 /* For 32-bit code we can save PIC register setup by using
20990 __stack_chk_fail_local hidden function instead of calling
20991 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
20992 register, so it is better to call __stack_chk_fail directly. */
20994 static tree ATTRIBUTE_UNUSED
20995 ix86_stack_protect_fail (void)
20997 return TARGET_64BIT
20998 ? default_external_stack_protect_fail ()
20999 : default_hidden_stack_protect_fail ();
21002 /* Select a format to encode pointers in exception handling data. CODE
21003 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21004 true if the symbol may be affected by dynamic relocations.
21006 ??? All x86 object file formats are capable of representing this.
21007 After all, the relocation needed is the same as for the call insn.
21008 Whether or not a particular assembler allows us to enter such, I
21009 guess we'll have to see. */
21011 asm_preferred_eh_data_format (int code
, int global
)
21015 int type
= DW_EH_PE_sdata8
;
21017 || ix86_cmodel
== CM_SMALL_PIC
21018 || (ix86_cmodel
== CM_MEDIUM_PIC
&& (global
|| code
)))
21019 type
= DW_EH_PE_sdata4
;
21020 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
21022 if (ix86_cmodel
== CM_SMALL
21023 || (ix86_cmodel
== CM_MEDIUM
&& code
))
21024 return DW_EH_PE_udata4
;
21025 return DW_EH_PE_absptr
;
21028 /* Implement targetm.vectorize.builtin_vectorization_cost. */
21030 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
21034 machine_mode mode
= TImode
;
21036 if (vectype
!= NULL
)
21038 fp
= FLOAT_TYPE_P (vectype
);
21039 mode
= TYPE_MODE (vectype
);
21042 switch (type_of_cost
)
21045 return fp
? ix86_cost
->addss
: COSTS_N_INSNS (1);
21048 /* load/store costs are relative to register move which is 2. Recompute
21049 it to COSTS_N_INSNS so everything have same base. */
21050 return COSTS_N_INSNS (fp
? ix86_cost
->sse_load
[0]
21051 : ix86_cost
->int_load
[2]) / 2;
21054 return COSTS_N_INSNS (fp
? ix86_cost
->sse_store
[0]
21055 : ix86_cost
->int_store
[2]) / 2;
21058 return ix86_vec_cost (mode
,
21059 fp
? ix86_cost
->addss
: ix86_cost
->sse_op
);
21062 index
= sse_store_index (mode
);
21063 /* See PR82713 - we may end up being called on non-vector type. */
21066 return COSTS_N_INSNS (ix86_cost
->sse_load
[index
]) / 2;
21069 index
= sse_store_index (mode
);
21070 /* See PR82713 - we may end up being called on non-vector type. */
21073 return COSTS_N_INSNS (ix86_cost
->sse_store
[index
]) / 2;
21075 case vec_to_scalar
:
21076 case scalar_to_vec
:
21077 return ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21079 /* We should have separate costs for unaligned loads and gather/scatter.
21080 Do that incrementally. */
21081 case unaligned_load
:
21082 index
= sse_store_index (mode
);
21083 /* See PR82713 - we may end up being called on non-vector type. */
21086 return COSTS_N_INSNS (ix86_cost
->sse_unaligned_load
[index
]) / 2;
21088 case unaligned_store
:
21089 index
= sse_store_index (mode
);
21090 /* See PR82713 - we may end up being called on non-vector type. */
21093 return COSTS_N_INSNS (ix86_cost
->sse_unaligned_store
[index
]) / 2;
21095 case vector_gather_load
:
21096 return ix86_vec_cost (mode
,
21098 (ix86_cost
->gather_static
21099 + ix86_cost
->gather_per_elt
21100 * TYPE_VECTOR_SUBPARTS (vectype
)) / 2);
21102 case vector_scatter_store
:
21103 return ix86_vec_cost (mode
,
21105 (ix86_cost
->scatter_static
21106 + ix86_cost
->scatter_per_elt
21107 * TYPE_VECTOR_SUBPARTS (vectype
)) / 2);
21109 case cond_branch_taken
:
21110 return ix86_cost
->cond_taken_branch_cost
;
21112 case cond_branch_not_taken
:
21113 return ix86_cost
->cond_not_taken_branch_cost
;
21116 case vec_promote_demote
:
21117 return ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21119 case vec_construct
:
21121 /* N element inserts into SSE vectors. */
21122 int cost
= TYPE_VECTOR_SUBPARTS (vectype
) * ix86_cost
->sse_op
;
21123 /* One vinserti128 for combining two SSE vectors for AVX256. */
21124 if (GET_MODE_BITSIZE (mode
) == 256)
21125 cost
+= ix86_vec_cost (mode
, ix86_cost
->addss
);
21126 /* One vinserti64x4 and two vinserti128 for combining SSE
21127 and AVX256 vectors to AVX512. */
21128 else if (GET_MODE_BITSIZE (mode
) == 512)
21129 cost
+= 3 * ix86_vec_cost (mode
, ix86_cost
->addss
);
21134 gcc_unreachable ();
21139 /* This function returns the calling abi specific va_list type node.
21140 It returns the FNDECL specific va_list type. */
21143 ix86_fn_abi_va_list (tree fndecl
)
21146 return va_list_type_node
;
21147 gcc_assert (fndecl
!= NULL_TREE
);
21149 if (ix86_function_abi ((const_tree
) fndecl
) == MS_ABI
)
21150 return ms_va_list_type_node
;
21152 return sysv_va_list_type_node
;
21155 /* Returns the canonical va_list type specified by TYPE. If there
21156 is no valid TYPE provided, it return NULL_TREE. */
21159 ix86_canonical_va_list_type (tree type
)
21163 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type
)))
21164 return ms_va_list_type_node
;
21166 if ((TREE_CODE (type
) == ARRAY_TYPE
21167 && integer_zerop (array_type_nelts (type
)))
21168 || POINTER_TYPE_P (type
))
21170 tree elem_type
= TREE_TYPE (type
);
21171 if (TREE_CODE (elem_type
) == RECORD_TYPE
21172 && lookup_attribute ("sysv_abi va_list",
21173 TYPE_ATTRIBUTES (elem_type
)))
21174 return sysv_va_list_type_node
;
21180 return std_canonical_va_list_type (type
);
21183 /* Iterate through the target-specific builtin types for va_list.
21184 IDX denotes the iterator, *PTREE is set to the result type of
21185 the va_list builtin, and *PNAME to its internal type.
21186 Returns zero if there is no element for this index, otherwise
21187 IDX should be increased upon the next call.
21188 Note, do not iterate a base builtin's name like __builtin_va_list.
21189 Used from c_common_nodes_and_builtins. */
21192 ix86_enum_va_list (int idx
, const char **pname
, tree
*ptree
)
21202 *ptree
= ms_va_list_type_node
;
21203 *pname
= "__builtin_ms_va_list";
21207 *ptree
= sysv_va_list_type_node
;
21208 *pname
= "__builtin_sysv_va_list";
21216 #undef TARGET_SCHED_DISPATCH
21217 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21218 #undef TARGET_SCHED_DISPATCH_DO
21219 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21220 #undef TARGET_SCHED_REASSOCIATION_WIDTH
21221 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21222 #undef TARGET_SCHED_REORDER
21223 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21224 #undef TARGET_SCHED_ADJUST_PRIORITY
21225 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21226 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21227 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21228 ix86_dependencies_evaluation_hook
21231 /* Implementation of reassociation_width target hook used by
21232 reassoc phase to identify parallelism level in reassociated
21233 tree. Statements tree_code is passed in OPC. Arguments type
21234 is passed in MODE. */
21237 ix86_reassociation_width (unsigned int op
, machine_mode mode
)
21241 if (VECTOR_MODE_P (mode
))
21244 if (INTEGRAL_MODE_P (mode
))
21245 width
= ix86_cost
->reassoc_vec_int
;
21246 else if (FLOAT_MODE_P (mode
))
21247 width
= ix86_cost
->reassoc_vec_fp
;
21252 /* Integer vector instructions execute in FP unit
21253 and can execute 3 additions and one multiplication per cycle. */
21254 if ((ix86_tune
== PROCESSOR_ZNVER1
|| ix86_tune
== PROCESSOR_ZNVER2
)
21255 && INTEGRAL_MODE_P (mode
) && op
!= PLUS
&& op
!= MINUS
)
21258 /* Account for targets that splits wide vectors into multiple parts. */
21259 if (TARGET_AVX128_OPTIMAL
&& GET_MODE_BITSIZE (mode
) > 128)
21260 div
= GET_MODE_BITSIZE (mode
) / 128;
21261 else if (TARGET_SSE_SPLIT_REGS
&& GET_MODE_BITSIZE (mode
) > 64)
21262 div
= GET_MODE_BITSIZE (mode
) / 64;
21263 width
= (width
+ div
- 1) / div
;
21266 else if (INTEGRAL_MODE_P (mode
))
21267 width
= ix86_cost
->reassoc_int
;
21268 else if (FLOAT_MODE_P (mode
))
21269 width
= ix86_cost
->reassoc_fp
;
21271 /* Avoid using too many registers in 32bit mode. */
21272 if (!TARGET_64BIT
&& width
> 2)
21277 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
21278 place emms and femms instructions. */
21280 static machine_mode
21281 ix86_preferred_simd_mode (scalar_mode mode
)
21289 if (TARGET_AVX512BW
&& !TARGET_PREFER_AVX256
)
21291 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21297 if (TARGET_AVX512BW
&& !TARGET_PREFER_AVX256
)
21299 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21305 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21307 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21313 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21315 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21321 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21323 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21329 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21331 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21333 else if (TARGET_SSE2
)
21342 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
21343 vectors. If AVX512F is enabled then try vectorizing with 512bit,
21344 256bit and 128bit vectors. */
21347 ix86_autovectorize_vector_sizes (vector_sizes
*sizes
, bool all
)
21349 if (TARGET_AVX512F
&& !TARGET_PREFER_AVX256
)
21351 sizes
->safe_push (64);
21352 sizes
->safe_push (32);
21353 sizes
->safe_push (16);
21355 else if (TARGET_AVX512F
&& all
)
21357 sizes
->safe_push (32);
21358 sizes
->safe_push (16);
21359 sizes
->safe_push (64);
21361 else if (TARGET_AVX
&& !TARGET_PREFER_AVX128
)
21363 sizes
->safe_push (32);
21364 sizes
->safe_push (16);
21366 else if (TARGET_AVX
&& all
)
21368 sizes
->safe_push (16);
21369 sizes
->safe_push (32);
21371 else if (TARGET_MMX_WITH_SSE
)
21372 sizes
->safe_push (16);
21374 if (TARGET_MMX_WITH_SSE
)
21375 sizes
->safe_push (8);
21378 /* Implemenation of targetm.vectorize.get_mask_mode. */
21380 static opt_machine_mode
21381 ix86_get_mask_mode (poly_uint64 nunits
, poly_uint64 vector_size
)
21383 unsigned elem_size
= vector_size
/ nunits
;
21385 /* Scalar mask case. */
21386 if ((TARGET_AVX512F
&& vector_size
== 64)
21387 || (TARGET_AVX512VL
&& (vector_size
== 32 || vector_size
== 16)))
21389 if (elem_size
== 4 || elem_size
== 8 || TARGET_AVX512BW
)
21390 return smallest_int_mode_for_size (nunits
);
21393 scalar_int_mode elem_mode
21394 = smallest_int_mode_for_size (elem_size
* BITS_PER_UNIT
);
21396 gcc_assert (elem_size
* nunits
== vector_size
);
21398 return mode_for_vector (elem_mode
, nunits
);
21403 /* Return class of registers which could be used for pseudo of MODE
21404 and of class RCLASS for spilling instead of memory. Return NO_REGS
21405 if it is not possible or non-profitable. */
21407 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
21410 ix86_spill_class (reg_class_t rclass
, machine_mode mode
)
21412 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
21414 && TARGET_INTER_UNIT_MOVES_TO_VEC
21415 && TARGET_INTER_UNIT_MOVES_FROM_VEC
21416 && (mode
== SImode
|| (TARGET_64BIT
&& mode
== DImode
))
21417 && INTEGER_CLASS_P (rclass
))
21418 return ALL_SSE_REGS
;
21422 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
21423 but returns a lower bound. */
21425 static unsigned int
21426 ix86_max_noce_ifcvt_seq_cost (edge e
)
21428 bool predictable_p
= predictable_edge_p (e
);
21430 enum compiler_param param
21432 ? PARAM_MAX_RTL_IF_CONVERSION_PREDICTABLE_COST
21433 : PARAM_MAX_RTL_IF_CONVERSION_UNPREDICTABLE_COST
);
21435 /* If we have a parameter set, use that, otherwise take a guess using
21437 if (global_options_set
.x_param_values
[param
])
21438 return PARAM_VALUE (param
);
21440 return BRANCH_COST (true, predictable_p
) * COSTS_N_INSNS (2);
21443 /* Return true if SEQ is a good candidate as a replacement for the
21444 if-convertible sequence described in IF_INFO. */
21447 ix86_noce_conversion_profitable_p (rtx_insn
*seq
, struct noce_if_info
*if_info
)
21449 if (TARGET_ONE_IF_CONV_INSN
&& if_info
->speed_p
)
21452 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
21453 Maybe we should allow even more conditional moves as long as they
21454 are used far enough not to stall the CPU, or also consider
21455 IF_INFO->TEST_BB succ edge probabilities. */
21456 for (rtx_insn
*insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
21458 rtx set
= single_set (insn
);
21461 if (GET_CODE (SET_SRC (set
)) != IF_THEN_ELSE
)
21463 rtx src
= SET_SRC (set
);
21464 machine_mode mode
= GET_MODE (src
);
21465 if (GET_MODE_CLASS (mode
) != MODE_INT
21466 && GET_MODE_CLASS (mode
) != MODE_FLOAT
)
21468 if ((!REG_P (XEXP (src
, 1)) && !MEM_P (XEXP (src
, 1)))
21469 || (!REG_P (XEXP (src
, 2)) && !MEM_P (XEXP (src
, 2))))
21471 /* insn is CMOV or FCMOV. */
21472 if (++cmov_cnt
> 1)
21476 return default_noce_conversion_profitable_p (seq
, if_info
);
21479 /* Implement targetm.vectorize.init_cost. */
21482 ix86_init_cost (class loop
*)
21484 unsigned *cost
= XNEWVEC (unsigned, 3);
21485 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
21489 /* Implement targetm.vectorize.add_stmt_cost. */
21492 ix86_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
21493 class _stmt_vec_info
*stmt_info
, int misalign
,
21494 enum vect_cost_model_location where
)
21496 unsigned *cost
= (unsigned *) data
;
21497 unsigned retval
= 0;
21499 = (kind
== scalar_stmt
|| kind
== scalar_load
|| kind
== scalar_store
);
21501 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
21502 int stmt_cost
= - 1;
21505 machine_mode mode
= scalar_p
? SImode
: TImode
;
21507 if (vectype
!= NULL
)
21509 fp
= FLOAT_TYPE_P (vectype
);
21510 mode
= TYPE_MODE (vectype
);
21512 mode
= TYPE_MODE (TREE_TYPE (vectype
));
21515 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
21517 && stmt_info
->stmt
&& gimple_code (stmt_info
->stmt
) == GIMPLE_ASSIGN
)
21519 tree_code subcode
= gimple_assign_rhs_code (stmt_info
->stmt
);
21520 /*machine_mode inner_mode = mode;
21521 if (VECTOR_MODE_P (mode))
21522 inner_mode = GET_MODE_INNER (mode);*/
21527 case POINTER_PLUS_EXPR
:
21529 if (kind
== scalar_stmt
)
21531 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21532 stmt_cost
= ix86_cost
->addss
;
21533 else if (X87_FLOAT_MODE_P (mode
))
21534 stmt_cost
= ix86_cost
->fadd
;
21536 stmt_cost
= ix86_cost
->add
;
21539 stmt_cost
= ix86_vec_cost (mode
, fp
? ix86_cost
->addss
21540 : ix86_cost
->sse_op
);
21544 case WIDEN_MULT_EXPR
:
21545 case MULT_HIGHPART_EXPR
:
21546 stmt_cost
= ix86_multiplication_cost (ix86_cost
, mode
);
21549 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21550 stmt_cost
= ix86_cost
->sse_op
;
21551 else if (X87_FLOAT_MODE_P (mode
))
21552 stmt_cost
= ix86_cost
->fchs
;
21553 else if (VECTOR_MODE_P (mode
))
21554 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21556 stmt_cost
= ix86_cost
->add
;
21558 case TRUNC_DIV_EXPR
:
21559 case CEIL_DIV_EXPR
:
21560 case FLOOR_DIV_EXPR
:
21561 case ROUND_DIV_EXPR
:
21562 case TRUNC_MOD_EXPR
:
21563 case CEIL_MOD_EXPR
:
21564 case FLOOR_MOD_EXPR
:
21566 case ROUND_MOD_EXPR
:
21567 case EXACT_DIV_EXPR
:
21568 stmt_cost
= ix86_division_cost (ix86_cost
, mode
);
21576 tree op2
= gimple_assign_rhs2 (stmt_info
->stmt
);
21577 stmt_cost
= ix86_shift_rotate_cost
21579 TREE_CODE (op2
) == INTEGER_CST
,
21580 cst_and_fits_in_hwi (op2
) ? int_cst_value (op2
) : -1,
21581 true, false, false, NULL
, NULL
);
21585 /* Only sign-conversions are free. */
21586 if (tree_nop_conversion_p
21587 (TREE_TYPE (gimple_assign_lhs (stmt_info
->stmt
)),
21588 TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
))))
21600 if (SSE_FLOAT_MODE_P (mode
) && TARGET_SSE_MATH
)
21601 stmt_cost
= ix86_cost
->sse_op
;
21602 else if (VECTOR_MODE_P (mode
))
21603 stmt_cost
= ix86_vec_cost (mode
, ix86_cost
->sse_op
);
21605 stmt_cost
= ix86_cost
->add
;
21613 if ((kind
== vector_stmt
|| kind
== scalar_stmt
)
21616 && (cfn
= gimple_call_combined_fn (stmt_info
->stmt
)) != CFN_LAST
)
21620 stmt_cost
= ix86_vec_cost (mode
,
21621 mode
== SFmode
? ix86_cost
->fmass
21622 : ix86_cost
->fmasd
);
21628 /* If we do elementwise loads into a vector then we are bound by
21629 latency and execution resources for the many scalar loads
21630 (AGU and load ports). Try to account for this by scaling the
21631 construction cost by the number of elements involved. */
21632 if ((kind
== vec_construct
|| kind
== vec_to_scalar
)
21634 && (STMT_VINFO_TYPE (stmt_info
) == load_vec_info_type
21635 || STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
21636 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) == VMAT_ELEMENTWISE
21637 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info
))) != INTEGER_CST
)
21639 stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
21640 stmt_cost
*= (TYPE_VECTOR_SUBPARTS (vectype
) + 1);
21642 if (stmt_cost
== -1)
21643 stmt_cost
= ix86_builtin_vectorization_cost (kind
, vectype
, misalign
);
21645 /* Penalize DFmode vector operations for Bonnell. */
21646 if (TARGET_BONNELL
&& kind
== vector_stmt
21647 && vectype
&& GET_MODE_INNER (TYPE_MODE (vectype
)) == DFmode
)
21648 stmt_cost
*= 5; /* FIXME: The value here is arbitrary. */
21650 /* Statements in an inner loop relative to the loop being
21651 vectorized are weighted more heavily. The value here is
21652 arbitrary and could potentially be improved with analysis. */
21653 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
21654 count
*= 50; /* FIXME. */
21656 retval
= (unsigned) (count
* stmt_cost
);
21658 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
21659 for Silvermont as it has out of order integer pipeline and can execute
21660 2 scalar instruction per tick, but has in order SIMD pipeline. */
21661 if ((TARGET_SILVERMONT
|| TARGET_GOLDMONT
|| TARGET_GOLDMONT_PLUS
21662 || TARGET_TREMONT
|| TARGET_INTEL
) && stmt_info
&& stmt_info
->stmt
)
21664 tree lhs_op
= gimple_get_lhs (stmt_info
->stmt
);
21665 if (lhs_op
&& TREE_CODE (TREE_TYPE (lhs_op
)) == INTEGER_TYPE
)
21666 retval
= (retval
* 17) / 10;
21669 cost
[where
] += retval
;
21674 /* Implement targetm.vectorize.finish_cost. */
21677 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
21678 unsigned *body_cost
, unsigned *epilogue_cost
)
21680 unsigned *cost
= (unsigned *) data
;
21681 *prologue_cost
= cost
[vect_prologue
];
21682 *body_cost
= cost
[vect_body
];
21683 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  */

static void
ix86_destroy_cost_data (void *data)
{
  free (data);
}
21694 /* Validate target specific memory model bits in VAL. */
21696 static unsigned HOST_WIDE_INT
21697 ix86_memmodel_check (unsigned HOST_WIDE_INT val
)
21699 enum memmodel model
= memmodel_from_int (val
);
21702 if (val
& ~(unsigned HOST_WIDE_INT
)(IX86_HLE_ACQUIRE
|IX86_HLE_RELEASE
21704 || ((val
& IX86_HLE_ACQUIRE
) && (val
& IX86_HLE_RELEASE
)))
21706 warning (OPT_Winvalid_memory_model
,
21707 "unknown architecture specific memory model");
21708 return MEMMODEL_SEQ_CST
;
21710 strong
= (is_mm_acq_rel (model
) || is_mm_seq_cst (model
));
21711 if (val
& IX86_HLE_ACQUIRE
&& !(is_mm_acquire (model
) || strong
))
21713 warning (OPT_Winvalid_memory_model
,
21714 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
21716 return MEMMODEL_SEQ_CST
| IX86_HLE_ACQUIRE
;
21718 if (val
& IX86_HLE_RELEASE
&& !(is_mm_release (model
) || strong
))
21720 warning (OPT_Winvalid_memory_model
,
21721 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
21723 return MEMMODEL_SEQ_CST
| IX86_HLE_RELEASE
;
21728 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
21729 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
21730 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
21731 or number of vecsize_mangle variants that should be emitted. */
21734 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node
*node
,
21735 struct cgraph_simd_clone
*clonei
,
21736 tree base_type
, int num
)
21740 if (clonei
->simdlen
21741 && (clonei
->simdlen
< 2
21742 || clonei
->simdlen
> 1024
21743 || (clonei
->simdlen
& (clonei
->simdlen
- 1)) != 0))
21745 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
21746 "unsupported simdlen %d", clonei
->simdlen
);
21750 tree ret_type
= TREE_TYPE (TREE_TYPE (node
->decl
));
21751 if (TREE_CODE (ret_type
) != VOID_TYPE
)
21752 switch (TYPE_MODE (ret_type
))
21760 /* case E_SCmode: */
21761 /* case E_DCmode: */
21762 if (!AGGREGATE_TYPE_P (ret_type
))
21766 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
21767 "unsupported return type %qT for simd", ret_type
);
21773 tree type_arg_types
= TYPE_ARG_TYPES (TREE_TYPE (node
->decl
));
21774 bool decl_arg_p
= (node
->definition
|| type_arg_types
== NULL_TREE
);
21776 for (t
= (decl_arg_p
? DECL_ARGUMENTS (node
->decl
) : type_arg_types
), i
= 0;
21777 t
&& t
!= void_list_node
; t
= TREE_CHAIN (t
), i
++)
21779 tree arg_type
= decl_arg_p
? TREE_TYPE (t
) : TREE_VALUE (t
);
21780 switch (TYPE_MODE (arg_type
))
21788 /* case E_SCmode: */
21789 /* case E_DCmode: */
21790 if (!AGGREGATE_TYPE_P (arg_type
))
21794 if (clonei
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_UNIFORM
)
21796 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
21797 "unsupported argument type %qT for simd", arg_type
);
21802 if (!TREE_PUBLIC (node
->decl
))
21804 /* If the function isn't exported, we can pick up just one ISA
21806 if (TARGET_AVX512F
)
21807 clonei
->vecsize_mangle
= 'e';
21808 else if (TARGET_AVX2
)
21809 clonei
->vecsize_mangle
= 'd';
21810 else if (TARGET_AVX
)
21811 clonei
->vecsize_mangle
= 'c';
21813 clonei
->vecsize_mangle
= 'b';
21818 clonei
->vecsize_mangle
= "bcde"[num
];
21821 clonei
->mask_mode
= VOIDmode
;
21822 switch (clonei
->vecsize_mangle
)
21825 clonei
->vecsize_int
= 128;
21826 clonei
->vecsize_float
= 128;
21829 clonei
->vecsize_int
= 128;
21830 clonei
->vecsize_float
= 256;
21833 clonei
->vecsize_int
= 256;
21834 clonei
->vecsize_float
= 256;
21837 clonei
->vecsize_int
= 512;
21838 clonei
->vecsize_float
= 512;
21839 if (TYPE_MODE (base_type
) == QImode
)
21840 clonei
->mask_mode
= DImode
;
21842 clonei
->mask_mode
= SImode
;
21845 if (clonei
->simdlen
== 0)
21847 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type
)))
21848 clonei
->simdlen
= clonei
->vecsize_int
;
21850 clonei
->simdlen
= clonei
->vecsize_float
;
21851 clonei
->simdlen
/= GET_MODE_BITSIZE (TYPE_MODE (base_type
));
21853 else if (clonei
->simdlen
> 16)
21855 /* For compatibility with ICC, use the same upper bounds
21856 for simdlen. In particular, for CTYPE below, use the return type,
21857 unless the function returns void, in that case use the characteristic
21858 type. If it is possible for given SIMDLEN to pass CTYPE value
21859 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
21860 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
21861 emit corresponding clone. */
21862 tree ctype
= ret_type
;
21863 if (TREE_CODE (ret_type
) == VOID_TYPE
)
21865 int cnt
= GET_MODE_BITSIZE (TYPE_MODE (ctype
)) * clonei
->simdlen
;
21866 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype
)))
21867 cnt
/= clonei
->vecsize_int
;
21869 cnt
/= clonei
->vecsize_float
;
21870 if (cnt
> (TARGET_64BIT
? 16 : 8))
21872 warning_at (DECL_SOURCE_LOCATION (node
->decl
), 0,
21873 "unsupported simdlen %d", clonei
->simdlen
);
21880 /* If SIMD clone NODE can't be used in a vectorized loop
21881 in current function, return -1, otherwise return a badness of using it
21882 (0 if it is most desirable from vecsize_mangle point of view, 1
21883 slightly less desirable, etc.). */
21886 ix86_simd_clone_usable (struct cgraph_node
*node
)
21888 switch (node
->simdclone
->vecsize_mangle
)
21895 return TARGET_AVX2
? 2 : 1;
21899 return TARGET_AVX2
? 1 : 0;
21905 if (!TARGET_AVX512F
)
21909 gcc_unreachable ();
21913 /* This function adjusts the unroll factor based on
21914 the hardware capabilities. For ex, bdver3 has
21915 a loop buffer which makes unrolling of smaller
21916 loops less important. This function decides the
21917 unroll factor using number of memory references
21918 (value 32 is used) as a heuristic. */
21921 ix86_loop_unroll_adjust (unsigned nunroll
, class loop
*loop
)
21926 unsigned mem_count
= 0;
21928 if (!TARGET_ADJUST_UNROLL
)
21931 /* Count the number of memory references within the loop body.
21932 This value determines the unrolling factor for bdver3 and bdver4
21934 subrtx_iterator::array_type array
;
21935 bbs
= get_loop_body (loop
);
21936 for (i
= 0; i
< loop
->num_nodes
; i
++)
21937 FOR_BB_INSNS (bbs
[i
], insn
)
21938 if (NONDEBUG_INSN_P (insn
))
21939 FOR_EACH_SUBRTX (iter
, array
, PATTERN (insn
), NONCONST
)
21940 if (const_rtx x
= *iter
)
21943 machine_mode mode
= GET_MODE (x
);
21944 unsigned int n_words
= GET_MODE_SIZE (mode
) / UNITS_PER_WORD
;
21952 if (mem_count
&& mem_count
<=32)
21953 return MIN (nunroll
, 32 / mem_count
);
21959 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
21962 ix86_float_exceptions_rounding_supported_p (void)
21964 /* For x87 floating point with standard excess precision handling,
21965 there is no adddf3 pattern (since x87 floating point only has
21966 XFmode operations) so the default hook implementation gets this
21968 return TARGET_80387
|| (TARGET_SSE
&& TARGET_SSE_MATH
);
21971 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
21974 ix86_atomic_assign_expand_fenv (tree
*hold
, tree
*clear
, tree
*update
)
21976 if (!TARGET_80387
&& !(TARGET_SSE
&& TARGET_SSE_MATH
))
21978 tree exceptions_var
= create_tmp_var_raw (integer_type_node
);
21981 tree fenv_index_type
= build_index_type (size_int (6));
21982 tree fenv_type
= build_array_type (unsigned_type_node
, fenv_index_type
);
21983 tree fenv_var
= create_tmp_var_raw (fenv_type
);
21984 TREE_ADDRESSABLE (fenv_var
) = 1;
21985 tree fenv_ptr
= build_pointer_type (fenv_type
);
21986 tree fenv_addr
= build1 (ADDR_EXPR
, fenv_ptr
, fenv_var
);
21987 fenv_addr
= fold_convert (ptr_type_node
, fenv_addr
);
21988 tree fnstenv
= get_ix86_builtin (IX86_BUILTIN_FNSTENV
);
21989 tree fldenv
= get_ix86_builtin (IX86_BUILTIN_FLDENV
);
21990 tree fnstsw
= get_ix86_builtin (IX86_BUILTIN_FNSTSW
);
21991 tree fnclex
= get_ix86_builtin (IX86_BUILTIN_FNCLEX
);
21992 tree hold_fnstenv
= build_call_expr (fnstenv
, 1, fenv_addr
);
21993 tree hold_fnclex
= build_call_expr (fnclex
, 0);
21994 fenv_var
= build4 (TARGET_EXPR
, fenv_type
, fenv_var
, hold_fnstenv
,
21995 NULL_TREE
, NULL_TREE
);
21996 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, fenv_var
,
21998 *clear
= build_call_expr (fnclex
, 0);
21999 tree sw_var
= create_tmp_var_raw (short_unsigned_type_node
);
22000 tree fnstsw_call
= build_call_expr (fnstsw
, 0);
22001 tree sw_mod
= build2 (MODIFY_EXPR
, short_unsigned_type_node
,
22002 sw_var
, fnstsw_call
);
22003 tree exceptions_x87
= fold_convert (integer_type_node
, sw_var
);
22004 tree update_mod
= build2 (MODIFY_EXPR
, integer_type_node
,
22005 exceptions_var
, exceptions_x87
);
22006 *update
= build2 (COMPOUND_EXPR
, integer_type_node
,
22007 sw_mod
, update_mod
);
22008 tree update_fldenv
= build_call_expr (fldenv
, 1, fenv_addr
);
22009 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
, update_fldenv
);
22011 if (TARGET_SSE
&& TARGET_SSE_MATH
)
22013 tree mxcsr_orig_var
= create_tmp_var_raw (unsigned_type_node
);
22014 tree mxcsr_mod_var
= create_tmp_var_raw (unsigned_type_node
);
22015 tree stmxcsr
= get_ix86_builtin (IX86_BUILTIN_STMXCSR
);
22016 tree ldmxcsr
= get_ix86_builtin (IX86_BUILTIN_LDMXCSR
);
22017 tree stmxcsr_hold_call
= build_call_expr (stmxcsr
, 0);
22018 tree hold_assign_orig
= build2 (MODIFY_EXPR
, unsigned_type_node
,
22019 mxcsr_orig_var
, stmxcsr_hold_call
);
22020 tree hold_mod_val
= build2 (BIT_IOR_EXPR
, unsigned_type_node
,
22022 build_int_cst (unsigned_type_node
, 0x1f80));
22023 hold_mod_val
= build2 (BIT_AND_EXPR
, unsigned_type_node
, hold_mod_val
,
22024 build_int_cst (unsigned_type_node
, 0xffffffc0));
22025 tree hold_assign_mod
= build2 (MODIFY_EXPR
, unsigned_type_node
,
22026 mxcsr_mod_var
, hold_mod_val
);
22027 tree ldmxcsr_hold_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
22028 tree hold_all
= build2 (COMPOUND_EXPR
, unsigned_type_node
,
22029 hold_assign_orig
, hold_assign_mod
);
22030 hold_all
= build2 (COMPOUND_EXPR
, void_type_node
, hold_all
,
22031 ldmxcsr_hold_call
);
22033 *hold
= build2 (COMPOUND_EXPR
, void_type_node
, *hold
, hold_all
);
22036 tree ldmxcsr_clear_call
= build_call_expr (ldmxcsr
, 1, mxcsr_mod_var
);
22038 *clear
= build2 (COMPOUND_EXPR
, void_type_node
, *clear
,
22039 ldmxcsr_clear_call
);
22041 *clear
= ldmxcsr_clear_call
;
22042 tree stxmcsr_update_call
= build_call_expr (stmxcsr
, 0);
22043 tree exceptions_sse
= fold_convert (integer_type_node
,
22044 stxmcsr_update_call
);
22047 tree exceptions_mod
= build2 (BIT_IOR_EXPR
, integer_type_node
,
22048 exceptions_var
, exceptions_sse
);
22049 tree exceptions_assign
= build2 (MODIFY_EXPR
, integer_type_node
,
22050 exceptions_var
, exceptions_mod
);
22051 *update
= build2 (COMPOUND_EXPR
, integer_type_node
, *update
,
22052 exceptions_assign
);
22055 *update
= build2 (MODIFY_EXPR
, integer_type_node
,
22056 exceptions_var
, exceptions_sse
);
22057 tree ldmxcsr_update_call
= build_call_expr (ldmxcsr
, 1, mxcsr_orig_var
);
22058 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
22059 ldmxcsr_update_call
);
22061 tree atomic_feraiseexcept
22062 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT
);
22063 tree atomic_feraiseexcept_call
= build_call_expr (atomic_feraiseexcept
,
22064 1, exceptions_var
);
22065 *update
= build2 (COMPOUND_EXPR
, void_type_node
, *update
,
22066 atomic_feraiseexcept_call
);
22069 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22070 /* For i386, common symbol is local only for non-PIE binaries. For
22071 x86-64, common symbol is local only for non-PIE binaries or linker
22072 supports copy reloc in PIE binaries. */
22075 ix86_binds_local_p (const_tree exp
)
22077 return default_binds_local_p_3 (exp
, flag_shlib
!= 0, true, true,
22080 && HAVE_LD_PIE_COPYRELOC
!= 0)));
22084 /* If MEM is in the form of [base+offset], extract the two parts
22085 of address and set to BASE and OFFSET, otherwise return false. */
22088 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
22092 gcc_assert (MEM_P (mem
));
22094 addr
= XEXP (mem
, 0);
22096 if (GET_CODE (addr
) == CONST
)
22097 addr
= XEXP (addr
, 0);
22099 if (REG_P (addr
) || GET_CODE (addr
) == SYMBOL_REF
)
22102 *offset
= const0_rtx
;
22106 if (GET_CODE (addr
) == PLUS
22107 && (REG_P (XEXP (addr
, 0))
22108 || GET_CODE (XEXP (addr
, 0)) == SYMBOL_REF
)
22109 && CONST_INT_P (XEXP (addr
, 1)))
22111 *base
= XEXP (addr
, 0);
22112 *offset
= XEXP (addr
, 1);
22119 /* Given OPERANDS of consecutive load/store, check if we can merge
22120 them into move multiple. LOAD is true if they are load instructions.
22121 MODE is the mode of memory operands. */
22124 ix86_operands_ok_for_move_multiple (rtx
*operands
, bool load
,
22127 HOST_WIDE_INT offval_1
, offval_2
, msize
;
22128 rtx mem_1
, mem_2
, reg_1
, reg_2
, base_1
, base_2
, offset_1
, offset_2
;
22132 mem_1
= operands
[1];
22133 mem_2
= operands
[3];
22134 reg_1
= operands
[0];
22135 reg_2
= operands
[2];
22139 mem_1
= operands
[0];
22140 mem_2
= operands
[2];
22141 reg_1
= operands
[1];
22142 reg_2
= operands
[3];
22145 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
));
22147 if (REGNO (reg_1
) != REGNO (reg_2
))
22150 /* Check if the addresses are in the form of [base+offset]. */
22151 if (!extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
))
22153 if (!extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
))
22156 /* Check if the bases are the same. */
22157 if (!rtx_equal_p (base_1
, base_2
))
22160 offval_1
= INTVAL (offset_1
);
22161 offval_2
= INTVAL (offset_2
);
22162 msize
= GET_MODE_SIZE (mode
);
22163 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
22164 if (offval_1
+ msize
!= offval_2
)
22170 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22173 ix86_optab_supported_p (int op
, machine_mode mode1
, machine_mode
,
22174 optimization_type opt_type
)
22188 return opt_type
== OPTIMIZE_FOR_SPEED
;
22191 if (SSE_FLOAT_MODE_P (mode1
)
22193 && !flag_trapping_math
22195 return opt_type
== OPTIMIZE_FOR_SPEED
;
22201 if (SSE_FLOAT_MODE_P (mode1
)
22203 && !flag_trapping_math
22206 return opt_type
== OPTIMIZE_FOR_SPEED
;
22209 return opt_type
== OPTIMIZE_FOR_SPEED
&& use_rsqrt_p ();
22216 /* Address space support.
22218 This is not "far pointers" in the 16-bit sense, but an easy way
22219 to use %fs and %gs segment prefixes. Therefore:
22221 (a) All address spaces have the same modes,
22222 (b) All address spaces have the same addresss forms,
22223 (c) While %fs and %gs are technically subsets of the generic
22224 address space, they are probably not subsets of each other.
22225 (d) Since we have no access to the segment base register values
22226 without resorting to a system call, we cannot convert a
22227 non-default address space to a default address space.
22228 Therefore we do not claim %fs or %gs are subsets of generic.
22230 Therefore we can (mostly) use the default hooks. */
22232 /* All use of segmentation is assumed to make address 0 valid. */
22235 ix86_addr_space_zero_address_valid (addr_space_t as
)
22237 return as
!= ADDR_SPACE_GENERIC
;
22241 ix86_init_libfuncs (void)
22245 set_optab_libfunc (sdivmod_optab
, TImode
, "__divmodti4");
22246 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
22250 set_optab_libfunc (sdivmod_optab
, DImode
, "__divmoddi4");
22251 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
22255 darwin_rename_builtins ();
22259 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
22260 FPU, assume that the fpcw is set to extended precision; when using
22261 only SSE, rounding is correct; when using both SSE and the FPU,
22262 the rounding precision is indeterminate, since either may be chosen
22263 apparently at random. */
22265 static enum flt_eval_method
22266 ix86_excess_precision (enum excess_precision_type type
)
22270 case EXCESS_PRECISION_TYPE_FAST
:
22271 /* The fastest type to promote to will always be the native type,
22272 whether that occurs with implicit excess precision or
22274 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
22275 case EXCESS_PRECISION_TYPE_STANDARD
:
22276 case EXCESS_PRECISION_TYPE_IMPLICIT
:
22277 /* Otherwise, the excess precision we want when we are
22278 in a standards compliant mode, and the implicit precision we
22279 provide would be identical were it not for the unpredictable
22282 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
22283 else if (!TARGET_MIX_SSE_I387
)
22285 if (!(TARGET_SSE
&& TARGET_SSE_MATH
))
22286 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE
;
22287 else if (TARGET_SSE2
)
22288 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
;
22291 /* If we are in standards compliant mode, but we know we will
22292 calculate in unpredictable precision, return
22293 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
22294 excess precision if the target can't guarantee it will honor
22296 return (type
== EXCESS_PRECISION_TYPE_STANDARD
22297 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
22298 : FLT_EVAL_METHOD_UNPREDICTABLE
);
22300 gcc_unreachable ();
22303 return FLT_EVAL_METHOD_UNPREDICTABLE
;
22306 /* Implement PUSH_ROUNDING. On 386, we have pushw instruction that
22307 decrements by exactly 2 no matter what the position was, there is no pushb.
22309 But as CIE data alignment factor on this arch is -4 for 32bit targets
22310 and -8 for 64bit targets, we need to make sure all stack pointer adjustments
22311 are in multiple of 4 for 32bit targets and 8 for 64bit targets. */
22314 ix86_push_rounding (poly_int64 bytes
)
22316 return ROUND_UP (bytes
, UNITS_PER_WORD
);
/* Target-specific selftests.

   NOTE(review): this region was reconstructed from a lossy extraction;
   dropped lines (braces, `static void`, parts of the expected-dump string
   literals) restored — cross-check against upstream i386.c.  */

#if CHECKING_P

namespace selftest {

/* Verify that hard regs are dumped as expected (in compact mode).  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}

/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH show use
     reuse IDS.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}

/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   directives.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
	rtx addr = XEXP (dest, 0);
	ASSERT_EQ (PLUS, GET_CODE (addr));
	ASSERT_EQ (DImode, GET_MODE (addr));
	{
	  rtx lhs = XEXP (addr, 0);
	  /* Verify that the "frame" REG was consolidated.  */
	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
	}
	{
	  rtx rhs = XEXP (addr, 1);
	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
	  ASSERT_EQ (-4, INTVAL (rhs));
	}
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
	 variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}

/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", where requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j".  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".   */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}

/* Verify that the RTL loader copes a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   In particular, verify that it correctly loads the 2nd operand.
   This test is target-specific since these are machine-specific
   operands (and enums).  */

static void
ix86_test_loading_unspec ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  */
  rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify that the insn is recognized.  */
  ASSERT_NE(-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}

/* Run all target-specific selftests.  */

static void
ix86_run_selftests (void)
{
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}

} // namespace selftest

#endif /* CHECKING_P */
22581 /* Initialize the GCC target structure. */
22582 #undef TARGET_RETURN_IN_MEMORY
22583 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
22585 #undef TARGET_LEGITIMIZE_ADDRESS
22586 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
22588 #undef TARGET_ATTRIBUTE_TABLE
22589 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22590 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
22591 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
22592 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22593 # undef TARGET_MERGE_DECL_ATTRIBUTES
22594 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22597 #undef TARGET_COMP_TYPE_ATTRIBUTES
22598 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22600 #undef TARGET_INIT_BUILTINS
22601 #define TARGET_INIT_BUILTINS ix86_init_builtins
22602 #undef TARGET_BUILTIN_DECL
22603 #define TARGET_BUILTIN_DECL ix86_builtin_decl
22604 #undef TARGET_EXPAND_BUILTIN
22605 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22607 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22608 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
22609 ix86_builtin_vectorized_function
22611 #undef TARGET_VECTORIZE_BUILTIN_GATHER
22612 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
22614 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
22615 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
22617 #undef TARGET_BUILTIN_RECIPROCAL
22618 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
22620 #undef TARGET_ASM_FUNCTION_EPILOGUE
22621 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22623 #undef TARGET_ENCODE_SECTION_INFO
22624 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22625 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22627 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22630 #undef TARGET_ASM_OPEN_PAREN
22631 #define TARGET_ASM_OPEN_PAREN ""
22632 #undef TARGET_ASM_CLOSE_PAREN
22633 #define TARGET_ASM_CLOSE_PAREN ""
22635 #undef TARGET_ASM_BYTE_OP
22636 #define TARGET_ASM_BYTE_OP ASM_BYTE
22638 #undef TARGET_ASM_ALIGNED_HI_OP
22639 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22640 #undef TARGET_ASM_ALIGNED_SI_OP
22641 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22643 #undef TARGET_ASM_ALIGNED_DI_OP
22644 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22647 #undef TARGET_PROFILE_BEFORE_PROLOGUE
22648 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
22650 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
22651 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
22653 #undef TARGET_ASM_UNALIGNED_HI_OP
22654 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22655 #undef TARGET_ASM_UNALIGNED_SI_OP
22656 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22657 #undef TARGET_ASM_UNALIGNED_DI_OP
22658 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22660 #undef TARGET_PRINT_OPERAND
22661 #define TARGET_PRINT_OPERAND ix86_print_operand
22662 #undef TARGET_PRINT_OPERAND_ADDRESS
22663 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
22664 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
22665 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
22666 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
22667 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
22669 #undef TARGET_SCHED_INIT_GLOBAL
22670 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
22671 #undef TARGET_SCHED_ADJUST_COST
22672 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22673 #undef TARGET_SCHED_ISSUE_RATE
22674 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22675 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22676 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22677 ia32_multipass_dfa_lookahead
22678 #undef TARGET_SCHED_MACRO_FUSION_P
22679 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
22680 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
22681 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
22683 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22684 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22686 #undef TARGET_MEMMODEL_CHECK
22687 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
22689 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
22690 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
22693 #undef TARGET_HAVE_TLS
22694 #define TARGET_HAVE_TLS true
22696 #undef TARGET_CANNOT_FORCE_CONST_MEM
22697 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22698 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22699 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
22701 #undef TARGET_DELEGITIMIZE_ADDRESS
22702 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22704 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
22705 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
22707 #undef TARGET_MS_BITFIELD_LAYOUT_P
22708 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22711 #undef TARGET_BINDS_LOCAL_P
22712 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
/* Target hook table: map the generic TARGET_* hooks onto the ix86_*
   implementations.  TARGET_INITIALIZER (expanded into `targetm' below)
   picks up these #defines, so order only matters where a later #define
   intentionally overrides an earlier one (e.g. BINDS_LOCAL_P on PE/COFF).
   NOTE(review): stray leading line numbers (extraction artifact) removed
   throughout; several dropped #if/#endif lines restored to rebalance the
   conditionals -- restorations follow upstream GCC, confirm against the
   original tree.  */

#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* PE/COFF targets (dllimport/dllexport) need their own locality test.  */
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif /* NOTE(review): restored -- dropped in extraction */

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM FLAGS_REG
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION ix86_excess_precision
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

/* NOTE(review): HAVE_AS_TLS guard restored -- dropped in extraction.  */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif /* NOTE(review): restored -- dropped in extraction */

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard

/* NOTE(review): !TARGET_MACHO guard restored -- dropped in extraction.  */
#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_SPLIT_REDUCTION
#define TARGET_VECTORIZE_SPLIT_REDUCTION \
  ix86_split_reduction
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  ix86_autovectorize_vector_sizes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class

#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST \
  ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE \
  ix86_simd_clone_usable

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME \
  ix86_get_multilib_abi_name
23088 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED
)
23090 #ifdef OPTION_GLIBC
23092 return (built_in_function
)fcode
== BUILT_IN_MEMPCPY
;
#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function

/* NOTE(review): `#if CHECKING_P' opener restored -- the matching
   `#endif' below survived extraction but its opener was dropped.  */
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */
23108 struct gcc_target targetm
= TARGET_INITIALIZER
;
23110 #include "gt-i386.h"