/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "insn-attr.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "tree-pass.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "tree-iterator.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "tree-ssanames.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "dwarf2out.h"
#include "i386-options.h"
#include "i386-builtins.h"
#include "i386-expand.h"
#include "i386-features.h"
#include "function-abi.h"

/* This file should be included last.  */
#include "target-def.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
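
/* For example, MODE_INDEX (SImode) is 2, so a five-entry cost table of
   the shape {qi_cost, hi_cost, si_cost, di_cost, other_cost} can be
   indexed directly by operand mode; the entry names used here are
   purely illustrative.  */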
/* Set by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Set by -mtune or -Os.  */
const struct processor_costs *ix86_cost = NULL;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
};
173 /* The "default" register map used in 32bit mode. */
175 int const dbx_register_map
[FIRST_PSEUDO_REGISTER
] =
178 0, 2, 1, 3, 6, 7, 4, 5,
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
183 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
185 21, 22, 23, 24, 25, 26, 27, 28,
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
190 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
191 /* extended sse registers */
192 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
193 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
196 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
199 INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
, INVALID_REGNUM
,
201 93, 94, 95, 96, 97, 98, 99, 100
204 /* The "default" register map used in 64bit mode. */
206 int const dbx64_register_map
[FIRST_PSEUDO_REGISTER
] =
209 0, 1, 2, 3, 4, 5, 6, 7,
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
214 IGNORED_DWARF_REGNUM
, IGNORED_DWARF_REGNUM
,
216 17, 18, 19, 20, 21, 22, 23, 24,
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
228 118, 119, 120, 121, 122, 123, 124, 125
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believed these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 was so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I had does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seemed to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still printed garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB printed for various FP stack regs
   when doing an `x' command were all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)  */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
/* Define parameter passing and return registers.  */

static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
/* Define the structure for the machine field in struct function.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char x86_prefetch_sse;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* Calling abi specific va_list type nodes.  */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt.  */
tree x86_mfence;
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table[5];
static bool ext_80387_constants_init;
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
						const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static tree ix86_canonical_va_list_type (tree);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

static bool ix86_can_inline_p (tree, tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);

/* Whether -mtune= or -march= were specified.  */
int ix86_tune_defaulted;
int ix86_arch_specified;
/* Return true if a red-zone is in use.  We can't use a red-zone when
   there are local indirect jumps, like "indirect_jump" or "tablejump",
   which jump to another place in the function, since "call" in the
   indirect thunk pushes the return address onto the stack, destroying
   the red-zone.

   TODO: If we can reserve the first 2 WORDs, for PUSH and, another
   for CALL, in red-zone, we can allow local indirect jumps with
   indirect thunk.  */

bool
ix86_using_red_zone (void)
{
  return (TARGET_RED_ZONE
	  && !TARGET_64BIT_MS_ABI
	  && (!cfun->machine->has_local_indirect_jump
	      || cfun->machine->indirect_branch_type == indirect_branch_keep));
}
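
/* For reference: in the x86-64 SysV ABI the red zone is the 128 bytes
   below the stack pointer, which leaf code may use without adjusting
   %rsp.  A "call" emitted for an indirect-branch thunk would push a
   return address into that area, which is why the check above refuses
   the red zone whenever a local indirect jump may go through a thunk.  */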
/* Return true if profiling code should be emitted before the
   prologue; otherwise return false.
   Note: For x86 with "hotfix" it is sorried.  */
static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Update register usage after having seen the compiler flags.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers
     except fixed_regs and registers used for function return value
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun && cfun->machine->no_caller_saved_registers)
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
	call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
	 CALL_USED_REGISTERS initializer.  */
      if (call_used_regs[i] > 1)
	call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
	 as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
	  && call_used_regs[i])
	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (! TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (! TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the registers.  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }
}
/* Canonicalize a comparison from one we don't have to one we do have.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
	 Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
	{
	  std::swap (*op0, *op1);
	  *code = (int) scode;
	}
    }
}
/* Hook to determine if one function can safely inline another.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of those flags can be tolerated for always inlines.  Lets hope
     user knows what he is doing.  */
  const unsigned HOST_WIDE_INT always_inline_safe_mask
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);

  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute ("always_inline",
			    DECL_ATTRIBUTES (callee)));

  cgraph_node *callee_node = cgraph_node::get (callee);
  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
	  != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  */
  else if ((!always_inline
	    && caller_opts->x_target_flags != callee_opts->x_target_flags)
	   || (caller_opts->x_target_flags & ~always_inline_safe_mask)
	      != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  /* See if arch, tune, etc. are the same.  */
  else if (caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
	   /* If the callee doesn't use FP expressions differences in
	      ix86_fpmath can be ignored.  We are called from FEs
	      for multi-versioning call optimization, so beware of
	      ipa_fn_summaries not available.  */
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get (callee_node) == NULL
	       || ipa_fn_summaries->get (callee_node)->fp_expressions))
    ret = false;

  else if (!always_inline
	   && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  else
    ret = true;

  return ret;
}
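
/* For instance, a caller compiled with -mavx2 may inline a callee
   limited to SSE2 (the callee's ISA flags are a subset of the
   caller's), but an SSE2-only caller must not inline an AVX2 callee,
   since the inlined body could then execute AVX2 instructions in a
   context that never guaranteed them.  */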
/* Return true if this goes in large data/bss.  */

static bool
ix86_in_large_data_p (tree exp)
{
  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
    return false;

  if (exp == NULL_TREE)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  /* Automatic variables are never large data.  */
  if (VAR_P (exp) && !is_global_var (exp))
    return false;

  if (VAR_P (exp) && DECL_SECTION_NAME (exp))
    {
      const char *section = DECL_SECTION_NAME (exp);
      if (strcmp (section, ".ldata") == 0
	  || strcmp (section, ".lbss") == 0)
	return true;
      return false;
    }
  else
    {
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));

      /* If this is an incomplete type with size 0, then we can't put it
	 in data because it might be too big when completed.  Also,
	 int_size_in_bytes returns -1 if size can vary or is larger than
	 an integer in which case also it is safer to assume that it goes in
	 memory.  */
      if (size <= 0 || size > ix86_section_threshold)
	return true;
    }

  return false;
}
/* i386-specific section flag to mark large sections.  */
#define SECTION_LARGE SECTION_MACH_DEP
/* Switch to the appropriate section for output of DECL.
   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	  sname = ".ldata";
	  break;
	case SECCAT_DATA_REL:
	  sname = ".ldata.rel";
	  break;
	case SECCAT_DATA_REL_LOCAL:
	  sname = ".ldata.rel.local";
	  break;
	case SECCAT_DATA_REL_RO:
	  sname = ".ldata.rel.ro";
	  break;
	case SECCAT_DATA_REL_RO_LOCAL:
	  sname = ".ldata.rel.ro.local";
	  break;
	case SECCAT_BSS:
	  sname = ".lbss";
	  flags |= SECTION_BSS;
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  sname = ".lrodata";
	  flags &= ~SECTION_WRITE;
	  break;
	default:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (sname)
	{
	  /* We might get called with string constants, but get_named_section
	     doesn't like them as they are not DECLs.  Also, we need to set
	     flags in that case.  */
	  if (!DECL_P (decl))
	    return get_section (sname, flags, NULL);
	  return get_named_section (decl, sname, reloc);
	}
    }
  return default_elf_select_section (decl, reloc, align);
}
/* Select a set of attributes for section NAME based on the properties
   of DECL and whether or not RELOC indicates that DECL's initializer
   might contain runtime relocations.  */

static unsigned int ATTRIBUTE_UNUSED
x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  if (ix86_in_large_data_p (decl))
    flags |= SECTION_LARGE;

  if (decl == NULL_TREE
      && (strcmp (name, ".ldata.rel.ro") == 0
	  || strcmp (name, ".ldata.rel.ro.local") == 0))
    flags |= SECTION_RELRO;

  if (strcmp (name, ".lbss") == 0
      || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
      || strncmp (name, ".gnu.linkonce.lb.",
		  sizeof (".gnu.linkonce.lb.") - 1) == 0)
    flags |= SECTION_BSS;

  return flags;
}
/* Build up a unique section name, expressed as a
   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
   RELOC indicates whether the initial value of EXP requires
   link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      switch (categorize_decl_for_section (decl, reloc))
	{
	case SECCAT_DATA:
	case SECCAT_DATA_REL:
	case SECCAT_DATA_REL_LOCAL:
	case SECCAT_DATA_REL_RO:
	case SECCAT_DATA_REL_RO_LOCAL:
	  prefix = one_only ? ".ld" : ".ldata";
	  break;
	case SECCAT_BSS:
	  prefix = one_only ? ".lb" : ".lbss";
	  break;
	case SECCAT_RODATA:
	case SECCAT_RODATA_MERGE_STR:
	case SECCAT_RODATA_MERGE_STR_INIT:
	case SECCAT_RODATA_MERGE_CONST:
	  prefix = one_only ? ".lr" : ".lrodata";
	  break;
	default:
	  /* We don't split these for medium model.  Place them into
	     default sections and hope for best.  */
	  break;
	}
      if (prefix)
	{
	  const char *name, *linkonce;
	  char *string;

	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
	  name = targetm.strip_name_encoding (name);

	  /* If we're using one_only, then there needs to be a .gnu.linkonce
	     prefix to the section name.  */
	  linkonce = one_only ? ".gnu.linkonce" : "";

	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));

	  set_decl_section_name (decl, string);
	  return;
	}
    }
  default_unique_section (decl, reloc);
}
#ifndef LARGECOMM_SECTION_ASM_OP
#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
#endif
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
   large objects.  */

void
x86_elf_aligned_decl_common (FILE *file, tree decl,
			     const char *name, unsigned HOST_WIDE_INT size,
			     unsigned align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      fputs (LARGECOMM_SECTION_ASM_OP, file);
    }
  else
    fputs (COMMON_ASM_OP, file);
  assemble_name (file, name);
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
/* Utility function for targets to use in implementing
   ASM_OUTPUT_ALIGNED_BSS.  */

void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
			unsigned HOST_WIDE_INT size, int align)
{
  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
      && size > (unsigned int)ix86_section_threshold)
    switch_to_section (get_named_section (decl, ".lbss", 0));
  else
    switch_to_section (bss_section);
  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
  last_assemble_variable_decl = decl;
  ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
  /* Standard thing is just output label for the object.  */
  ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
  ASM_OUTPUT_SKIP (file, size ? size : 1);
}
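
/* As a hand-written illustration (not output copied from this file),
   a 1 MiB common object under -mcmodel=medium would be announced
   roughly as
	.largecomm	big_array,1048576,32
   while small objects keep using the plain .comm directive.  */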
/* Decide whether we must probe the stack before any space allocation
   on this target.  It's essentially TARGET_STACK_PROBE except when
   -fstack-check causes the stack to be already probed differently.  */

bool
ix86_target_stack_probe (void)
{
  /* Do not probe the stack twice if static stack checking is enabled.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    return false;

  return TARGET_STACK_PROBE;
}
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  tree type, decl_or_type;
  rtx a, b;
  bool bind_global = decl && !targetm.binds_local_p (decl);

  if (ix86_function_naked (current_function_decl))
    return false;

  /* Sibling call isn't OK if there are no caller-saved registers
     since all registers must be preserved before return.  */
  if (cfun->machine->no_caller_saved_registers)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live. (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO
      && !TARGET_64BIT
      && flag_pic
      && flag_plt
      && bind_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  if (decl)
    {
      decl_or_type = decl;
      type = TREE_TYPE (decl);
    }
  else
    {
      /* We're looking at the CALL_EXPR, we need the type of the function.  */
      type = CALL_EXPR_FN (exp);	/* pointer expression */
      type = TREE_TYPE (type);		/* pointer type */
      type = TREE_TYPE (type);		/* function type */
      decl_or_type = type;
    }

  /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
  if ((OUTGOING_REG_PARM_STACK_SPACE (type)
       != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
      || (REG_PARM_STACK_SPACE (decl_or_type)
	  != REG_PARM_STACK_SPACE (current_function_decl)))
    {
      maybe_complain_about_tail_call (exp,
				      "inconsistent size of stack space"
				      " allocated for arguments which are"
				      " passed in registers");
      return false;
    }

  /* Check that the return value locations are the same.  Like
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);
  if (STACK_REG_P (a) || STACK_REG_P (b))
    {
      if (!rtx_equal_p (a, b))
	return false;
    }
  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    ;
  else if (!rtx_equal_p (a, b))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (type) == SYSV_ABI)
	return false;
    }
  else
    {
      /* If this call is indirect, we'll need to be able to use a
	 call-clobbered register for the address of the target function.
	 Make sure that all such registers are not used for passing
	 parameters.  Note that DLLIMPORT functions and call to global
	 function via GOT slot are indirect.  */
      if (!decl
	  || (bind_global && flag_pic && !flag_plt)
	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
	  || flag_force_indirect_call)
	{
	  /* Check if regparm >= 3 since arg_reg_available is set to
	     false if regparm == 0.  If regparm is 1 or 2, there is
	     always a call-clobbered register available.

	     ??? The symbol indirect call doesn't need a call-clobbered
	     register.  But we don't know if this is a symbol indirect
	     call or not here.  */
	  if (ix86_function_regparm (type, decl) >= 3
	      && !cfun->machine->arg_reg_available)
	    return false;
	}
    }

  /* Otherwise okay.  That also includes certain types of indirect calls.  */
  return true;
}
/* This function determines from TYPE the calling-convention.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  unsigned int ret = 0;
  bool is_stdarg;
  tree attrs;

  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  attrs = TYPE_ATTRIBUTES (type);
  if (attrs != NULL_TREE)
    {
      if (lookup_attribute ("cdecl", attrs))
	ret |= IX86_CALLCVT_CDECL;
      else if (lookup_attribute ("stdcall", attrs))
	ret |= IX86_CALLCVT_STDCALL;
      else if (lookup_attribute ("fastcall", attrs))
	ret |= IX86_CALLCVT_FASTCALL;
      else if (lookup_attribute ("thiscall", attrs))
	ret |= IX86_CALLCVT_THISCALL;

      /* Regparam isn't allowed for thiscall and fastcall.  */
      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    ret |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    ret |= IX86_CALLCVT_SSEREGPARM;
	}

      if (IX86_BASE_CALLCVT(ret) != 0)
	return ret;
    }

  is_stdarg = stdarg_p (type);
  if (TARGET_RTD && !is_stdarg)
    return IX86_CALLCVT_STDCALL | ret;

  if (ret != 0
      || is_stdarg
      || TREE_CODE (type) != METHOD_TYPE
      || ix86_function_type_abi (type) != MS_ABI)
    return IX86_CALLCVT_CDECL | ret;

  return IX86_CALLCVT_THISCALL;
}
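
/* For example, on 32-bit targets a declaration such as
	void __attribute__((stdcall)) f (int);
   yields IX86_CALLCVT_STDCALL here, while a non-variadic C++ member
   function under the MS ABI defaults to IX86_CALLCVT_THISCALL.  */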
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  unsigned int ccvt1, ccvt2;

  if (TREE_CODE (type1) != FUNCTION_TYPE
      && TREE_CODE (type1) != METHOD_TYPE)
    return 1;

  ccvt1 = ix86_get_callcvt (type1);
  ccvt2 = ix86_get_callcvt (type2);
  if (ccvt1 != ccvt2)
    return 0;
  if (ix86_function_regparm (type1, NULL)
      != ix86_function_regparm (type2, NULL))
    return 0;

  return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
	{
	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
	  return regparm;
	}
    }
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
	target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention, so
	 checking here just optimize means that with
	 __attribute__((optimize (...))) caller could use regparm convention
	 and callee not, or vice versa.  Instead look at whether the callee
	 is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
	  && !(profile_flag && !flag_fentry))
	{
	  if (target->local && target->can_change_signature)
	    {
	      int local_regparm, globals = 0, regno;

	      /* Make sure no regparm register is taken by a
		 fixed register variable.  */
	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
		   local_regparm++)
		if (fixed_regs[local_regparm])
		  break;

	      /* We don't want to use regparm(3) for nested functions as
		 these use a static chain pointer in the third argument.  */
	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
		local_regparm = 2;

	      /* Save a register for the split stack.  */
	      if (flag_split_stack)
		{
		  if (local_regparm == 3)
		    local_regparm = 2;
		  else if (local_regparm == 2
			   && DECL_STATIC_CHAIN (target->decl))
		    local_regparm = 1;
		}

	      /* Each fixed register usage increases register pressure,
		 so less registers should be used for argument passing.
		 This functionality can be overriden by an explicit
		 regparm value.  */
	      for (regno = AX_REG; regno <= DI_REG; regno++)
		if (fixed_regs[regno])
		  globals++;

	      local_regparm
		= globals < local_regparm ? local_regparm - globals : 0;

	      if (local_regparm > regparm)
		regparm = local_regparm;
	    }
	}
    }

  return regparm;
}
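
/* For instance, with
	int __attribute__((regparm(3))) f (int a, int b, int c);
   the three arguments arrive in %eax, %edx and %ecx instead of on the
   stack; the code above can also promote suitably local functions to
   such a convention automatically.  */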
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by error.  This is used in situations where we imply
   the SSE calling convention but the function is called from another
   function with SSE disabled.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
	{
	  if (warn)
	    {
	      if (decl)
		error ("calling %qD with attribute sseregparm without "
		       "SSE/SSE2 enabled", decl);
	      else
		error ("calling %qT with attribute sseregparm without "
		       "SSE/SSE2 enabled", type);
	    }
	  return 0;
	}

      return 2;
    }

  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      if (target->local && target->can_change_signature)
	{
	  /* Refuse to produce wrong code when local function with SSE enabled
	     is called from SSE disabled function.
	     FIXME: We need a way to detect these cases cross-ltrans partition
	     and avoid using SSE calling conventions on local functions called
	     from function with SSE disabled.  For now at least delay the
	     warning until we know we are going to produce wrong code.  */
	  if (!TARGET_SSE && warn)
	    return -1;
	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
				->x_ix86_isa_flags) ? 2 : 1;
	}
    }

  return 0;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */

static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat.  Don't bother working forward from ix86_function_regparm
     to the function type to whether an actual argument is located in
     eax.  Instead just look at cfg info, which is still close enough
     to correct at this point.  This gives false positives for broken
     functions that might use uninitialized data that happens to be
     allocated in eax, but who cares?  */
  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
}
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  tree attr;

  if (!TARGET_64BIT)
    {
      attr = lookup_attribute ("callee_pop_aggregate_return",
			       TYPE_ATTRIBUTES (fntype));
      if (attr)
	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);

      /* For 32-bit MS-ABI the default is to keep aggregate
	 return pointer.  */
      if (ix86_function_type_abi (fntype) == MS_ABI)
	return true;
    }
  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically
   popped when returning from a subroutine call.
   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number
   of args is fixed, but if the number is variable then the caller
   must pop them all.  RTD can't be used for library calls now
   because the library is compiled with the Unix compiler.
   Use of RTD is a selectable option, since it is incompatible with
   standard Unix calling sequences.  If the option is not selected,
   the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */

static poly_int64
ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
{
  unsigned int ccvt;

  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  ccvt = ix86_get_callcvt (funtype);

  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
	       | IX86_CALLCVT_THISCALL)) != 0
      && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the stack.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype))
    {
      int nregs = ix86_function_regparm (funtype, fundecl);
      if (nregs == 0)
	return GET_MODE_SIZE (Pmode);
    }

  return 0;
}
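
/* For example, a 32-bit stdcall function whose arguments occupy 8
   bytes of stack returns with "ret $8"; this hook then reports 8, so
   the caller emits no stack adjustment of its own.  */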
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
	 is irrelevant for matching constraints.  */
      if (UNARY_P (op))
	op = XEXP (op, 0);

      if (SUBREG_P (op))
	{
	  if (REG_P (SUBREG_REG (op))
	      && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
	    offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
					  GET_MODE (SUBREG_REG (op)),
					  SUBREG_BYTE (op),
					  GET_MODE (op));
	  op = SUBREG_REG (op);
	}

      if (!(REG_P (op) && HARD_REGISTER_P (op)))
	continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
	{
	  if (!TEST_BIT (preferred, j))
	    continue;
	  if (op_alt[i].anything_ok
	      || (op_alt[i].matches != -1
		  && operands_match_p
		     (recog_data.operand[i],
		      recog_data.operand[op_alt[i].matches]))
	      || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
	    {
	      win = true;
	      break;
	    }
	}

      if (!win)
	return false;
    }

  return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return SUBTARGET_SHADOW_OFFSET;
}
/* Argument support functions.  */

/* Return true when register may be used to pass function parameters.  */

bool
ix86_function_arg_regno_p (int regno)
{
  int i;
  enum calling_abi call_abi;
  const int *parm_regs;

  if (TARGET_SSE && SSE_REGNO_P (regno)
      && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    return true;

  if (!TARGET_64BIT)
    return (regno < REGPARM_MAX
	    || (TARGET_MMX && MMX_REGNO_P (regno)
		&& regno < FIRST_MMX_REG + MMX_REGPARM_MAX));

  /* TODO: The function should depend on current function ABI but
     builtins.c would need updating then.  Therefore we use the
     default ABI.  */
  call_abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (call_abi == SYSV_ABI && regno == AX_REG)
    return true;

  if (call_abi == MS_ABI)
    parm_regs = x86_64_ms_abi_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = 0; i < (call_abi == MS_ABI
		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    if (regno == parm_regs[i])
      return true;
  return false;
}
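
/* Under the 64-bit SysV ABI this accepts %rdi, %rsi, %rdx, %rcx, %r8
   and %r9 (see x86_64_int_parameter_registers above) plus %rax, which
   carries the vector-register count for variadic calls; the MS ABI
   accepts only %rcx, %rdx, %r8 and %r9.  */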
/* Return true if we do not know how to pass ARG solely in registers.  */

static bool
ix86_must_pass_in_stack (const function_arg_info &arg)
{
  if (must_pass_in_stack_var_size_or_pad (arg))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
     The layout_type routine is crafty and tries to trick us into passing
     currently unsupported vector types on the stack by using TImode.  */
  return (!TARGET_64BIT && arg.mode == TImode
	  && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
}
/* It returns the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by fndecl dependent to the used
   abi format.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi call_abi = SYSV_ABI;
  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    call_abi = ix86_function_abi (fndecl);
  else
    call_abi = ix86_function_type_abi (fndecl);
  if (TARGET_64BIT && call_abi == MS_ABI)
    return 32;
  return 0;
}
/* We add this as a workaround in order to use libc_has_function
   hook in i386.md.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class, NULL_TREE);
}
/* Returns value SYSV_ABI, MS_ABI dependent on fntype,
   specifying the call abi used.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  enum calling_abi abi = ix86_abi;

  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return abi;

  if (abi == SYSV_ABI
      && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    {
      static int warned;
      if (TARGET_X32 && !warned)
	{
	  error ("X32 does not support %<ms_abi%> attribute");
	  warned = 1;
	}

      abi = MS_ABI;
    }
  else if (abi == MS_ABI
	   && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    abi = SYSV_ABI;

  return abi;
}

enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
}
/* Returns value SYSV_ABI, MS_ABI dependent on cfun,
   specifying the call abi used.  */
enum calling_abi
ix86_cfun_abi (void)
{
  return cfun ? cfun->machine->call_abi : ix86_abi;
}
bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    {
      if (decl_function_context (fn) != NULL_TREE)
	error_at (DECL_SOURCE_LOCATION (fn),
		  "%<ms_hook_prologue%> attribute is not compatible "
		  "with nested function");
      else
	return true;
    }
  return false;
}

bool
ix86_function_naked (const_tree fn)
{
  if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    return true;

  return false;
}
/* Write the extra assembler code needed to declare a function properly.  */

void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
				tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  if (cfun)
    cfun->machine->function_label_emitted = true;

  if (is_ms_hook)
    {
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
	fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
	{
	  /* leaq [%rsp + 0], %rsp  */
	  fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
		 asm_out_file);
	}
      else
	{
	  /* movl.s %edi, %edi
	     push   %ebp
	     movl.s %esp, %ebp */
	  fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
	}
    }
}
/* Implementation of call abi switching target hook.  Specific to FNDECL
   the specific call register sets are set.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  cfun->machine->call_abi = ix86_function_abi (fndecl);
}
/* Return 1 if pseudo register should be created and used to hold
   GOT address for PIC code.  */
bool
ix86_use_pseudo_pic_reg (void)
{
  if ((TARGET_64BIT
       && (ix86_cmodel == CM_SMALL_PIC
	   || TARGET_PECOFF))
      || !flag_pic)
    return false;
  return true;
}
/* Initialize large model PIC register.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  gcc_assert (Pmode == DImode);
  label = gen_label_rtx ();
  emit_label (label);
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
				label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
/* Create and initialize PIC register if required.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      if (ix86_cmodel == CM_LARGE_PIC)
	ix86_init_large_pic_reg (R11_REG);
      else
	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /* If there is future mcount call in the function it is more profitable
	 to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
		: pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
	emit_move_insn (pic_offset_table_rtx, reg);
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = get_insns ();
  end_sequence ();

  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
		      tree fntype,	/* tree ptr for function decl */
		      rtx libname,	/* SYMBOL_REF of library name or 0 */
		      tree fndecl,
		      int caller)
{
  struct cgraph_node *local_info_node = NULL;
  struct cgraph_node *target = NULL;

  memset (cum, 0, sizeof (*cum));

  if (fndecl)
    {
      target = cgraph_node::get (fndecl);
      if (target)
	{
	  target = target->function_symbol ();
	  local_info_node = cgraph_node::local_info_node (target->decl);
	  cum->call_abi = ix86_function_abi (target->decl);
	}
      else
	cum->call_abi = ix86_function_abi (fndecl);
    }
  else
    cum->call_abi = ix86_function_type_abi (fntype);

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    cum->nregs = (cum->call_abi == SYSV_ABI
		  ? X86_64_REGPARM_MAX
		  : X86_64_MS_REGPARM_MAX);
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
	cum->sse_nregs = (cum->call_abi == SYSV_ABI
			  ? X86_64_SSE_REGPARM_MAX
			  : X86_64_MS_SSE_REGPARM_MAX);
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive.
     FIXME: once typesystem is fixed, we won't need this code anymore.  */
  if (local_info_node && local_info_node->local
      && local_info_node->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  cum->maybe_vaarg = (fntype
		      ? (!prototype_p (fntype) || stdarg_p (fntype))
		      : !libname);

  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
	{
	  if (argtype == error_mark_node || VOID_TYPE_P (argtype))
	    break;
	  if (TYPE_EMPTY_P (argtype))
	    seen_empty_type = true;
	  else if (seen_empty_type)
	    {
	      cum->warn_empty = true;
	      break;
	    }
	}
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
	 in registers in 32-bit mode.  */
      if (stdarg_p (fntype))
	{
	  cum->nregs = 0;
	  /* Since in 32-bit, variable arguments are always passed on
	     stack, there is scratch register available for indirect
	     sibcall.  */
	  cfun->machine->arg_reg_available = true;
	  cum->sse_nregs = 0;
	  cum->mmx_nregs = 0;
	  cum->warn_avx512f = false;
	  cum->warn_avx = false;
	  cum->warn_sse = false;
	  cum->warn_mmx = false;
	  return;
	}

      /* Use ecx and edx registers if function has fastcall attribute,
	 else look for regparm information.  */
      if (fntype)
	{
	  unsigned int ccvt = ix86_get_callcvt (fntype);
	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	    {
	      cum->nregs = 1;
	      cum->fastcall = 1; /* Same first register as in fastcall.  */
	    }
	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	    {
	      cum->nregs = 2;
	      cum->fastcall = 1;
	    }
	  else
	    cum->nregs = ix86_function_regparm (fntype, fndecl);
	}

      /* Set up the number of SSE registers used for passing SFmode
	 and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}
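
/* For example, for
	void __attribute__((fastcall)) f (int a, int b, int c);
   the code above sets cum->nregs to 2, so A and B travel in %ecx and
   %edx while C is pushed on the stack.  */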
1835 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1836 But in the case of vector types, it is some vector mode.
1838 When we have only some of our vector isa extensions enabled, then there
1839 are some modes for which vector_mode_supported_p is false. For these
1840 modes, the generic vector support in gcc will choose some non-vector mode
1841 in order to implement the type. By computing the natural mode, we'll
1842 select the proper ABI location for the operand and not depend on whatever
1843 the middle-end decides to do with these vector types.
1845 The midde-end can't deal with the vector types > 16 bytes. In this
1846 case, we return the original mode and warn ABI change if CUM isn't
1849 If INT_RETURN is true, warn ABI change if the vector mode isn't
1850 available for function return value. */
1853 type_natural_mode (const_tree type
, const CUMULATIVE_ARGS
*cum
,
1856 machine_mode mode
= TYPE_MODE (type
);
1858 if (TREE_CODE (type
) == VECTOR_TYPE
&& !VECTOR_MODE_P (mode
))
1860 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1861 if ((size
== 8 || size
== 16 || size
== 32 || size
== 64)
1862 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1863 && TYPE_VECTOR_SUBPARTS (type
) > 1)
1865 machine_mode innermode
= TYPE_MODE (TREE_TYPE (type
));
1867 /* There are no XFmode vector modes. */
1868 if (innermode
== XFmode
)
1871 if (TREE_CODE (TREE_TYPE (type
)) == REAL_TYPE
)
1872 mode
= MIN_MODE_VECTOR_FLOAT
;
1874 mode
= MIN_MODE_VECTOR_INT
;
1876 /* Get the mode which has this inner mode and number of units. */
1877 FOR_EACH_MODE_FROM (mode
, mode
)
1878 if (GET_MODE_NUNITS (mode
) == TYPE_VECTOR_SUBPARTS (type
)
1879 && GET_MODE_INNER (mode
) == innermode
)
1881 if (size
== 64 && !TARGET_AVX512F
&& !TARGET_IAMCU
)
1883 static bool warnedavx512f
;
1884 static bool warnedavx512f_ret
;
1886 if (cum
&& cum
->warn_avx512f
&& !warnedavx512f
)
1888 if (warning (OPT_Wpsabi
, "AVX512F vector argument "
1889 "without AVX512F enabled changes the ABI"))
1890 warnedavx512f
= true;
1892 else if (in_return
&& !warnedavx512f_ret
)
1894 if (warning (OPT_Wpsabi
, "AVX512F vector return "
1895 "without AVX512F enabled changes the ABI"))
1896 warnedavx512f_ret
= true;
1899 return TYPE_MODE (type
);
1901 else if (size
== 32 && !TARGET_AVX
&& !TARGET_IAMCU
)
1903 static bool warnedavx
;
1904 static bool warnedavx_ret
;
1906 if (cum
&& cum
->warn_avx
&& !warnedavx
)
1908 if (warning (OPT_Wpsabi
, "AVX vector argument "
1909 "without AVX enabled changes the ABI"))
1912 else if (in_return
&& !warnedavx_ret
)
1914 if (warning (OPT_Wpsabi
, "AVX vector return "
1915 "without AVX enabled changes the ABI"))
1916 warnedavx_ret
= true;
1919 return TYPE_MODE (type
);
1921 else if (((size
== 8 && TARGET_64BIT
) || size
== 16)
1925 static bool warnedsse
;
1926 static bool warnedsse_ret
;
1928 if (cum
&& cum
->warn_sse
&& !warnedsse
)
1930 if (warning (OPT_Wpsabi
, "SSE vector argument "
1931 "without SSE enabled changes the ABI"))
1934 else if (!TARGET_64BIT
&& in_return
&& !warnedsse_ret
)
1936 if (warning (OPT_Wpsabi
, "SSE vector return "
1937 "without SSE enabled changes the ABI"))
1938 warnedsse_ret
= true;
1941 else if ((size
== 8 && !TARGET_64BIT
)
1943 || cfun
->machine
->func_type
== TYPE_NORMAL
)
1947 static bool warnedmmx
;
1948 static bool warnedmmx_ret
;
1950 if (cum
&& cum
->warn_mmx
&& !warnedmmx
)
1952 if (warning (OPT_Wpsabi
, "MMX vector argument "
1953 "without MMX enabled changes the ABI"))
1956 else if (in_return
&& !warnedmmx_ret
)
1958 if (warning (OPT_Wpsabi
, "MMX vector return "
1959 "without MMX enabled changes the ABI"))
1960 warnedmmx_ret
= true;
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for the
   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */

static rtx
gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
		     unsigned int regno)
{
  rtx tmp;

  if (orig_mode != BLKmode)
    tmp = gen_rtx_REG (orig_mode, regno);
  else
    {
      tmp = gen_rtx_REG (mode, regno);
      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    }

  return tmp;
}
/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
   of this code is to classify each 8bytes of incoming argument by the register
   class and assign registers accordingly.  */

/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  */

static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
    return X86_64_INTEGERSI_CLASS;
  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  if (class1 == X86_64_X87_CLASS
      || class1 == X86_64_X87UP_CLASS
      || class1 == X86_64_COMPLEX_X87_CLASS
      || class2 == X86_64_X87_CLASS
      || class2 == X86_64_X87UP_CLASS
      || class2 == X86_64_COMPLEX_X87_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
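
/* Worked example: struct { int i; float f; } occupies one eightbyte;
   the int classifies as INTEGERSI and the float as SSESF, and rule #4
   merges them to INTEGERSI, so the whole struct is passed in a
   general-purpose register.  */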
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 512 to avoid overflow cases.

   See the x86-64 PS ABI for details.  */

static int
classify_argument (machine_mode mode, const_tree type,
		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
  HOST_WIDE_INT bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);

  /* Variable sized entities are always passed/returned in memory.  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode)
    {
      /* The value of "named" doesn't matter.  */
      function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
      if (targetm.calls.must_pass_in_stack (arg))
	return 0;
    }

  if (type && AGGREGATE_TYPE_P (type))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
      if (bytes > 64)
	return 0;

      for (i = 0; i < words; i++)
	classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signalize memory class, so handle it as special case.  */
      if (!words)
	{
	  classes[0] = X86_64_NO_CLASS;
	  return 1;
	}

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	  /* And now merge the fields of structure.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  /* Bitfields are always classified as integer.  Handle them
		     early, since later code would consider them to be
		     misaligned integers.  */
		  if (DECL_BIT_FIELD (field))
		    {
		      for (i = (int_bit_position (field)
				+ (bit_offset % 64)) / 8 / 8;
			   i < ((int_bit_position (field) + (bit_offset % 64))
				+ tree_to_shwi (DECL_SIZE (field))
				+ 63) / 8 / 8; i++)
			classes[i]
			  = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
		    }
		  else
		    {
		      int pos;

		      type = TREE_TYPE (field);

		      /* Flexible array member is ignored.  */
		      if (TYPE_MODE (type) == BLKmode
			  && TREE_CODE (type) == ARRAY_TYPE
			  && TYPE_SIZE (type) == NULL_TREE
			  && TYPE_DOMAIN (type) != NULL_TREE
			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
			      == NULL_TREE))
			{
			  static bool warned;

			  if (!warned && warn_psabi)
			    {
			      warned = true;
			      inform (input_location,
				      "the ABI of passing struct with"
				      " a flexible array member has"
				      " changed in GCC 4.4");
			    }
			  continue;
			}
		      num = classify_argument (TYPE_MODE (type), type,
					       subclasses,
					       (int_bit_position (field)
						+ bit_offset) % 512);
		      if (!num)
			return 0;
		      pos = (int_bit_position (field)
			     + (bit_offset % 64)) / 8 / 8;
		      for (i = 0; i < num && (i + pos) < words; i++)
			classes[i + pos]
			  = merge_classes (subclasses[i], classes[i + pos]);
		    }
		}
	    }
	  break;

	case ARRAY_TYPE:
	  /* Arrays are handled as small records.  */
	  {
	    int num;
	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
				     TREE_TYPE (type), subclasses, bit_offset);
	    if (!num)
	      return 0;

	    /* The partial classes are now full classes.  */
	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
	      subclasses[0] = X86_64_SSE_CLASS;
	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
		&& !((bit_offset % 64) == 0 && bytes == 4))
	      subclasses[0] = X86_64_INTEGER_CLASS;

	    for (i = 0; i < words; i++)
	      classes[i] = subclasses[i % num];

	    break;
	  }
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but offset is always 0.  */
	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	    {
	      if (TREE_CODE (field) == FIELD_DECL)
		{
		  int num;

		  if (TREE_TYPE (field) == error_mark_node)
		    continue;

		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
					   TREE_TYPE (field), subclasses,
					   bit_offset);
= classify_argument (TYPE_MODE (TREE_TYPE (field
)),
2200 TREE_TYPE (field
), subclasses
,
2204 for (i
= 0; i
< num
&& i
< words
; i
++)
2205 classes
[i
] = merge_classes (subclasses
[i
], classes
[i
]);
2216 /* When size > 16 bytes, if the first one isn't
2217 X86_64_SSE_CLASS or any other ones aren't
2218 X86_64_SSEUP_CLASS, everything should be passed in
2220 if (classes
[0] != X86_64_SSE_CLASS
)
2223 for (i
= 1; i
< words
; i
++)
2224 if (classes
[i
] != X86_64_SSEUP_CLASS
)
2228 /* Final merger cleanup. */
2229 for (i
= 0; i
< words
; i
++)
2231 /* If one class is MEMORY, everything should be passed in
2233 if (classes
[i
] == X86_64_MEMORY_CLASS
)
2236 /* The X86_64_SSEUP_CLASS should be always preceded by
2237 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2238 if (classes
[i
] == X86_64_SSEUP_CLASS
2239 && classes
[i
- 1] != X86_64_SSE_CLASS
2240 && classes
[i
- 1] != X86_64_SSEUP_CLASS
)
2242 /* The first one should never be X86_64_SSEUP_CLASS. */
2243 gcc_assert (i
!= 0);
2244 classes
[i
] = X86_64_SSE_CLASS
;
2247 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2248 everything should be passed in memory. */
2249 if (classes
[i
] == X86_64_X87UP_CLASS
2250 && (classes
[i
- 1] != X86_64_X87_CLASS
))
2254 /* The first one should never be X86_64_X87UP_CLASS. */
2255 gcc_assert (i
!= 0);
2256 if (!warned
&& warn_psabi
)
2259 inform (input_location
,
2260 "the ABI of passing union with %<long double%>"
2261 " has changed in GCC 4.4");
2269 /* Compute alignment needed. We align all types to natural boundaries with
2270 exception of XFmode that is aligned to 64bits. */
2271 if (mode
!= VOIDmode
&& mode
!= BLKmode
)
2273 int mode_alignment
= GET_MODE_BITSIZE (mode
);
2276 mode_alignment
= 128;
2277 else if (mode
== XCmode
)
2278 mode_alignment
= 256;
2279 if (COMPLEX_MODE_P (mode
))
2280 mode_alignment
/= 2;
2281 /* Misaligned fields are always returned in memory. */
2282 if (bit_offset
% mode_alignment
)
2286 /* for V1xx modes, just use the base mode */
2287 if (VECTOR_MODE_P (mode
) && mode
!= V1DImode
&& mode
!= V1TImode
2288 && GET_MODE_UNIT_SIZE (mode
) == bytes
)
2289 mode
= GET_MODE_INNER (mode
);
2291 /* Classification of atomic types. */
2296 classes
[0] = X86_64_SSE_CLASS
;
2299 classes
[0] = X86_64_SSE_CLASS
;
2300 classes
[1] = X86_64_SSEUP_CLASS
;
2310 int size
= bit_offset
+ (int) GET_MODE_BITSIZE (mode
);
2312 /* Analyze last 128 bits only. */
2313 size
= (size
- 1) & 0x7f;
2317 classes
[0] = X86_64_INTEGERSI_CLASS
;
2322 classes
[0] = X86_64_INTEGER_CLASS
;
2325 else if (size
< 64+32)
2327 classes
[0] = X86_64_INTEGER_CLASS
;
2328 classes
[1] = X86_64_INTEGERSI_CLASS
;
2331 else if (size
< 64+64)
2333 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2341 classes
[0] = classes
[1] = X86_64_INTEGER_CLASS
;
2345 /* OImode shouldn't be used directly. */
2350 if (!(bit_offset
% 64))
2351 classes
[0] = X86_64_SSESF_CLASS
;
2353 classes
[0] = X86_64_SSE_CLASS
;
2356 classes
[0] = X86_64_SSEDF_CLASS
;
2359 classes
[0] = X86_64_X87_CLASS
;
2360 classes
[1] = X86_64_X87UP_CLASS
;
2363 classes
[0] = X86_64_SSE_CLASS
;
2364 classes
[1] = X86_64_SSEUP_CLASS
;
2367 classes
[0] = X86_64_SSE_CLASS
;
2368 if (!(bit_offset
% 64))
2374 if (!warned
&& warn_psabi
)
2377 inform (input_location
,
2378 "the ABI of passing structure with %<complex float%>"
2379 " member has changed in GCC 4.4");
2381 classes
[1] = X86_64_SSESF_CLASS
;
2385 classes
[0] = X86_64_SSEDF_CLASS
;
2386 classes
[1] = X86_64_SSEDF_CLASS
;
2389 classes
[0] = X86_64_COMPLEX_X87_CLASS
;
2392 /* This modes is larger than 16 bytes. */
2400 classes
[0] = X86_64_SSE_CLASS
;
2401 classes
[1] = X86_64_SSEUP_CLASS
;
2402 classes
[2] = X86_64_SSEUP_CLASS
;
2403 classes
[3] = X86_64_SSEUP_CLASS
;
2411 classes
[0] = X86_64_SSE_CLASS
;
2412 classes
[1] = X86_64_SSEUP_CLASS
;
2413 classes
[2] = X86_64_SSEUP_CLASS
;
2414 classes
[3] = X86_64_SSEUP_CLASS
;
2415 classes
[4] = X86_64_SSEUP_CLASS
;
2416 classes
[5] = X86_64_SSEUP_CLASS
;
2417 classes
[6] = X86_64_SSEUP_CLASS
;
2418 classes
[7] = X86_64_SSEUP_CLASS
;
2426 classes
[0] = X86_64_SSE_CLASS
;
2427 classes
[1] = X86_64_SSEUP_CLASS
;
2435 classes
[0] = X86_64_SSE_CLASS
;
2441 gcc_assert (VECTOR_MODE_P (mode
));
2446 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode
)) == MODE_INT
);
2448 if (bit_offset
+ GET_MODE_BITSIZE (mode
) <= 32)
2449 classes
[0] = X86_64_INTEGERSI_CLASS
;
2451 classes
[0] = X86_64_INTEGER_CLASS
;
2452 classes
[1] = X86_64_INTEGER_CLASS
;
2453 return 1 + (bytes
> 8);
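
/* A worked example (illustrative only): on x86-64,

     struct s { double d; long l; };

   occupies two eightbytes.  classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS for the double and
   classes[1] = X86_64_INTEGER_CLASS for the long, so the struct is
   split between one SSE register and one general purpose register.  */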
/* Examine the argument and return the number of registers required in each
   class.  Return true iff parameter should be passed in memory.  */

static bool
examine_argument (machine_mode mode, const_tree type, int in_return,
		  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n = classify_argument (mode, type, regclass, 0);

  *int_nregs = 0;
  *sse_nregs = 0;

  if (!n)
    return true;
  for (n--; n >= 0; n--)
    switch (regclass[n])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	(*int_nregs)++;
	break;
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	(*sse_nregs)++;
	break;
      case X86_64_NO_CLASS:
      case X86_64_SSEUP_CLASS:
	break;
      case X86_64_X87_CLASS:
      case X86_64_X87UP_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	if (!in_return)
	  return true;
	break;
      case X86_64_MEMORY_CLASS:
	gcc_unreachable ();
      }

  return false;
}
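
/* Usage sketch (hypothetical call): for the struct in the example
   above,

     examine_argument (BLKmode, type, 0, &int_nregs, &sse_nregs)

   returns false (no memory passing needed) and sets int_nregs = 1 and
   sse_nregs = 1; callers compare these counts against the registers
   still available before committing to register passing.  */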
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.  */

static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  int bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
			&needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs && !TARGET_SSE)
    {
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      GET_SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == BLKmode)
      && intreg[0] + 1 == intreg[1])
    {
      if (mode == BLKmode)
	{
	  /* Use TImode for BLKmode values in 2 integer registers.  */
	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (TImode, intreg[0]),
				      GEN_INT (0));
	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0) = exp[0];
	  return ret;
	}
      else
	return gen_rtx_REG (mode, intreg[0]);
    }

  /* Otherwise figure out the entries of the PARALLEL.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	case X86_64_NO_CLASS:
	  break;
	case X86_64_INTEGER_CLASS:
	case X86_64_INTEGERSI_CLASS:
	  /* Merge TImodes on aligned occasions here too.  */
	  if (i * 8 + 8 > bytes)
	    {
	      unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
	      if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
		/* We've requested 24 bytes we
		   don't have mode for.  Use DImode.  */
		tmpmode = DImode;
	    }
	  else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	    tmpmode = SImode;
	  else
	    tmpmode = DImode;
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode, *intreg),
				 GEN_INT (i*8));
	  intreg++;
	  break;
	case X86_64_SSESF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (SFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSEDF_CLASS:
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (DFmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (i*8));
	  sse_regno++;
	  break;
	case X86_64_SSE_CLASS:
	  pos = i;
	  switch (n)
	    {
	    case 1:
	      tmpmode = DImode;
	      break;
	    case 2:
	      if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		{
		  tmpmode = TImode;
		  i++;
		}
	      else
		tmpmode = DImode;
	      break;
	    case 4:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS);
	      tmpmode = OImode;
	      i += 3;
	      break;
	    case 8:
	      gcc_assert (i == 0
			  && regclass[1] == X86_64_SSEUP_CLASS
			  && regclass[2] == X86_64_SSEUP_CLASS
			  && regclass[3] == X86_64_SSEUP_CLASS
			  && regclass[4] == X86_64_SSEUP_CLASS
			  && regclass[5] == X86_64_SSEUP_CLASS
			  && regclass[6] == X86_64_SSEUP_CLASS
			  && regclass[7] == X86_64_SSEUP_CLASS);
	      tmpmode = XImode;
	      i += 7;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	  exp[nexps++]
	    = gen_rtx_EXPR_LIST (VOIDmode,
				 gen_rtx_REG (tmpmode,
					      GET_SSE_REGNO (sse_regno)),
				 GEN_INT (pos*8));
	  sse_regno++;
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp[i];
  return ret;
}
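
/* Sketch of the resulting RTL (illustrative): for the two-eightbyte
   struct classified above, construct_container builds

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
		(expr_list (reg:DI rdi) (const_int 8))])

   where each expr_list pairs the register carrying an eightbyte with
   the byte offset of that eightbyte within the argument.  */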
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)

   Return the number of integer registers advanced over.  */

static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return res;
}
static int
function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT words, bool named)
{
  int int_nregs, sse_nregs;

  /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
  if (!named && (VALID_AVX512F_REG_MODE (mode)
		 || VALID_AVX256_REG_MODE (mode)))
    return 0;

  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    {
      cum->nregs -= int_nregs;
      cum->sse_nregs -= sse_nregs;
      cum->regno += int_nregs;
      cum->sse_regno += sse_nregs;
      return int_nregs;
    }
  else
    {
      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
      cum->words = ROUND_UP (cum->words, align);
      cum->words += words;
      return 0;
    }
}
static int
function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
			    HOST_WIDE_INT words)
{
  /* Otherwise, this should be passed indirect.  */
  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);

  cum->words += words;
  if (cum->nregs > 0)
    {
      cum->nregs -= 1;
      cum->regno += 1;
      return 1;
    }
  return 0;
}
/* Update the data in CUM to advance over argument ARG.  */

static void
ix86_function_arg_advance (cumulative_args_t cum_v,
			   const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  int nregs;

  /* The argument of interrupt handler is a special case and is
     handled in ix86_function_arg.  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    return;

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  if (arg.type)
    mode = type_natural_mode (arg.type, NULL, false);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	nregs = function_arg_advance_ms_64 (cum, bytes, words);
      else
	nregs = function_arg_advance_64 (cum, mode, arg.type, words,
					 arg.named);
    }
  else
    nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);

  if (!nregs)
    {
      /* Track if there are outgoing arguments on stack.  */
      if (cum->caller)
	cfun->machine->outgoing_args_on_stack = true;
    }
}
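
/* Example of the bookkeeping (illustrative): advancing over
   f (int, double, int) under the SysV x86-64 ABI consumes two entries
   of x86_64_int_parameter_registers and one SSE register, leaving
   cum->regno == 2 and cum->sse_regno == 1; an argument that no longer
   fits falls through to the cum->words accounting above and is placed
   on the stack.  */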
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

static rtx
function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type,
		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
{
  bool error_p = false;

  /* Avoid the AL settings for the Unix64 ABI.  */
  if (mode == VOIDmode)
    return constm1_rtx;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return NULL_RTX;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      if (words <= cum->nregs)
	{
	  int regno = cum->regno;

	  /* Fastcall allocates the first two DWORD (SImode) or
	     smaller arguments to ECX and EDX if it isn't an
	     aggregate type.  */
	  if (cum->fastcall)
	    {
	      if (mode == BLKmode
		  || mode == DImode
		  || (type && AGGREGATE_TYPE_P (type)))
		break;

	      /* ECX not EAX is the first allocated register.  */
	      if (regno == AX_REG)
		regno = CX_REG;
	    }
	  return gen_rtx_REG (mode, regno);
	}
      break;

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */
    case E_TImode:
      /* In 32bit, we pass TImode in xmm registers.  */
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V2DFmode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_OImode:
    case E_XImode:
      /* OImode and XImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->sse_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->sse_regno + FIRST_SSE_REG);
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  if (cum->mmx_nregs)
	    return gen_reg_or_parallel (mode, orig_mode,
					cum->mmx_regno + FIRST_MMX_REG);
	}
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return NULL_RTX;
}
static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		 machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
		    ? (cum->sse_nregs < 0
		       ? X86_64_SSE_REGPARM_MAX
		       : cum->sse_regno)
		    : -1);

  switch (mode)
    {
    default:
      break;

    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!named)
	return NULL;
      break;
    }

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
			      cum->sse_nregs,
			      &x86_64_int_parameter_registers[cum->regno],
			      cum->sse_regno);
}
static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
		    machine_mode orig_mode, bool named, const_tree type,
		    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only floating point modes are passed in anything but integer regs.  */
  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
    {
      if (named)
	{
	  if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
	    regno = cum->regno + FIRST_SSE_REG;
	}
      else
	{
	  rtx t1, t2;

	  /* Unnamed floating parameters are passed in both the
	     SSE and integer registers.  */
	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
	  t2 = gen_rtx_REG (mode, regno);
	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
	}
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      if (bytes > 0 && bytes <= 8)
	mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
	mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
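
/* Illustration (not from the source): the MS x64 convention maps
   argument slots by position, so for f (int, double, int, double) the
   arguments land in %ecx, %xmm1, %r8d and %xmm3 -- each slot consumes
   both its integer and its SSE register, unlike the SysV scheme
   handled by function_arg_64 above.  */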
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   ARG describes the argument while CUM gives information about the
   preceding args and about the function being called.  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  rtx reg;

  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    {
      gcc_assert (arg.type != NULL_TREE);
      if (POINTER_TYPE_P (arg.type))
	{
	  /* This is the pointer argument.  */
	  gcc_assert (TYPE_MODE (arg.type) == Pmode);
	  /* It is at -WORD(AP) in the current frame in interrupt and
	     exception handlers.  */
	  reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
	}
      else
	{
	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
		      && TREE_CODE (arg.type) == INTEGER_TYPE
		      && TYPE_MODE (arg.type) == word_mode);
	  /* The error code is the word-mode integer argument at
	     -2 * WORD(AP) in the current frame of the exception
	     handler.  */
	  reg = gen_rtx_MEM (word_mode,
			     plus_constant (Pmode,
					    arg_pointer_rtx,
					    -2 * UNITS_PER_WORD));
	}
      return reg;
    }

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
    mode = type_natural_mode (arg.type, cum, false);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
				  arg.type, bytes);
      else
	reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
    }
  else
    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);

  /* Track if there are outgoing arguments on stack.  */
  if (reg == NULL_RTX && cum->caller)
    cfun->machine->outgoing_args_on_stack = true;

  return reg;
}
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      /* See Windows x64 Software Convention.  */
      if (call_abi == MS_ABI)
	{
	  HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);

	  if (tree type = arg.type)
	    {
	      /* Arrays are passed by reference.  */
	      if (TREE_CODE (type) == ARRAY_TYPE)
		return true;

	      if (RECORD_OR_UNION_TYPE_P (type))
		{
		  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
		     are passed by reference.  */
		  msize = int_size_in_bytes (type);
		}
	    }

	  /* __m128 is passed by reference.  */
	  return msize != 1 && msize != 2 && msize != 4 && msize != 8;
	}
      else if (arg.type && int_size_in_bytes (arg.type) == -1)
	return true;
    }

  return false;
}
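
/* For example (illustrative): under the MS x64 ABI a 12-byte struct or
   a 16-byte __m128 fails the 1/2/4/8 size test above, so the caller
   builds a temporary copy and passes its address, while an 8-byte
   struct is still passed by value in a register.  */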
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return false;
}
/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
   XXX: This function is obsolete and is only used for checking psABI
   compatibility with previous versions of GCC.  */

static unsigned int
ix86_compat_function_arg_boundary (machine_mode mode,
				   const_tree type, unsigned int align)
{
  /* In 32bit, only _Decimal128 and __float128 are aligned to their
     natural boundaries.  */
  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    {
      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
	 make an exception for SSE modes since these require 128bit
	 alignment.

	 The handling here differs from field_alignment.  ICC aligns MMX
	 arguments to 4 byte boundaries, while structure fields are aligned
	 to 8 byte boundaries.  */
      if (!type)
	{
	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
	    align = PARM_BOUNDARY;
	}
      else
	{
	  if (!ix86_compat_aligned_value_p (type))
	    align = PARM_BOUNDARY;
	}
    }
  if (align > BIGGEST_ALIGNMENT)
    align = BIGGEST_ALIGNMENT;
  return align;
}
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
	{
	case RECORD_TYPE:
	case UNION_TYPE:
	case QUAL_UNION_TYPE:
	  {
	    tree field;

	    /* Walk all the structure fields.  */
	    for (field = TYPE_FIELDS (type);
		 field;
		 field = DECL_CHAIN (field))
	      {
		if (TREE_CODE (field) == FIELD_DECL
		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
		  return true;
	      }
	    break;
	  }

	case ARRAY_TYPE:
	  /* Just for use if some languages pass arrays by value.  */
	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
	    return true;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    return TYPE_ALIGN (type) >= 128;

  return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  */

static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      if (TYPE_EMPTY_P (type))
	return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "the ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
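
/* Example (illustrative): on 32-bit targets an argument of type
   __float128 keeps its 128-bit boundary because
   ix86_contains_aligned_value_p returns true for it, whereas a plain
   double is demoted to PARM_BOUNDARY (32 bits) by the code above,
   matching the traditional i386 calling convention.  */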
/* Return true if N is a possible register number of function value.  */

static bool
ix86_function_value_regno_p (const unsigned int regno)
{
  switch (regno)
    {
    case AX_REG:
      return true;
    case DX_REG:
      return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
    case DI_REG:
    case SI_REG:
      return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;

      /* Complex values are returned in %st(0)/%st(1) pair.  */
    case ST0_REG:
    case ST1_REG:
      /* TODO: The function should depend on current function ABI but
	 builtins.c would need updating then.  Therefore we use the
	 default ABI.  */
      if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
	return false;
      return TARGET_FLOAT_RETURNS_IN_80387;

      /* Complex values are returned in %xmm0/%xmm1 pair.  */
    case XMM0_REG:
    case XMM1_REG:
      return TARGET_SSE;

    case MM0_REG:
      if (TARGET_MACHO || TARGET_64BIT)
	return false;
      return TARGET_MMX;
    }

  return false;
}
/* Check whether the register REGNO should be zeroed on X86.
   When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
   together, no need to zero it again.
   When NEED_ZERO_MMX is true, MMX registers should be cleared.  */

static bool
zero_call_used_regno_p (const unsigned int regno,
			bool all_sse_zeroed,
			bool need_zero_mmx)
{
  return GENERAL_REGNO_P (regno)
	 || (!all_sse_zeroed && SSE_REGNO_P (regno))
	 || MASK_REGNO_P (regno)
	 || (need_zero_mmx && MMX_REGNO_P (regno));
}

/* Return the machine_mode that is used to zero register REGNO.  */

static machine_mode
zero_call_used_regno_mode (const unsigned int regno)
{
  /* NB: We only need to zero the lower 32 bits for integer registers
     and the lower 128 bits for vector registers since destinations are
     zero-extended to the full register width.  */
  if (GENERAL_REGNO_P (regno))
    return SImode;
  else if (SSE_REGNO_P (regno))
    return V4SFmode;
  else if (MASK_REGNO_P (regno))
    return HImode;
  else if (MMX_REGNO_P (regno))
    return V4HImode;
  else
    gcc_unreachable ();
}
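
/* Rationale sketch (assuming the usual encodings): a 32-bit
   "xorl %eax, %eax" also clears the upper half of %rax, and a
   VEX/EVEX-encoded write of a 128-bit xmm value clears the upper
   ymm/zmm bits, so SImode and V4SFmode stores suffice to wipe the
   full-width registers here.  */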
/* Generate a rtx to zero all vector registers together if possible,
   otherwise, return NULL.  */

static rtx
zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
{
  if (!TARGET_AVX)
    return NULL;

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG)
	 || (TARGET_64BIT
	     && (REX_SSE_REGNO_P (regno)
		 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
	&& !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      return NULL;

  return gen_avx_vzeroall ();
}
/* Generate insns to zero all st registers together.
   Return true when zeroing instructions are generated.
   Assume the number of st registers that are zeroed is num_of_st,
   we will emit the following sequence to zero them together:

	      fldz;
	      ...
	      fldz;
	      fstp %st(0);
	      ...
	      fstp %st(0);

   i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
   and mark the stack slots empty.

   How to compute the num_of_st:
   There is no direct mapping from stack registers to hard register
   numbers.  If one stack register needs to be cleared, we don't know
   where in the stack the value remains.  So, if any stack register
   needs to be cleared, the whole stack should be cleared.  However,
   x87 stack registers that hold the return value should be excluded.
   x87 returns in the top (two for complex values) register, so
   num_of_st should be 7/6 when x87 returns, otherwise it will be 8.  */

static bool
zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
{
  /* If the FPU is disabled, no need to zero all st registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    return false;

  unsigned int num_of_st = 0;
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
	num_of_st++;
	break;
      }

  if (num_of_st == 0)
    return false;

  bool return_with_x87 = false;
  return_with_x87 = (crtl->return_rtx
		     && (STACK_REG_P (crtl->return_rtx)));

  bool complex_return = false;
  complex_return = (crtl->return_rtx
		    && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));

  if (return_with_x87)
    if (complex_return)
      num_of_st = 6;
    else
      num_of_st = 7;
  else
    num_of_st = 8;

  rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
  for (unsigned int i = 0; i < num_of_st; i++)
    emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));

  for (unsigned int i = 0; i < num_of_st; i++)
    {
      rtx insn;
      insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
      add_reg_note (insn, REG_DEAD, st_reg);
    }
  return true;
}
/* When the routine exits in MMX mode, if any ST register needs
   to be zeroed, we should clear all MMX registers except the
   RET_MMX_REGNO that holds the return value.  */

static bool
zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
		       unsigned int ret_mmx_regno)
{
  bool need_zero_all_mm = false;
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (STACK_REGNO_P (regno)
	&& TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
	need_zero_all_mm = true;
	break;
      }

  if (!need_zero_all_mm)
    return false;

  rtx zero_mmx = NULL_RTX;
  machine_mode mode = V4HImode;
  for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
    if (regno != ret_mmx_regno)
      {
	rtx reg = gen_rtx_REG (mode, regno);
	if (zero_mmx == NULL_RTX)
	  {
	    zero_mmx = reg;
	    emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
	  }
	else
	  emit_move_insn (reg, zero_mmx);
      }
  return true;
}
/* TARGET_ZERO_CALL_USED_REGS.  */
/* Generate a sequence of instructions that zero registers specified by
   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
   zeroed.  */

static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  bool all_st_zeroed = false;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (zeroed_hardregs);

  /* first, let's see whether we can zero all vector registers together.  */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
    }

  /* MM/ST registers share one register set; we should follow these
     rules to clear them:

			MMX exit mode	       x87 exit mode
	 -------------|----------------------|---------------
	 uses x87 reg | clear all MMX	      | clear all x87
	 uses MMX reg | clear individual MMX  | clear all x87
	 x87 + MMX    | clear all MMX	      | clear all x87

     First, we should decide which mode (MMX mode or x87 mode) the
     function exits with.  */

  bool exit_with_mmx_mode = (crtl->return_rtx
			     && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together.  */
    {
      all_st_zeroed = zero_all_st_registers (need_zeroed_hardregs);
      if (all_st_zeroed)
	SET_HARD_REG_BIT (zeroed_hardregs, FIRST_STACK_REG);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers.  */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
					     exit_mmx_regno);
      if (all_mm_zeroed)
	for (unsigned int regno = FIRST_MMX_REG;
	     regno <= LAST_MMX_REG; regno++)
	  if (regno != exit_mmx_regno)
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }

  /* Now, generate instructions to zero all the other registers.  */

  rtx zero_gpr = NULL_RTX;
  rtx zero_vector = NULL_RTX;
  rtx zero_mask = NULL_RTX;
  rtx zero_mmx = NULL_RTX;

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
	continue;
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
				   exit_with_mmx_mode && !all_mm_zeroed))
	continue;

      SET_HARD_REG_BIT (zeroed_hardregs, regno);

      rtx reg, tmp, zero_rtx;
      machine_mode mode = zero_call_used_regno_mode (regno);

      reg = gen_rtx_REG (mode, regno);
      zero_rtx = CONST0_RTX (mode);

      if (mode == SImode)
	{
	  if (zero_gpr == NULL_RTX)
	    {
	      zero_gpr = reg;
	      tmp = gen_rtx_SET (reg, zero_rtx);
	      if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
		{
		  rtx clob = gen_rtx_CLOBBER (VOIDmode,
					      gen_rtx_REG (CCmode,
							   FLAGS_REG));
		  tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
							       tmp,
							       clob));
		}
	      emit_insn (tmp);
	    }
	  else
	    emit_move_insn (reg, zero_gpr);
	}
      else if (mode == V4SFmode)
	{
	  if (zero_vector == NULL_RTX)
	    {
	      zero_vector = reg;
	      tmp = gen_rtx_SET (reg, zero_rtx);
	      emit_insn (tmp);
	    }
	  else
	    emit_move_insn (reg, zero_vector);
	}
      else if (mode == HImode)
	{
	  if (zero_mask == NULL_RTX)
	    {
	      zero_mask = reg;
	      tmp = gen_rtx_SET (reg, zero_rtx);
	      emit_insn (tmp);
	    }
	  else
	    emit_move_insn (reg, zero_mask);
	}
      else if (mode == V4HImode)
	{
	  if (zero_mmx == NULL_RTX)
	    {
	      zero_mmx = reg;
	      tmp = gen_rtx_SET (reg, zero_rtx);
	      emit_insn (tmp);
	    }
	  else
	    emit_move_insn (reg, zero_mmx);
	}
      else
	gcc_unreachable ();
    }
  return zeroed_hardregs;
}
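
/* Usage note (illustrative): with -fzero-call-used-regs=used-gpr the
   first cleared GPR materializes a zero (xor, or mov0 when profitable)
   and the remaining GPRs are copied from it, which is why zero_gpr,
   zero_vector, zero_mask and zero_mmx above each cache the first
   zeroed register of their kind.  */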
/* Define how to find the value returned by a function.
   VALTYPE is the data type of the value (as a tree).
   If the precise function being called is known, FUNC is its FUNCTION_DECL;
   otherwise, FUNC is 0.  */

static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.  */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled", fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called");
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
		   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
	{
	case E_SFmode:
	case E_SCmode:
	case E_DFmode:
	case E_DCmode:
	case E_TFmode:
	case E_SDmode:
	case E_DDmode:
	case E_TDmode:
	  regno = FIRST_SSE_REG;
	  break;
	case E_XFmode:
	case E_XCmode:
	  regno = FIRST_FLOAT_REG;
	  break;
	case E_TCmode:
	  return NULL;
	default:
	  regno = AX_REG;
	}

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  ret = construct_container (mode, orig_mode, valtype, 1,
			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
			     x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
static rtx
function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
		      const_tree fntype, const_tree fn, const_tree valtype)
{
  unsigned int regno;

  /* Floating point return values in %st(0)
     (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).  */
  if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
      && (GET_MODE_SIZE (mode) > 8
	  || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
    {
      regno = FIRST_FLOAT_REG;
      return gen_rtx_REG (orig_mode, regno);
    }
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}
static rtx
function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
		      const_tree valtype)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
	{
	case 16:
	  if (valtype != NULL_TREE
	      && !VECTOR_INTEGER_TYPE_P (valtype)
	      && !INTEGRAL_TYPE_P (valtype)
	      && !VECTOR_FLOAT_TYPE_P (valtype))
	    break;
	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode))
	    regno = FIRST_SSE_REG;
	  break;
	case 8:
	case 4:
	  if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
	    break;
	  if (mode == SFmode || mode == DFmode)
	    regno = FIRST_SSE_REG;
	  break;
	default:
	  break;
	}
    }
  return gen_rtx_REG (orig_mode, regno);
}
static rtx
ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
		       machine_mode orig_mode, machine_mode mode)
{
  const_tree fn, fntype;

  fn = NULL_TREE;
  if (fntype_or_decl && DECL_P (fntype_or_decl))
    fn = fntype_or_decl;
  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;

  if (ix86_function_type_abi (fntype) == MS_ABI)
    {
      if (TARGET_64BIT)
	return function_value_ms_64 (orig_mode, mode, valtype);
      else
	return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
    }
  else if (TARGET_64BIT)
    return function_value_64 (orig_mode, mode, valtype);
  else
    return function_value_32 (orig_mode, mode, fntype, fn);
}

static rtx
ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
{
  machine_mode mode, orig_mode;

  orig_mode = TYPE_MODE (valtype);
  mode = type_natural_mode (valtype, NULL, true);
  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
}
/* Pointer function arguments and return values are promoted to
   word_mode for normal functions.  */

static machine_mode
ix86_promote_function_mode (const_tree type, machine_mode mode,
			    int *punsignedp, const_tree fntype,
			    int for_return)
{
  if (cfun->machine->func_type == TYPE_NORMAL
      && type != NULL_TREE
      && POINTER_TYPE_P (type))
    {
      *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return word_mode;
    }
  return default_promote_function_mode (type, mode, punsignedp, fntype,
					for_return);
}
/* Return true if a structure, union or array with MODE containing FIELD
   should be accessed using BLKmode.  */

static bool
ix86_member_type_forces_blk (const_tree field, machine_mode mode)
{
  /* Union with XFmode must be in BLKmode.  */
  return (mode == XFmode
	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
}

rtx
ix86_libcall_value (machine_mode mode)
{
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
/* Return true iff type is returned in memory.  */

static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248].  */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  int needed_intregs, needed_sseregs;

	  return examine_argument (mode, type, 1,
				   &needed_intregs, &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */
	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F;
	}

      if (mode == XFmode)
	return false;

      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
}
/* Create the va_list data type.  */

static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
					NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
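
/* The record built above corresponds to the following C declaration
   (a sketch matching the SysV x86-64 ABI):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];  */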
/* Setup the builtin va_list data type and for 64-bit the additional
   calling convention specific va_list data types.  */

static tree
ix86_build_builtin_va_list (void)
{
  if (TARGET_64BIT)
    {
      /* Initialize ABI specific va_list builtin types.

	 In lto1, we can encounter two va_list types:
	 - one as a result of the type-merge across TUs, and
	 - the one constructed here.
	 These two types will not have the same TYPE_MAIN_VARIANT, and
	 therefore a type identity check in canonical_va_list_type based on
	 TYPE_MAIN_VARIANT (which we used to have) will not work.
	 Instead, we tag each va_list_type_node with its unique attribute, and
	 look for the attribute in the type identity check in
	 canonical_va_list_type.

	 Tagging sysv_va_list_type_node directly with the attribute is
	 problematic since it's an array of one record, which will degrade
	 into a pointer to record when used as parameter (see build_va_arg
	 comments for an example), dropping the attribute in the process.
	 So we tag the record instead.  */

      /* For SYSV_ABI we use an array of one record.  */
      sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();

      /* For MS_ABI we use plain pointer to argument area.  */
      tree char_ptr_type = build_pointer_type (char_type_node);
      tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
			     TYPE_ATTRIBUTES (char_ptr_type));
      ms_va_list_type_node = build_type_attribute_variant (char_ptr_type,
							   attr);

      return ((ix86_abi == MS_ABI)
	      ? ms_va_list_type_node
	      : sysv_va_list_type_node);
    }
  else
    {
      /* For i386 we use plain pointer to argument area.  */
      return build_pointer_type (char_type_node);
    }
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
		      gen_rtx_REG (word_mode,
				   x86_64_int_parameter_registers[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
	 of SSE parameter registers used to call this function, though all we
	 actually check here is the zero/non-zero status.  */

      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
				      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
	 be if we could determine the real mode of the data, via a hook
	 into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
	max = X86_64_SSE_REGPARM_MAX;

      for (i = cum->sse_regno; i < max; ++i)
	{
	  mem = plus_constant (Pmode, save_area,
			       i * 16 + ix86_varargs_gpr_size);
	  mem = gen_rtx_MEM (smode, mem);
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

	  emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
	}

      emit_label (label);
    }
}
static void
setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
{
  alias_set_type set = get_varargs_alias_set ();
  int i;

  /* Reset to zero, as there might be a sysv vaarg used
     before.  */
  ix86_varargs_gpr_size = 0;
  ix86_varargs_fpr_size = 0;

  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    {
      rtx reg, mem;

      mem = gen_rtx_MEM (Pmode,
			 plus_constant (Pmode, virtual_incoming_args_rtx,
					i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);

      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
      emit_move_insn (mem, reg);
    }
}
static void
ix86_setup_incoming_varargs (cumulative_args_t cum_v,
			     const function_arg_info &arg,
			     int *, int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS next_cum;
  tree fntype;

  /* This argument doesn't appear to be used anymore.  Which is good,
     because the old code here didn't suppress rtl generation.  */
  gcc_assert (!no_rtl);

  if (!TARGET_64BIT)
    return;

  fntype = TREE_TYPE (current_function_decl);

  /* For varargs, we do not want to skip the dummy va_dcl argument.
     For stdargs, we do want to skip the last named argument.  */
  next_cum = *cum;
  if (stdarg_p (fntype))
    ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);

  if (cum->call_abi == MS_ABI)
    setup_incoming_varargs_ms_64 (&next_cum);
  else
    setup_incoming_varargs_64 (&next_cum);
}
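
/* Layout illustration (values per the SysV x86-64 ABI): the register
   save area written by setup_incoming_varargs_64 holds the six integer
   registers in its first 6 * 8 = 48 bytes, followed by up to eight
   16-byte XMM slots; va_start then seeds gp_offset with
   8 * <named GPRs used> and fp_offset with 48 + 16 * <named SSE regs
   used>.  */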
/* Checks if TYPE is of kind va_list char *.  */

static bool
is_va_list_char_pointer (tree type)
{
  tree canonic;

  /* For 32-bit it is always true.  */
  if (!TARGET_64BIT)
    return true;
  canonic = ix86_canonical_va_list_type (type);
  return (canonic == ms_va_list_type_node
	  || (ix86_abi == MS_ABI && canonic == va_list_type_node));
}
4434 /* Implement va_start. */
4437 ix86_va_start (tree valist
, rtx nextarg
)
4439 HOST_WIDE_INT words
, n_gpr
, n_fpr
;
4440 tree f_gpr
, f_fpr
, f_ovf
, f_sav
;
4441 tree gpr
, fpr
, ovf
, sav
, t
;
4445 if (flag_split_stack
4446 && cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4448 unsigned int scratch_regno
;
4450 /* When we are splitting the stack, we can't refer to the stack
4451 arguments using internal_arg_pointer, because they may be on
4452 the old stack. The split stack prologue will arrange to
4453 leave a pointer to the old stack arguments in a scratch
4454 register, which we here copy to a pseudo-register. The split
4455 stack prologue can't set the pseudo-register directly because
4456 it (the prologue) runs before any registers have been saved. */
4458 scratch_regno
= split_stack_prologue_scratch_regno ();
4459 if (scratch_regno
!= INVALID_REGNUM
)
4464 reg
= gen_reg_rtx (Pmode
);
4465 cfun
->machine
->split_stack_varargs_pointer
= reg
;
4468 emit_move_insn (reg
, gen_rtx_REG (Pmode
, scratch_regno
));
4472 push_topmost_sequence ();
4473 emit_insn_after (seq
, entry_of_function ());
4474 pop_topmost_sequence ();
4478 /* Only 64bit target needs something special. */
4479 if (is_va_list_char_pointer (TREE_TYPE (valist
)))
4481 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4482 std_expand_builtin_va_start (valist
, nextarg
);
4487 va_r
= expand_expr (valist
, NULL_RTX
, VOIDmode
, EXPAND_WRITE
);
4488 next
= expand_binop (ptr_mode
, add_optab
,
4489 cfun
->machine
->split_stack_varargs_pointer
,
4490 crtl
->args
.arg_offset_rtx
,
4491 NULL_RTX
, 0, OPTAB_LIB_WIDEN
);
4492 convert_move (va_r
, next
, 0);
4497 f_gpr
= TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node
));
4498 f_fpr
= DECL_CHAIN (f_gpr
);
4499 f_ovf
= DECL_CHAIN (f_fpr
);
4500 f_sav
= DECL_CHAIN (f_ovf
);
4502 valist
= build_simple_mem_ref (valist
);
4503 TREE_TYPE (valist
) = TREE_TYPE (sysv_va_list_type_node
);
4504 /* The following should be folded into the MEM_REF offset. */
4505 gpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_gpr
), unshare_expr (valist
),
4507 fpr
= build3 (COMPONENT_REF
, TREE_TYPE (f_fpr
), unshare_expr (valist
),
4509 ovf
= build3 (COMPONENT_REF
, TREE_TYPE (f_ovf
), unshare_expr (valist
),
4511 sav
= build3 (COMPONENT_REF
, TREE_TYPE (f_sav
), unshare_expr (valist
),
4514 /* Count number of gp and fp argument registers used. */
4515 words
= crtl
->args
.info
.words
;
4516 n_gpr
= crtl
->args
.info
.regno
;
4517 n_fpr
= crtl
->args
.info
.sse_regno
;
4519 if (cfun
->va_list_gpr_size
)
4521 type
= TREE_TYPE (gpr
);
4522 t
= build2 (MODIFY_EXPR
, type
,
4523 gpr
, build_int_cst (type
, n_gpr
* 8));
4524 TREE_SIDE_EFFECTS (t
) = 1;
4525 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4528 if (TARGET_SSE
&& cfun
->va_list_fpr_size
)
4530 type
= TREE_TYPE (fpr
);
4531 t
= build2 (MODIFY_EXPR
, type
, fpr
,
4532 build_int_cst (type
, n_fpr
* 16 + 8*X86_64_REGPARM_MAX
));
4533 TREE_SIDE_EFFECTS (t
) = 1;
4534 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4537 /* Find the overflow area. */
4538 type
= TREE_TYPE (ovf
);
4539 if (cfun
->machine
->split_stack_varargs_pointer
== NULL_RTX
)
4540 ovf_rtx
= crtl
->args
.internal_arg_pointer
;
4542 ovf_rtx
= cfun
->machine
->split_stack_varargs_pointer
;
4543 t
= make_tree (type
, ovf_rtx
);
4545 t
= fold_build_pointer_plus_hwi (t
, words
* UNITS_PER_WORD
);
4547 t
= build2 (MODIFY_EXPR
, type
, ovf
, t
);
4548 TREE_SIDE_EFFECTS (t
) = 1;
4549 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4551 if (ix86_varargs_gpr_size
|| ix86_varargs_fpr_size
)
4553 /* Find the register save area.
4554 Prologue of the function save it right above stack frame. */
4555 type
= TREE_TYPE (sav
);
4556 t
= make_tree (type
, frame_pointer_rtx
);
4557 if (!ix86_varargs_gpr_size
)
4558 t
= fold_build_pointer_plus_hwi (t
, -8 * X86_64_REGPARM_MAX
);
4560 t
= build2 (MODIFY_EXPR
, type
, sav
, t
);
4561 TREE_SIDE_EFFECTS (t
) = 1;
4562 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
/* Implement va_arg.  */

static tree
ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
		      gimple_seq *post_p)
{
  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  int size, rsize;
  tree lab_false, lab_over = NULL_TREE;
  tree addr, t2;
  rtx container;
  int indirect_p = 0;
  tree ptrtype;
  machine_mode nat_mode;
  unsigned int arg_boundary;
  unsigned int type_align;

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);

  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
		valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  indirect_p = pass_va_arg_by_reference (type);
  if (indirect_p)
    type = build_pointer_type (type);
  size = arg_int_size_in_bytes (type);
  rsize = CEIL (size, UNITS_PER_WORD);

  nat_mode = type_natural_mode (type, NULL, false);
  switch (nat_mode)
    {
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!TARGET_64BIT_MS_ABI)
	{
	  container = NULL;
	  break;
	}
      /* FALLTHRU */

    default:
      container = construct_container (nat_mode, TYPE_MODE (type),
				       type, 0, X86_64_REGPARM_MAX,
				       X86_64_SSE_REGPARM_MAX, intreg,
				       0);
      break;
    }

  /* Pull the value out of the saved registers.  */

  addr = create_tmp_var (ptr_type_node, "addr");
  type_align = TYPE_ALIGN (type);

  if (container)
    {
      int needed_intregs, needed_sseregs;
      bool need_temp;
      tree int_addr, sse_addr;

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);

      need_temp = (!REG_P (container)
		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
		       || TYPE_ALIGN (type) > 128));

      /* When passing a structure, verify that it occupies a consecutive
	 block of the register save area.  If not, we need to do moves.  */
      if (!need_temp && !REG_P (container))
	{
	  /* Verify that all registers are strictly consecutive.  */
	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 16)
		    need_temp = true;
		}
	    }
	  else
	    {
	      int i;

	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
		{
		  rtx slot = XVECEXP (container, 0, i);
		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
		      || INTVAL (XEXP (slot, 1)) != i * 8)
		    need_temp = true;
		}
	    }
	}
      if (!need_temp)
	{
	  int_addr = addr;
	  sse_addr = addr;
	}
      else
	{
	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
	}

      /* First ensure that we fit completely in registers.  */
      if (needed_intregs)
	{
	  t = build_int_cst (TREE_TYPE (gpr),
			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}
      if (needed_sseregs)
	{
	  t = build_int_cst (TREE_TYPE (fpr),
			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
			     + X86_64_REGPARM_MAX * 8);
	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
	  gimplify_and_add (t, pre_p);
	}

      /* Compute index to start of area used for integer regs.  */
      if (needed_intregs)
	{
	  /* int_addr = gpr + sav; */
	  t = fold_build_pointer_plus (sav, gpr);
	  gimplify_assign (int_addr, t, pre_p);
	}
      if (needed_sseregs)
	{
	  /* sse_addr = fpr + sav; */
	  t = fold_build_pointer_plus (sav, fpr);
	  gimplify_assign (sse_addr, t, pre_p);
	}
      if (need_temp)
	{
	  int i, prev_size = 0;
	  tree temp = create_tmp_var (type, "va_arg_tmp");

	  /* addr = &temp; */
	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
	  gimplify_assign (addr, t, pre_p);

	  for (i = 0; i < XVECLEN (container, 0); i++)
	    {
	      rtx slot = XVECEXP (container, 0, i);
	      rtx reg = XEXP (slot, 0);
	      machine_mode mode = GET_MODE (reg);
	      tree piece_type;
	      tree addr_type;
	      tree daddr_type;
	      tree src_addr, src;
	      int src_offset;
	      tree dest_addr, dest;
	      int cur_size = GET_MODE_SIZE (mode);

	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
	      prev_size = INTVAL (XEXP (slot, 1));
	      if (prev_size + cur_size > size)
		{
		  cur_size = size - prev_size;
		  unsigned int nbits = cur_size * BITS_PER_UNIT;
		  if (!int_mode_for_size (nbits, 1).exists (&mode))
		    mode = QImode;
		}
	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
	      if (mode == GET_MODE (reg))
		addr_type = build_pointer_type (piece_type);
	      else
		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							 true);
	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
							true);

	      if (SSE_REGNO_P (REGNO (reg)))
		{
		  src_addr = sse_addr;
		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
		}
	      else
		{
		  src_addr = int_addr;
		  src_offset = REGNO (reg) * 8;
		}
	      src_addr = fold_convert (addr_type, src_addr);
	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);

	      dest_addr = fold_convert (daddr_type, addr);
	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
	      if (cur_size == GET_MODE_SIZE (mode))
		{
		  src = build_va_arg_indirect_ref (src_addr);
		  dest = build_va_arg_indirect_ref (dest_addr);

		  gimplify_assign (dest, src, pre_p);
		}
	      else
		{
		  tree copy
		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				       3, dest_addr, src_addr,
				       size_int (cur_size));
		  gimplify_and_add (copy, pre_p);
		}
	      prev_size += cur_size;
	    }
	}

      if (needed_intregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
	  gimplify_assign (gpr, t, pre_p);
	  /* The GPR save area guarantees only 8-byte alignment.  */
	  if (!need_temp)
	    type_align = MIN (type_align, 64);
	}

      if (needed_sseregs)
	{
	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
	  gimplify_assign (unshare_expr (fpr), t, pre_p);
	}

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... otherwise out of the overflow area.  */

  /* When we align parameter on stack for caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
     aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
     here with caller.  */
  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;

  /* Care for on-stack alignment if needed.  */
  if (arg_boundary <= 64 || size == 0)
    t = ovf;
  else
    {
      HOST_WIDE_INT align = arg_boundary / 8;
      t = fold_build_pointer_plus_hwi (ovf, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
  gimplify_assign (addr, t, pre_p);

  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (container)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  type = build_aligned_type (type, type_align);
  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  addr = fold_convert (ptrtype, addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
  return build_va_arg_indirect_ref (addr);
}
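
/* Rough sketch of the lowering above for a small integer argument
   (label names illustrative only):

     if (gpr >= (X86_64_REGPARM_MAX - needed + 1) * 8) goto lab_false;
     addr = sav + gpr;			// read from register save area
     gpr += needed * 8;
     goto lab_over;
   lab_false:
     addr = (ovf + align - 1) & -align;	// read from overflow area
     ovf = addr + rsize * UNITS_PER_WORD;
   lab_over:
     result = *(type *) addr;  */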
/* Return true if OPNUM's MEM should be matched
   in movabs* patterns.  */

bool
ix86_check_movabs (rtx insn, int opnum)
{
  rtx set, mem;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = XEXP (set, opnum);
  while (SUBREG_P (mem))
    mem = SUBREG_REG (mem);
  gcc_assert (MEM_P (mem));
  return volatile_ok || !MEM_VOLATILE_P (mem);
}
/* Return false if INSN contains a MEM with a non-default address space.  */
bool
ix86_check_no_addr_space (rtx insn)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
    {
      rtx x = *iter;
      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
	return false;
    }
  return true;
}
/* Initialize the table of extra 80387 mathematical constants.  */

static void
init_ext_80387_constants (void)
{
  static const char *cst[5] =
  {
    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    "3.1415926535897932385128089594061862044",  /* 4: fldpi  */
  };
  int i;

  for (i = 0; i < 5; i++)
    {
      real_from_string (&ext_80387_constants_table[i], cst[i]);
      /* Ensure each constant is rounded to XFmode precision.  */
      real_convert (&ext_80387_constants_table[i],
		    XFmode, &ext_80387_constants_table[i]);
    }

  ext_80387_constants_init = 1;
}
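
/* For reference (illustrative, not part of the table): these indices
   correspond to the single-instruction x87 loads named in the comments
   above, so a source constant equal to log10(2) can be materialized by
   "fldlg2" rather than a constant-pool load.  A hedged sketch of the
   intended use, via standard_80387_constant_p below:

     if (standard_80387_constant_p (op) == 3)
       ...emit "fldlg2" instead of a memory load...  */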
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  machine_mode mode = GET_MODE (x);

  const REAL_VALUE_TYPE *r;

  if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    return -1;

  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  r = CONST_DOUBLE_REAL_VALUE (x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
    {
      int i;

      if (! ext_80387_constants_init)
	init_ext_80387_constants ();

      for (i = 0; i < 5; i++)
	if (real_identical (r, &ext_80387_constants_table[i]))
	  return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (r))
    return 8;
  if (real_identical (r, &dconstm1))
    return 9;

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant X.  */

const char *
standard_80387_constant_opcode (rtx x)
{
  switch (standard_80387_constant_p (x))
    {
    case 1:
      return "fldz";
    case 2:
      return "fld1";
    case 3:
      return "fldlg2";
    case 4:
      return "fldln2";
    case 5:
      return "fldl2e";
    case 6:
      return "fldl2t";
    case 7:
      return "fldpi";
    case 8:
    case 9:
      return "#";
    default:
      gcc_unreachable ();
    }
}
/* Return the CONST_DOUBLE representing the 80387 constant that is
   loaded by the specified special instruction.  The argument IDX
   matches the return value from standard_80387_constant_p.  */

rtx
standard_80387_constant_rtx (int idx)
{
  int i;

  if (! ext_80387_constants_init)
    init_ext_80387_constants ();

  switch (idx)
    {
    case 3:
    case 4:
    case 5:
    case 6:
    case 7:
      i = idx - 3;
      break;

    default:
      gcc_unreachable ();
    }

  return const_double_from_real_value (ext_80387_constants_table[i],
				       XFmode);
}
/* Return 1 if X is all bits 0 and 2 if X is all bits 1
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  if (x == constm1_rtx || vector_all_ones_operand (x, mode))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
	mode = pred_mode;

      switch (GET_MODE_SIZE (mode))
	{
	case 64:
	  if (TARGET_AVX512F)
	    return 2;
	  break;
	case 32:
	  if (TARGET_AVX2)
	    return 2;
	  break;
	case 16:
	  if (TARGET_SSE2)
	    return 2;
	  break;
	default:
	  break;
	}
    }

  return 0;
}
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
	{
	case MODE_TI:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vpxor\t%0, %d0";
	  /* FALLTHRU */
	case MODE_XI:
	case MODE_OI:
	  if (EXT_REX_SSE_REG_P (operands[0]))
	    return (TARGET_AVX512VL
		    ? "vpxord\t%x0, %x0, %x0"
		    : "vpxord\t%g0, %g0, %g0");
	  return "vpxor\t%x0, %x0, %x0";

	case MODE_V2DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorpd\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V8DF:
	case MODE_V4DF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "vxorpd\t%x0, %x0, %x0";
	  else if (TARGET_AVX512DQ)
	    return (TARGET_AVX512VL
		    ? "vxorpd\t%x0, %x0, %x0"
		    : "vxorpd\t%g0, %g0, %g0");
	  else
	    return (TARGET_AVX512VL
		    ? "vpxorq\t%x0, %x0, %x0"
		    : "vpxorq\t%g0, %g0, %g0");

	case MODE_V4SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "%vxorps\t%0, %d0";
	  /* FALLTHRU */
	case MODE_V16SF:
	case MODE_V8SF:
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return "vxorps\t%x0, %x0, %x0";
	  else if (TARGET_AVX512DQ)
	    return (TARGET_AVX512VL
		    ? "vxorps\t%x0, %x0, %x0"
		    : "vxorps\t%g0, %g0, %g0");
	  else
	    return (TARGET_AVX512VL
		    ? "vpxord\t%x0, %x0, %x0"
		    : "vpxord\t%g0, %g0, %g0");

	default:
	  gcc_unreachable ();
	}
    }
  else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
	{
	case MODE_XI:
	case MODE_V8DF:
	case MODE_V16SF:
	  gcc_assert (TARGET_AVX512F);
	  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	case MODE_OI:
	case MODE_V4DF:
	case MODE_V8SF:
	  gcc_assert (TARGET_AVX2);
	  /* FALLTHRU */
	case MODE_TI:
	case MODE_V2DF:
	case MODE_V4SF:
	  gcc_assert (TARGET_SSE2);
	  if (!EXT_REX_SSE_REG_P (operands[0]))
	    return (TARGET_AVX
		    ? "vpcmpeqd\t%0, %0, %0"
		    : "pcmpeqd\t%0, %0");
	  else if (TARGET_AVX512VL)
	    return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
	  else
	    return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

	default:
	  gcc_unreachable ();
	}
    }

  gcc_unreachable ();
}
/* Returns true if INSN can be transformed from a memory load
   to a supported FP constant load.  */

bool
ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
{
  rtx src = find_constant_src (insn);

  gcc_assert (REG_P (dst));

  if (src == NULL
      || (SSE_REGNO_P (REGNO (dst))
	  && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
      || (STACK_REGNO_P (REGNO (dst))
	  && standard_80387_constant_p (src) < 1))
    return false;

  return true;
}
/* Predicate for pre-reload splitters with associated instructions,
   which can match any time before the split1 pass (usually combine),
   then are unconditionally split in that pass and should not be
   matched again afterwards.  */

bool
ix86_pre_reload_split (void)
{
  return (can_create_pseudo_p ()
	  && !(cfun->curr_properties & PROP_rtl_split_insns));
}
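
/* Illustrative use in a define_insn_and_split condition (a sketch of
   the intended pattern, not a pattern from this port):

     (define_insn_and_split "..."
       ...
       "ix86_pre_reload_split ()"
       ...)

   i.e. the pattern matches only while pseudos can still be created and
   before split1 has set PROP_rtl_split_insns on the function.  */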
/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
   TARGET_AVX512VL or it is a register to register move which can
   be done with zmm register move.  */

static const char *
ix86_get_ssemov (rtx *operands, unsigned size,
		 enum attr_mode insn_mode, machine_mode mode)
{
  char buf[128];
  bool misaligned_p = (misaligned_operand (operands[0], mode)
		       || misaligned_operand (operands[1], mode));
  bool evex_reg_p = (size == 64
		     || EXT_REX_SSE_REG_P (operands[0])
		     || EXT_REX_SSE_REG_P (operands[1]));
  machine_mode scalar_mode;

  const char *opcode = NULL;
  enum
    {
      opcode_int,
      opcode_float,
      opcode_double
    } type = opcode_int;

  switch (insn_mode)
    {
    case MODE_V16SF:
    case MODE_V8SF:
    case MODE_V4SF:
      scalar_mode = E_SFmode;
      type = opcode_float;
      break;
    case MODE_V8DF:
    case MODE_V4DF:
    case MODE_V2DF:
      scalar_mode = E_DFmode;
      type = opcode_double;
      break;
    case MODE_XI:
    case MODE_OI:
    case MODE_TI:
      scalar_mode = GET_MODE_INNER (mode);
      break;
    default:
      gcc_unreachable ();
    }

  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
     we can only use zmm register move without memory operand.  */
  if (evex_reg_p
      && !TARGET_AVX512VL
      && GET_MODE_SIZE (mode) < 64)
    {
      /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
	 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
	 AVX512VL is disabled, LRA can still generate reg to
	 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
	 modes.  */
      if (memory_operand (operands[0], mode)
	  || memory_operand (operands[1], mode))
	gcc_unreachable ();
      size = 64;
      switch (type)
	{
	case opcode_int:
	  opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
	  break;
	case opcode_float:
	  opcode = misaligned_p ? "vmovups" : "vmovaps";
	  break;
	case opcode_double:
	  opcode = misaligned_p ? "vmovupd" : "vmovapd";
	  break;
	}
    }
  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    {
      switch (scalar_mode)
	{
	case E_SFmode:
	  opcode = misaligned_p ? "%vmovups" : "%vmovaps";
	  break;
	case E_DFmode:
	  opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
	  break;
	case E_TFmode:
	  if (evex_reg_p)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else if (SCALAR_INT_MODE_P (scalar_mode))
    {
      switch (scalar_mode)
	{
	case E_QImode:
	  if (evex_reg_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu8"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_HImode:
	  if (evex_reg_p)
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "vmovdqu64")
		      : "vmovdqa64");
	  else
	    opcode = (misaligned_p
		      ? (TARGET_AVX512BW
			 ? "vmovdqu16"
			 : "%vmovdqu")
		      : "%vmovdqa");
	  break;
	case E_SImode:
	  if (evex_reg_p)
	    opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	case E_DImode:
	case E_TImode:
	case E_OImode:
	  if (evex_reg_p)
	    opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  else
	    opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
	  break;
	case E_XImode:
	  opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    gcc_unreachable ();

  switch (size)
    {
    case 64:
      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
		opcode);
      break;
    case 32:
      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
		opcode);
      break;
    case 16:
      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
		opcode);
      break;
    default:
      gcc_unreachable ();
    }
  output_asm_insn (buf, operands);
  return "";
}
/* Return the template of the TYPE_SSEMOV instruction to move
   operands[1] into operands[0].  */

const char *
ix86_output_ssemov (rtx_insn *insn, rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);
  if (get_attr_type (insn) != TYPE_SSEMOV
      || mode != GET_MODE (operands[1]))
    gcc_unreachable ();

  enum attr_mode insn_mode = get_attr_mode (insn);

  switch (insn_mode)
    {
    case MODE_XI:
    case MODE_V8DF:
    case MODE_V16SF:
      return ix86_get_ssemov (operands, 64, insn_mode, mode);

    case MODE_OI:
    case MODE_V4DF:
    case MODE_V8SF:
      return ix86_get_ssemov (operands, 32, insn_mode, mode);

    case MODE_TI:
    case MODE_V2DF:
    case MODE_V4SF:
      return ix86_get_ssemov (operands, 16, insn_mode, mode);

    case MODE_DI:
      /* Handle broken assemblers that require movd instead of movq.  */
      if (!HAVE_AS_IX86_INTERUNIT_MOVQ
	  && (GENERAL_REG_P (operands[0])
	      || GENERAL_REG_P (operands[1])))
	return "%vmovd\t{%1, %0|%0, %1}";
      else
	return "%vmovq\t{%1, %0|%0, %1}";

    case MODE_SI:
      return "%vmovd\t{%1, %0|%0, %1}";

    case MODE_DF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsd\t{%d1, %0|%0, %d1}";
      else
	return "%vmovsd\t{%1, %0|%0, %1}";

    case MODE_SF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovss\t{%d1, %0|%0, %d1}";
      else
	return "%vmovss\t{%1, %0|%0, %1}";

    case MODE_V1DF:
      gcc_assert (!TARGET_AVX);
      return "movlpd\t{%1, %0|%0, %1}";

    case MODE_V2SF:
      if (TARGET_AVX && REG_P (operands[0]))
	return "vmovlps\t{%1, %d0|%d0, %1}";
      else
	return "%vmovlps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
/* Returns true if OP contains a symbol reference.  */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}

      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}
/* Return true if it is appropriate to emit `ret' instructions in the
   body of a function.  Do this only if the epilogue is simple, needing a
   couple of insns.  Prior to reloading, we can't tell how many registers
   must be saved, so return false then.  Return false if there is no frame
   marker to de-allocate.  */

bool
ix86_can_use_return_insn_p (void)
{
  if (ix86_function_ms_hook_prologue (current_function_decl))
    return false;

  if (ix86_function_naked (current_function_decl))
    return false;

  /* Don't use `ret' instruction in interrupt handler.  */
  if (! reload_completed
      || frame_pointer_needed
      || cfun->machine->func_type != TYPE_NORMAL)
    return false;

  /* Don't allow more than 32k pop, since that's all we can do
     with one instruction.  */
  if (crtl->args.pops_args && crtl->args.size >= 32768)
    return false;

  struct ix86_frame &frame = cfun->machine->frame;
  return (frame.stack_pointer_offset == UNITS_PER_WORD
	  && (frame.nregs + frame.nsseregs) == 0);
}
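
/* In other words (a sketch of the final condition): a bare "ret" is
   allowed only when the frame collapses to the return address alone,

     stack_pointer_offset == UNITS_PER_WORD   // nothing to deallocate
     && nregs + nsseregs == 0                 // no register restores

   anything else needs a real epilogue first.  */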
/* Return stack frame size.  get_frame_size () returns used stack slots
   during compilation, which may be optimized out later.  If stack frame
   is needed, stack_frame_required should be true.  */

static HOST_WIDE_INT
ix86_get_frame_size (void)
{
  if (cfun->machine->stack_frame_required)
    return get_frame_size ();
  else
    return 0;
}
/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* SSE saves require frame-pointer when stack is misaligned.  */
  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  /* Several versions of mcount for the x86 assume that there is a
     frame, so we cannot allow profiling without a frame pointer.  */
  if (crtl->profile && !flag_fentry)
    return true;

  return false;
}
/* Record that the current function accesses previous call frames.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}

#ifndef USE_HIDDEN_LINKONCE
# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
#  define USE_HIDDEN_LINKONCE 1
# else
#  define USE_HIDDEN_LINKONCE 0
# endif
#endif

/* Label count for call and return thunks.  It is used to make unique
   labels in call and return thunks.  */
static int indirectlabelno;

/* True if call thunk function is needed.  */
static bool indirect_thunk_needed = false;

/* Bit masks of integer registers, which contain branch target, used
   by call thunk functions.  */
static int indirect_thunks_used;

/* True if return thunk function is needed.  */
static bool indirect_return_needed = false;

/* True if return thunk function via CX is needed.  */
static bool indirect_return_via_cx;

#ifndef INDIRECT_LABEL
# define INDIRECT_LABEL "LIND"
#endif

/* Indicate what prefix is needed for an indirect branch.  */
enum indirect_thunk_prefix
{
  indirect_thunk_prefix_none,
  indirect_thunk_prefix_nt
};
/* Return the prefix needed for an indirect branch INSN.  */

enum indirect_thunk_prefix
indirect_thunk_need_prefix (rtx_insn *insn)
{
  enum indirect_thunk_prefix need_prefix;
  if ((cfun->machine->indirect_branch_type
       == indirect_branch_thunk_extern)
      && ix86_notrack_prefixed_insn_p (insn))
    {
      /* NOTRACK prefix is only used with external thunk so that it
	 can be properly updated to support CET at run-time.  */
      need_prefix = indirect_thunk_prefix_nt;
    }
  else
    need_prefix = indirect_thunk_prefix_none;
  return need_prefix;
}
/* Fills in the label name that should be used for the indirect thunk.  */

static void
indirect_thunk_name (char name[32], unsigned int regno,
		     enum indirect_thunk_prefix need_prefix,
		     bool ret_p)
{
  if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
    gcc_unreachable ();

  if (USE_HIDDEN_LINKONCE)
    {
      const char *prefix;

      if (need_prefix == indirect_thunk_prefix_nt
	  && regno != INVALID_REGNUM)
	{
	  /* NOTRACK prefix is only used with external thunk via
	     register so that NOTRACK prefix can be added to indirect
	     branch via register to support CET at run-time.  */
	  prefix = "_nt";
	}
      else
	prefix = "";

      const char *ret = ret_p ? "return" : "indirect";

      if (regno != INVALID_REGNUM)
	{
	  const char *reg_prefix;
	  if (LEGACY_INT_REGNO_P (regno))
	    reg_prefix = TARGET_64BIT ? "r" : "e";
	  else
	    reg_prefix = "";
	  sprintf (name, "__x86_%s_thunk%s_%s%s",
		   ret, prefix, reg_prefix, reg_names[regno]);
	}
      else
	sprintf (name, "__x86_%s_thunk%s", ret, prefix);
    }
  else
    {
      if (regno != INVALID_REGNUM)
	ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
      else
	{
	  if (ret_p)
	    ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
	  else
	    ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
	}
    }
}
/* Output a call and return thunk for indirect branch.  If REGNO != -1,
   the function address is in REGNO and the call and return thunk looks like:

	call	L2
   L1:
	pause
	lfence
	jmp	L1
   L2:
	mov	%REG, (%sp)
	ret

   Otherwise, the function address is on the top of stack and the
   call and return thunk looks like:

	call L2
   L1:
	pause
	lfence
	jmp L1
   L2:
	lea WORD_SIZE(%sp), %sp
	ret
 */

static void
output_indirect_thunk (unsigned int regno)
{
  char indirectlabel1[32];
  char indirectlabel2[32];

  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
			       indirectlabelno++);
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
			       indirectlabelno++);

  /* Call */
  fputs ("\tcall\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs each prefer a different instruction as the loop
     filler; emitting both pause + lfence is a compromise.  */
  fprintf (asm_out_file, "\tpause\n\tlfence\n");

  /* Jump.  */
  fputs ("\tjmp\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    {
      if (! dwarf2out_do_cfi_asm ())
	{
	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
	}
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
      dwarf2out_emit_cfi (xcfi);
    }

  if (regno != INVALID_REGNUM)
    {
      /* MOV.  */
      rtx xops[2];
      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
      xops[1] = gen_rtx_REG (word_mode, regno);
      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    }
  else
    {
      /* LEA.  */
      rtx xops[2];
      xops[0] = stack_pointer_rtx;
      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    }

  fputs ("\tret\n", asm_out_file);
}
/* Output a function with a call and return thunk for indirect branch.
   If REGNO != INVALID_REGNUM, the function address is in REGNO.
   Otherwise, the function address is on the top of stack.  Thunk is
   used for function return if RET_P is true.  */

static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
				unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t", asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
    if (USE_HIDDEN_LINKONCE)
      {
	cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	targetm.asm_out.unique_section (decl, 0);
	switch_to_section (get_named_section (decl, NULL, 0));

	targetm.asm_out.globalize_label (asm_out_file, name);
	fputs ("\t.hidden\t", asm_out_file);
	assemble_name (asm_out_file, name);
	putc ('\n', asm_out_file);
	ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
      }
    else
      {
	switch_to_section (text_section);
	ASM_OUTPUT_LABEL (asm_out_file, name);
      }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  output_indirect_thunk (regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
static int pic_labels_used;

/* Fills in the label name that should be used for a pc thunk for
   the given register.  */

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  gcc_assert (!TARGET_64BIT);

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
}
/* This function generates code for -fpic that loads %ebx with
   the return address of the caller and then returns.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  if (indirect_return_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    CX_REG, true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
				    INVALID_REGNUM, false);

  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
      if ((indirect_thunks_used & (1 << i)))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);
    }

  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if ((indirect_thunks_used & (1 << regno)))
	output_indirect_thunk_function (indirect_thunk_prefix_none,
					regno, false);

      if (!(pic_labels_used & (1 << regno)))
	continue;

      get_pc_thunk_name (name, regno);

      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			 get_identifier (name),
			 build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
	{
	  switch_to_section (darwin_sections[picbase_thunk_section]);
	  fputs ("\t.weak_definition\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  fputs ("\n\t.private_extern\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	  DECL_WEAK (decl) = 1;
	}
      else
#endif
      if (USE_HIDDEN_LINKONCE)
	{
	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

	  targetm.asm_out.unique_section (decl, 0);
	  switch_to_section (get_named_section (decl, NULL, 0));

	  targetm.asm_out.globalize_label (asm_out_file, name);
	  fputs ("\t.hidden\t", asm_out_file);
	  assemble_name (asm_out_file, name);
	  putc ('\n', asm_out_file);
	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
	}
      else
	{
	  switch_to_section (text_section);
	  ASM_OUTPUT_LABEL (asm_out_file, name);
	}

      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
	 emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
	 as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
	{
	  int i = 8;

	  while (i--)
	    fputs ("\tnop\n", asm_out_file);
	}

      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      output_asm_insn ("%!ret", NULL);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Emit code for the SET_GOT patterns.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
	 an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
	 This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
	 and we decided to emit the pic base above, we will still output a
	 local label used for calculating the correction offset (even though
	 the offset will be 0 in that case).  */
      if (label)
	targetm.asm_out.internal_label (asm_out_file, "L",
					CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
	/* We don't need a pic base, we're not producing pic.  */
	gcc_unreachable ();

      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
/* Generate a "push" pattern for input ARG.  */

rtx
gen_push (rtx arg)
{
  struct machine_function *m = cfun->machine;

  if (m->fs.cfa_reg == stack_pointer_rtx)
    m->fs.cfa_offset += UNITS_PER_WORD;
  m->fs.sp_offset += UNITS_PER_WORD;

  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (gen_rtx_MEM (word_mode,
				   gen_rtx_PRE_DEC (Pmode,
						    stack_pointer_rtx)),
		      arg);
}

/* Generate a "pop" pattern for input ARG.  */

rtx
gen_pop (rtx arg)
{
  if (REG_P (arg) && GET_MODE (arg) != word_mode)
    arg = gen_rtx_REG (word_mode, REGNO (arg));

  return gen_rtx_SET (arg,
		      gen_rtx_MEM (word_mode,
				   gen_rtx_POST_INC (Pmode,
						     stack_pointer_rtx)));
}
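
/* For example (a sketch): gen_push (di) on x86-64 yields

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))

   and gen_pop (di) the mirror image,

     (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))

   Note that only gen_push updates the frame-state bookkeeping in
   cfun->machine->fs; the epilogue accounts for pops itself.  */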
/* Return >= 0 if there is an unused call-clobbered register available
   for the entire function.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  if (ix86_use_pseudo_pic_reg ())
    return INVALID_REGNUM;

  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
	drap = REGNO (crtl->drap_reg);
      else
	drap = -1;
      for (i = 2; i >= 0; --i)
	if (i != drap && !df_regs_ever_live_p (i))
	  return i;
    }

  return INVALID_REGNUM;
}
/* Return true if REGNO is used by the epilogue.  */

bool
ix86_epilogue_uses (int regno)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  return (epilogue_completed
	  && cfun->machine->no_caller_saved_registers
	  && !fixed_regs[regno]
	  && !STACK_REGNO_P (regno)
	  && !MMX_REGNO_P (regno));
}
/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */

static bool
ix86_hard_regno_scratch_ok (unsigned int regno)
{
  /* If there are no caller-saved registers, we can't use any register
     as a scratch register after epilogue and use REGNO as scratch
     register only if it has been used before to avoid saving and
     restoring it.  */
  return (!cfun->machine->no_caller_saved_registers
	  || (!epilogue_completed
	      && df_regs_ever_live_p (regno)));
}
/* Return TRUE if we need to save REGNO.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  /* If there are no caller-saved registers, we preserve all registers,
     except for MMX and x87 registers which aren't supported when saving
     and restoring registers.  Don't explicitly save SP register since
     it is always preserved.  */
  if (cfun->machine->no_caller_saved_registers)
    {
      /* Don't preserve registers used for function return value.  */
      rtx reg = crtl->return_rtx;
      if (reg)
	{
	  unsigned int i = REGNO (reg);
	  unsigned int nregs = REG_NREGS (reg);
	  while (nregs-- > 0)
	    if ((i + nregs) == regno)
	      return false;
	}

      return (df_regs_ever_live_p (regno)
	      && !fixed_regs[regno]
	      && !STACK_REGNO_P (regno)
	      && !MMX_REGNO_P (regno)
	      && (regno != HARD_FRAME_POINTER_REGNUM
		  || !frame_pointer_needed));
    }

  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
	     _mcount in prologue.  */
	  if (!TARGET_64BIT && flag_pic && crtl->profile)
	    return true;
	}
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
	       || crtl->profile
	       || crtl->calls_eh_return
	       || crtl->uses_const_pool
	       || cfun->has_nonlocal_label)
	return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
	{
	  unsigned test = EH_RETURN_DATA_REGNO (i);
	  if (test == INVALID_REGNUM)
	    break;
	  if (test == regno)
	    return true;
	}
    }

  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
		       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
	return false;
    }

  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  return (df_regs_ever_live_p (regno)
	  && !call_used_or_fixed_reg_p (regno)
	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
/* Return number of saved general purpose registers.  */

static int
ix86_nsaved_regs (void)
{
  int nregs = 0;
  int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      nregs++;
  return nregs;
}

/* Return number of saved SSE registers.  */

static int
ix86_nsaved_sseregs (void)
{
  int nregs = 0;
  int regno;

  if (!TARGET_64BIT_MS_ABI)
    return 0;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      nregs++;
  return nregs;
}
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  If stack alignment is needed, we can only replace argument
   pointer with hard frame pointer, or replace frame pointer with stack
   pointer.  Otherwise, frame pointer elimination is automatically
   handled and all other eliminations are valid.  */

static bool
ix86_can_eliminate (const int from, const int to)
{
  if (stack_realign_fp)
    return ((from == ARG_POINTER_REGNUM
	     && to == HARD_FRAME_POINTER_REGNUM)
	    || (from == FRAME_POINTER_REGNUM
		&& to == STACK_POINTER_REGNUM));
  else
    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
}
/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
  struct ix86_frame &frame = cfun->machine->frame;

  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset;
  else if (from == FRAME_POINTER_REGNUM
	   && to == HARD_FRAME_POINTER_REGNUM)
    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
  else
    {
      gcc_assert (to == STACK_POINTER_REGNUM);

      if (from == ARG_POINTER_REGNUM)
	return frame.stack_pointer_offset;

      gcc_assert (from == FRAME_POINTER_REGNUM);
      return frame.stack_pointer_offset - frame.frame_pointer_offset;
    }
}
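
/* A worked example with hypothetical numbers: if
   hard_frame_pointer_offset = 16, frame_pointer_offset = 48 and
   stack_pointer_offset = 112, then eliminating the argument pointer to
   the stack pointer yields 112, to the hard frame pointer 16, and the
   frame pointer to the stack pointer 112 - 48 = 64.  All offsets are
   measured down from the CFA as computed by ix86_compute_frame_layout
   below.  */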
/* Emits a warning for unsupported msabi to sysv pro/epilogues.  */
void warn_once_call_ms2sysv_xlogues (const char *feature)
{
  static bool warned_once = false;
  if (!warned_once)
    {
      warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
	       feature);
      warned_once = true;
    }
}
/* Return the probing interval for -fstack-clash-protection.  */

static HOST_WIDE_INT
get_probe_interval (void)
{
  if (flag_stack_clash_protection)
    return (HOST_WIDE_INT_1U
	    << param_stack_clash_protection_probe_interval);
  else
    return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
}
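
/* Note the parameter is an exponent, not a byte count: e.g. with the
   default --param stack-clash-protection-probe-interval value of 12
   this returns 1 << 12 = 4096, i.e. one probe per 4 KiB page.  */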
/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 256
/* Fill structure ix86_frame about frame of currently computed function.  */

static void
ix86_compute_frame_layout (void)
{
  struct ix86_frame *frame = &cfun->machine->frame;
  struct machine_function *m = cfun->machine;
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = ix86_get_frame_size ();
  HOST_WIDE_INT to_allocate;

  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
   * ms_abi functions that call a sysv function.  We now need to prune away
   * cases where it should be disabled.  */
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (TARGET_64BIT_MS_ABI);
      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
      gcc_assert (!TARGET_SEH);
      gcc_assert (TARGET_SSE);
      gcc_assert (!ix86_using_red_zone ());

      if (crtl->calls_eh_return)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
	}

      else if (ix86_static_chain_on_stack)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("static call chains");
	}

      /* Finally, compute which registers the stub will manage.  */
      else
	{
	  unsigned count = xlogue_layout::count_stub_managed_regs ();
	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
	  m->call_ms2sysv_pad_in = 0;
	}
    }

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  /* The 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at command line or via
     force_align_arg_pointer attribute.

     Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
     at call sites, including profile function calls.  */
  if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
       && crtl->preferred_stack_boundary < 128)
      && (!crtl->is_leaf || cfun->calls_alloca != 0
	  || ix86_current_function_calls_tls_descriptor
	  || (TARGET_MACHO && crtl->profile)
	  || ix86_incoming_stack_boundary < 128))
    {
      crtl->preferred_stack_boundary = 128;
      crtl->stack_alignment_needed = 128;
    }

  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* The only ABI saving SSE regs should be 64-bit ms_abi.  */
  gcc_assert (TARGET_64BIT || !frame->nsseregs);
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (stack_alignment_needed >= 16);
      gcc_assert (!frame->nsseregs);
    }

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    m->use_fast_prologue_epilogue = false;
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.

	 Calling this hook multiple times with the same frame requirements
	 must produce the same layout, since the RA might otherwise be
	 unable to reach a fixed point or might fail its final sanity checks.
	 This means that once we've assumed that a function does or doesn't
	 have a particular size, we have to stick to that assumption
	 regardless of how the function has changed since.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	m->use_fast_prologue_epilogue = false;
      else
	{
	  if (count != frame->expensive_count)
	    {
	      frame->expensive_count = count;
	      frame->expensive_p = expensive_function_p (count);
	    }
	  m->use_fast_prologue_epilogue = !frame->expensive_p;
	}
    }

  frame->save_regs_using_mov
    = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;

  /* Skip return address and error code in exception handler.  */
  offset = INCOMING_FRAME_SP_OFFSET;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* On SEH target, registers are pushed just before the frame pointer
     location.  */
  if (TARGET_SEH)
    frame->hard_frame_pointer_offset = offset;

  /* Calculate the size of the va-arg area (not including padding, if any).  */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;

  /* Also adjust stack_realign_offset for the largest alignment of
     stack slot actually used.  */
  if (stack_realign_fp
      || (cfun->machine->max_used_stack_alignment != 0
	  && (offset % cfun->machine->max_used_stack_alignment) != 0))
    {
      /* We may need a 16-byte aligned stack for the remainder of the
	 register save area, but the stack frame for the local function
	 may require a greater alignment if using AVX/2/512.  In order
	 to avoid wasting space, we first calculate the space needed for
	 the rest of the register saves, add that to the stack pointer,
	 and then realign the stack to the boundary of the start of the
	 frame for the local function.  */
      HOST_WIDE_INT space_needed = 0;
      HOST_WIDE_INT sse_reg_space_needed = 0;

      if (TARGET_64BIT)
	{
	  if (m->call_ms2sysv)
	    {
	      m->call_ms2sysv_pad_in = 0;
	      space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
	    }

	  else if (frame->nsseregs)
	    /* The only ABI that has saved SSE registers (Win64) also has a
	       16-byte aligned default stack.  However, many programs violate
	       the ABI, and Wine64 forces stack realignment to compensate.  */
	    space_needed = frame->nsseregs * 16;

	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);

	  /* 64-bit frame->va_arg_size should always be a multiple of 16, but
	     rounding to be pedantic.  */
	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
	}
      else
	space_needed = frame->va_arg_size;

      /* Record the allocation size required prior to the realignment AND.  */
      frame->stack_realign_allocate = space_needed;

      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
	 before this point are not directly comparable with values below
	 this point.  Use sp_valid_at to determine if the stack pointer is
	 valid for a given offset, fp_valid_at for the frame pointer, or
	 choose_baseaddr to have a base register chosen for you.

	 Note that the result of (frame->stack_realign_offset
	 & (stack_alignment_needed - 1)) may not equal zero.  */
      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
      frame->stack_realign_offset = offset - space_needed;
      frame->sse_reg_save_offset = frame->stack_realign_offset
				   + sse_reg_space_needed;
    }
  else
    {
      frame->stack_realign_offset = offset;

      if (TARGET_64BIT && m->call_ms2sysv)
	{
	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
	}

      /* Align and set SSE register save area.  */
      else if (frame->nsseregs)
	{
	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
	     required and the DRAP re-alignment boundary is at least 16 bytes,
	     then we want the SSE register save area properly aligned.  */
	  if (ix86_incoming_stack_boundary >= 128
	      || (stack_realign_drap && stack_alignment_needed >= 16))
	    offset = ROUND_UP (offset, 16);
	  offset += frame->nsseregs * 16;
	}
      frame->sse_reg_save_offset = offset;
      offset += frame->va_arg_size;
    }

  /* Align start of frame for local function.  When a function call
     is removed, it may become a leaf function.  But if argument may
     be passed on stack, we need to align the stack when there is no
     tail call.  */
  if (m->call_ms2sysv
      || frame->va_arg_size != 0
      || size != 0
      || !crtl->is_leaf
      || (!crtl->tail_call_emit
	  && cfun->machine->outgoing_args_on_stack)
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, stack_alignment_needed);

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, preferred_alignment);

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if ((!to_allocate && frame->nregs <= 1)
      || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
      /* If static stack checking is enabled and done with probes,
	 the registers need to be saved before allocating the frame.  */
      || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      /* If stack clash probing needs a loop, then it needs a
	 scratch register.  But the returned register is only guaranteed
	 to be safe to use after register saves are complete.  So if
	 stack clash protections are enabled and the allocated frame is
	 larger than the probe interval, then use pushes to save
	 callee saved registers.  */
      || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !ix86_pc_thunk_call_expanded
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      HOST_WIDE_INT diff;

      /* If we can leave the frame pointer where it is, do so.  Also, returns
	 the establisher frame for __builtin_frame_address (0).  */
      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= SEH_MAX_FRAME_SIZE
	  && (diff > 240 || (diff & 15) != 0)
	  && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
    }
}
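
/* Summary of the layout computed above, from the CFA downwards
   (illustrative; several areas may be empty for a given function):

     return address / error code	<- INCOMING_FRAME_SP_OFFSET
     [static chain]
     [saved frame pointer]		<- hard_frame_pointer_offset (non-SEH)
     GP register save area		<- reg_save_offset
     [realignment padding]		<- stack_realign_offset
     SSE register save area		<- sse_reg_save_offset
     va_arg register save area
     local stack frame			<- frame_pointer_offset
     outgoing arguments
     end of frame			<- stack_pointer_offset  */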
/* This is semi-inlined memory_address_length, but simplified
   since we know that we're always dealing with reg+offset, and
   to avoid having to create and discard all that rtl.  */

static inline int
choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
{
  int len = 4;

  if (offset == 0)
    {
      /* EBP and R13 cannot be encoded without an offset.  */
      len = (regno == BP_REG || regno == R13_REG);
    }
  else if (IN_RANGE (offset, -128, 127))
    len = 1;

  /* ESP and R12 must be encoded with a SIB byte.  */
  if (regno == SP_REG || regno == R12_REG)
    len++;

  return len;
}
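
/* Examples of the returned displacement cost: (%rsp) needs only a SIB
   byte (len 1), (%rbp) needs a disp8 (len 1), -128(%rax) is len 1, and
   4096(%rax) needs a disp32 (len 4); %r12 behaves like %rsp and %r13
   like %rbp.  */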
/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */

static bool
sp_valid_at (HOST_WIDE_INT cfa_offset)
{
  const struct machine_frame_state &fs = cfun->machine->fs;
  if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
    {
      /* Validate that the cfa_offset isn't in a "no-man's land".  */
      gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
      return false;
    }
  return fs.sp_valid;
}

/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
   the frame save area.  The register is saved at CFA - CFA_OFFSET.  */

static bool
fp_valid_at (HOST_WIDE_INT cfa_offset)
{
  const struct machine_frame_state &fs = cfun->machine->fs;
  if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
    {
      /* Validate that the cfa_offset isn't in a "no-man's land".  */
      gcc_assert (cfa_offset >= fs.sp_realigned_offset);
      return false;
    }
  return fs.fp_valid;
}
/* Choose a base register based upon alignment requested, speed and/or
   size.  */

static void
choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
		HOST_WIDE_INT &base_offset,
		unsigned int align_requested, unsigned int *align)
{
  const struct machine_function *m = cfun->machine;
  unsigned int hfp_align;
  unsigned int drap_align;
  unsigned int sp_align;
  bool hfp_ok = fp_valid_at (cfa_offset);
  bool drap_ok = m->fs.drap_valid;
  bool sp_ok = sp_valid_at (cfa_offset);

  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;

  /* Filter out any registers that don't meet the requested alignment
     criteria.  */
  if (align_requested)
    {
      if (m->fs.realigned)
	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_requested;
      drap_ok = drap_ok && drap_align >= align_requested;
      sp_ok = sp_ok && sp_align >= align_requested;
    }

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
	 opportunities.  Generally FP is valid throughout the function,
	 while DRAP must be reloaded within the epilogue.  But choose either
	 over the SP due to increased encoding size.  */

      if (hfp_ok)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (drap_ok)
	{
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
	 With a tie, choose FP > DRAP > SP.  */
      if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	  len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (drap_ok)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (hfp_ok)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
    }

  /* Set the align return value.  */
  if (align)
    {
      if (base_reg == stack_pointer_rtx)
	*align = sp_align;
      else if (base_reg == crtl->drap_reg)
	*align = drap_align;
      else if (base_reg == hard_frame_pointer_rtx)
	*align = hfp_align;
    }
}
/* Return an RTX that points to CFA_OFFSET within the stack frame and
   the alignment of address.  If ALIGN is non-null, it should point to
   an alignment value (in bits) that is preferred or zero and will
   receive the alignment of the base register that was selected,
   irrespective of whether or not CFA_OFFSET is a multiple of that
   alignment value.  If it is possible for the base register offset to be
   non-immediate then SCRATCH_REGNO should specify a scratch register to
   use.

   The valid base registers are taken from CFUN->MACHINE->FS.  */

static rtx
choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
		 unsigned int scratch_regno = INVALID_REGNUM)
{
  rtx base_reg = NULL;
  HOST_WIDE_INT base_offset = 0;

  /* If a specific alignment is requested, try to get a base register
     with that alignment first.  */
  if (align && *align)
    choose_basereg (cfa_offset, base_reg, base_offset, *align, align);

  if (!base_reg)
    choose_basereg (cfa_offset, base_reg, base_offset, 0, align);

  gcc_assert (base_reg != NULL);

  rtx base_offset_rtx = GEN_INT (base_offset);

  if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
    {
      gcc_assert (scratch_regno != INVALID_REGNUM);

      rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      emit_move_insn (scratch_reg, base_offset_rtx);

      return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
    }

  return plus_constant (Pmode, base_reg, base_offset);
}
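
/* Typical use, as in ix86_emit_save_reg_using_mov below: seed *ALIGN with
   the mode alignment and wrap the result in a frame MEM:

     unsigned int align = GET_MODE_ALIGNMENT (mode);
     rtx addr = choose_baseaddr (cfa_offset, &align);
     rtx mem = gen_frame_mem (mode, addr);

   On return *ALIGN holds the alignment the chosen base register actually
   guarantees, which may be smaller than what was asked for.  */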
/* Emit code to save registers in the prologue.  */

static void
ix86_emit_save_regs (void)
{
  unsigned int regno;
  rtx_insn *insn;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
	RTX_FRAME_RELATED_P (insn) = 1;
      }
}
/* Emit a single register save at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  addr = choose_baseaddr (cfa_offset, &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
/* Emit code to save registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
	cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to save SSE registers using MOV insns.
   First register is stored at CFA - CFA_OFFSET.  */

static void
ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
      {
	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
static GTY(()) rtx queued_cfa_restores;

/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
   manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within stack red-zone till return, as unwinders can find the same value
   in the register and on the stack.  */

static void
ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
{
  if (!crtl->shrink_wrapped
      && cfa_offset <= cfun->machine->fs.red_zone_offset)
    return;

  if (insn)
    {
      add_reg_note (insn, REG_CFA_RESTORE, reg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    queued_cfa_restores
      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
}
/* Add queued REG_CFA_RESTORE notes if any to INSN.  */

static void
ix86_add_queued_cfa_restore_notes (rtx insn)
{
  rtx last;
  if (!queued_cfa_restores)
    return;
  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    ;
  XEXP (last, 1) = REG_NOTES (insn);
  REG_NOTES (insn) = queued_cfa_restores;
  queued_cfa_restores = NULL_RTX;
  RTX_FRAME_RELATED_P (insn) = 1;
}
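
/* The queue exists for the epilogue's benefit: pops emitted while the CFA
   is still elsewhere call ix86_add_cfa_restore_note with a NULL insn,
   which chains the notes onto queued_cfa_restores; the next stack
   adjustment emitted through pro_epilogue_adjust_stack then attaches the
   whole chain at once via the function above.  */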
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.  */

static rtx_insn *
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn;
  rtx addend = offset;
  bool add_frame_related_expr = false;

  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		    (Pmode, dest, src, addend));
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}
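
/* For instance, ix86_expand_prologue allocates the local frame with

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   the negative STYLE marks the insns frame related, and SET_CFA is true
   exactly while the CFA is still being tracked by the stack pointer.  */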
/* Find an available register to be used as a dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register.
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has longer
   encoding.

   Return: the regno of the chosen register.  */

static unsigned int
find_drap_reg (void)
{
  tree decl = cfun->decl;

  /* Always use a callee-saved register if there are no caller-saved
     registers.  */
  if (TARGET_64BIT)
    {
      /* Use R13 for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || cfun->machine->no_caller_saved_registers
	  || crtl->tail_call_emit)
	return R13_REG;

      return R10_REG;
    }
  else
    {
      /* Use DI for nested functions or functions that need a static
	 chain.  Since a function with a tail call may use any
	 caller-saved registers in the epilogue, DRAP must not use a
	 caller-saved register in such a case.  */
      if (DECL_STATIC_CHAIN (decl)
	  || cfun->machine->no_caller_saved_registers
	  || crtl->tail_call_emit)
	return DI_REG;

      /* Reuse the static chain register if it isn't used for parameter
	 passing.  */
      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
	{
	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
	    return CX_REG;
	}
      return DI_REG;
    }
}
/* Return minimum incoming stack alignment.  */

static unsigned int
ix86_minimum_incoming_stack_boundary (bool sibcall)
{
  unsigned int incoming_stack_boundary;

  /* Stack of interrupt handler is aligned to 128 bits in 64bit mode.  */
  if (cfun->machine->func_type != TYPE_NORMAL)
    incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
  /* Prefer the one specified at command line.  */
  else if (ix86_user_incoming_stack_boundary)
    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
     if -mstackrealign is used, it isn't used for sibcall check and
     estimated stack alignment is 128bit.  */
  else if (!sibcall
	   && ix86_force_align_arg_pointer
	   && crtl->stack_alignment_estimated == 128)
    incoming_stack_boundary = MIN_STACK_BOUNDARY;
  else
    incoming_stack_boundary = ix86_default_incoming_stack_boundary;

  /* Incoming stack alignment can be changed on individual functions
     via force_align_arg_pointer attribute.  We use the smallest
     incoming stack boundary.  */
  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
      && lookup_attribute ("force_align_arg_pointer",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    incoming_stack_boundary = MIN_STACK_BOUNDARY;

  /* The incoming stack frame has to be aligned at least at
     parm_stack_boundary.  */
  if (incoming_stack_boundary < crtl->parm_stack_boundary)
    incoming_stack_boundary = crtl->parm_stack_boundary;

  /* Stack at entrance of main is aligned by runtime.  We use the
     smallest incoming stack boundary.  */
  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
      && DECL_NAME (current_function_decl)
      && MAIN_NAME_P (DECL_NAME (current_function_decl))
      && DECL_FILE_SCOPE_P (current_function_decl))
    incoming_stack_boundary = MAIN_STACK_BOUNDARY;

  return incoming_stack_boundary;
}
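
/* Example results: on x86-64 with the default psABI this typically
   returns 128 bits (16 bytes), and a 64-bit interrupt handler also gets
   128; any function carrying the force_align_arg_pointer attribute is
   clamped down to MIN_STACK_BOUNDARY, and main can be lowered to
   MAIN_STACK_BOUNDARY since the runtime aligns its stack.  */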
/* Update incoming stack boundary and estimated stack alignment.  */

static void
ix86_update_stack_boundary (void)
{
  ix86_incoming_stack_boundary
    = ix86_minimum_incoming_stack_boundary (false);

  /* x86_64 vararg needs 16byte stack alignment for register save area.  */
  if (TARGET_64BIT
      && cfun->stdarg
      && crtl->stack_alignment_estimated < 128)
    crtl->stack_alignment_estimated = 128;

  /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
  if (ix86_tls_descriptor_calls_expanded_in_cfun
      && crtl->preferred_stack_boundary < 128)
    crtl->preferred_stack_boundary = 128;
}
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  /* We must use DRAP if there are outgoing arguments on stack or
     the stack pointer register is clobbered by an asm statement and
     ACCUMULATE_OUTGOING_ARGS is false.  */
  if (ix86_force_drap
      || ((cfun->machine->outgoing_args_on_stack
	   || crtl->sp_is_clobbered_by_asm)
	  && !ACCUMULATE_OUTGOING_ARGS))
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = get_insns ();
      end_sequence ();

      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
	{
	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      return drap_vreg;
    }
  else
    return NULL;
}

/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */

static rtx
ix86_internal_arg_pointer (void)
{
  return virtual_incoming_args_rtx;
}
struct scratch_reg {
  rtx reg;
  bool saved;
};

/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	 for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	 for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      else if (ix86_save_reg (BX_REG, true, false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true, false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true, false))
	regno = DI_REG;
      else
	{
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Release a scratch register obtained from the preceding function.

   If RELEASE_VIA_POP is true, we just pop the register off the stack
   to release it.  This is what non-Linux systems use with -fstack-check.

   Otherwise we use OFFSET to locate the saved register and the
   allocated stack space becomes part of the local frame and is
   deallocated by the epilogue.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
				   bool release_via_pop)
{
  if (sr->saved)
    {
      if (release_via_pop)
	{
	  struct machine_function *m = cfun->machine;
	  rtx x, insn = emit_insn (gen_pop (sr->reg));

	  /* The RTX FRAME_RELATED_P mechanism doesn't know about pop.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
	  m->fs.sp_offset -= UNITS_PER_WORD;
	}
      else
	{
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
	  x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
	  emit_insn (x);
	}
    }
}
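
/* The two release modes match the two probing routines below:
   ix86_adjust_stack_and_probe passes RELEASE_VIA_POP == false, folding the
   save slot into the local frame for the epilogue to reclaim, while
   ix86_emit_probe_stack_range pops the register back immediately with
   RELEASE_VIA_POP == true.  */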
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.

   If INT_REGISTERS_SAVED is true, then integer registers have already been
   pushed on the stack.

   If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
   of 4 words beyond SIZE bytes.

   This assumes no knowledge of the current probing state, i.e. it is never
   allowed to allocate more than PROBE_INTERVAL bytes of stack space without
   a suitable probe.  */

static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
			     const bool int_registers_saved,
			     const bool protection_area)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).  Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_push) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_pop) = 1;
	}
      emit_insn (gen_blockage ());
    }

  const HOST_WIDE_INT probe_interval = get_probe_interval ();
  const int dope = 4 * UNITS_PER_WORD;

  /* If there is protection area, take it into account in the size.  */
  if (protection_area)
    size += probe_interval + dope;

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  else if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx_insn *insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), -1,
					 m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual...  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (residual), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  /* ...except if there is a protection area to maintain.  */
	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (&sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx_insn *insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
						    size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (rounded_size - size), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (&sr, size, false);
    }

  /* Adjust back to account for the protection area.  */
  if (protection_area)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (probe_interval + dope), -1,
			       m->fs.cfa_reg == stack_pointer_rtx);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
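
/* Worked example with the default 4kB probe interval, assuming the
   scratch register did not itself need saving (e.g. %r11 in 64-bit
   mode): SIZE == 10000 takes the inline path, emitting two probed
   4096-byte adjustments followed by an unprobed 1808-byte residual;
   SIZE == 65536 exceeds 4 * PROBE_INTERVAL and instead runs the Step 1-4
   loop with rounded_size == 65536 and no residual adjustment.  */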
/* Adjust the stack pointer up to REG while probing it.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP - PROBE_INTERVAL.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
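
/* With the default 4kB interval the loop above comes out roughly as, in
   AT&T syntax:

	.LPSRL0:
	subq	$4096, %rsp
	orq	$0, (%rsp)
	cmpq	%rax, %rsp
	jne	.LPSRL0

   where %rax stands for whatever REG was passed in.  */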
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
			     const bool int_registers_saved)
{
  const HOST_WIDE_INT probe_interval = get_probe_interval ();

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * probe_interval)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = probe_interval; i < size; i += probe_interval)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, probe_interval);


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      emit_insn
	(gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode,
					 gen_rtx_PLUS (Pmode,
						       stack_pointer_rtx,
						       sr.reg),
					 rounded_size - size));

      release_scratch_register_on_entry (&sr, size, true);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
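
/* For example, probing 12kB beyond a 4kB protection offset with the
   default 4kB interval takes the inline path above: probes are emitted
   at sp - 8192 and sp - 12288, and the final probe lands at sp - 16384,
   i.e. FIRST + SIZE.  */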
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
/* Set stack_frame_required to false if a stack frame isn't required.
   Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
   slot used if stack frame is required and CHECK_STACK_SLOT is true.  */

static void
ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
				    bool check_stack_slot)
{
  HARD_REG_SET set_up_by_prologue, prologue_used;
  basic_block bb;

  CLEAR_HARD_REG_SET (prologue_used);
  CLEAR_HARD_REG_SET (set_up_by_prologue);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode,
		       HARD_FRAME_POINTER_REGNUM);

  /* The preferred stack alignment is the minimum stack alignment.  */
  if (stack_alignment > crtl->preferred_stack_boundary)
    stack_alignment = crtl->preferred_stack_boundary;

  bool require_stack_frame = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn)
	    && requires_stack_frame_p (insn, prologue_used,
				       set_up_by_prologue))
	  {
	    require_stack_frame = true;

	    if (check_stack_slot)
	      {
		/* Find the maximum stack alignment.  */
		subrtx_iterator::array_type array;
		FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
		  if (MEM_P (*iter)
		      && (reg_mentioned_p (stack_pointer_rtx,
					   *iter)
			  || reg_mentioned_p (frame_pointer_rtx,
					      *iter)))
		    {
		      unsigned int alignment = MEM_ALIGN (*iter);
		      if (alignment > stack_alignment)
			stack_alignment = alignment;
		    }
	      }
	  }
    }

  cfun->machine->stack_frame_required = require_stack_frame;
}
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
   will guide prologue/epilogue to be generated in correct form.  */

static void
ix86_finalize_stack_frame_flags (void)
{
  /* Check if stack realignment is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_alignment
    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed);
  unsigned int stack_realign
    = (incoming_stack_boundary < stack_alignment);
  bool recompute_frame_layout_p = false;

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
	 change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* It is always safe to compute max_used_stack_alignment.  We
     compute it only if 128-bit aligned load/store may be generated
     on misaligned stack slot which will lead to segfault.  */
  bool check_stack_slot
    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
  ix86_find_max_used_stack_alignment (stack_alignment,
				      check_stack_slot);

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed or -fno-omit-frame-pointer
     is used, but in the end nothing that needed the stack alignment had
     been spilled nor stack access, clear frame_pointer_needed and say we
     don't need stack realignment.  */
  if ((stack_realign || (!flag_omit_frame_pointer && optimize))
      && frame_pointer_needed
      && crtl->is_leaf
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      /* See ira_setup_eliminable_regset for the rationale.  */
      && !(STACK_CHECK_MOVING_SP
	   && flag_stack_check
	   && flag_exceptions
	   && cfun->can_throw_non_call_exceptions)
      && !ix86_frame_pointer_required ()
      && ix86_get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      if (cfun->machine->stack_frame_required)
	{
	  /* Stack frame is required.  If stack alignment needed is less
	     than incoming stack boundary, don't realign stack.  */
	  stack_realign = incoming_stack_boundary < stack_alignment;
	  if (!stack_realign)
	    {
	      crtl->max_used_stack_slot_alignment
		= incoming_stack_boundary;
	      crtl->stack_alignment_needed
		= incoming_stack_boundary;
	      /* Also update preferred_stack_boundary for leaf
		 functions.  */
	      crtl->preferred_stack_boundary
		= incoming_stack_boundary;
	    }
	}
      else
	{
	  /* If drap has been set, but it actually isn't live at the
	     start of the function, there is no reason to set it up.  */
	  if (crtl->drap_reg)
	    {
	      basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
	      if (! REGNO_REG_SET_P (DF_LR_IN (bb),
				     REGNO (crtl->drap_reg)))
		{
		  crtl->drap_reg = NULL_RTX;
		  crtl->need_drap = false;
		}
	    }
	  else
	    cfun->machine->no_drap_save_restore = true;

	  frame_pointer_needed = false;
	  stack_realign = false;
	  crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
	  crtl->stack_alignment_needed = incoming_stack_boundary;
	  crtl->stack_alignment_estimated = incoming_stack_boundary;
	  if (crtl->preferred_stack_boundary > incoming_stack_boundary)
	    crtl->preferred_stack_boundary = incoming_stack_boundary;
	  df_finish_pass (true);
	  df_scan_alloc (NULL);
	  df_scan_blocks ();
	  df_compute_regs_ever_live (true);
	  df_analyze ();

	  if (flag_var_tracking)
	    {
	      /* Since frame pointer is no longer available, replace it with
		 stack pointer - UNITS_PER_WORD in debug insns.  */
	      df_ref ref, next;
	      for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
		   ref; ref = next)
		{
		  next = DF_REF_NEXT_REG (ref);
		  if (!DF_REF_INSN_INFO (ref))
		    continue;

		  /* Make sure the next ref is for a different instruction,
		     so that we're not affected by the rescan.  */
		  rtx_insn *insn = DF_REF_INSN (ref);
		  while (next && DF_REF_INSN (next) == insn)
		    next = DF_REF_NEXT_REG (next);

		  if (DEBUG_INSN_P (insn))
		    {
		      bool changed = false;
		      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
			{
			  rtx *loc = DF_REF_LOC (ref);
			  if (*loc == hard_frame_pointer_rtx)
			    {
			      *loc = plus_constant (Pmode,
						    stack_pointer_rtx,
						    -UNITS_PER_WORD);
			      changed = true;
			    }
			}
		      if (changed)
			df_insn_rescan (insn);
		    }
		}
	    }

	  recompute_frame_layout_p = true;
	}
    }
  else if (crtl->max_used_stack_slot_alignment >= 128
	   && cfun->machine->stack_frame_required)
    {
      /* We don't need to realign stack.  max_used_stack_alignment is
	 used to decide how stack frame should be aligned.  This is
	 independent of any psABIs nor 32-bit vs 64-bit.  */
      cfun->machine->max_used_stack_alignment
	= stack_alignment / BITS_PER_UNIT;
    }

  if (crtl->stack_realign_needed != stack_realign)
    recompute_frame_layout_p = true;
  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
  if (recompute_frame_layout_p)
    ix86_compute_frame_layout ();
}
/* Delete SET_GOT right after entry block if it is allocated to reg.  */

static void
ix86_elim_entry_set_got (rtx reg)
{
  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
  rtx_insn *c_insn = BB_HEAD (bb);
  if (!NONDEBUG_INSN_P (c_insn))
    c_insn = next_nonnote_nondebug_insn (c_insn);
  if (c_insn && NONJUMP_INSN_P (c_insn))
    {
      rtx pat = PATTERN (c_insn);
      if (GET_CODE (pat) == PARALLEL)
	{
	  rtx vec = XVECEXP (pat, 0, 0);
	  if (GET_CODE (vec) == SET
	      && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
	      && REGNO (XEXP (vec, 0)) == REGNO (reg))
	    delete_insn (c_insn);
	}
    }
}
static rtx
gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
{
  rtx addr, mem;

  if (offset)
    addr = plus_constant (Pmode, frame_reg, offset);
  mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
  return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
}

static rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, false);
}

static rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, true);
}
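
/* These helpers build the members of the PARALLEL used below for the
   out-of-line save stub; e.g. gen_frame_store (reg, rax, -r.offset)
   yields (set (mem (plus (reg rax) (const_int -offset))) (reg)) without
   emitting it.  */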
static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Set up RAX as the stub's base pointer.  We use stack_realign_offset
     regardless of whether we've actually realigned the stack or not.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (frame.stack_realign_offset
			  + xlogue.get_stub_ptr_offset (), &align, AX_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
						  : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
			     r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
/* Generate and return an insn body to AND X with Y.  */

static rtx_insn *
gen_and2_insn (rtx x, rtx y)
{
  enum insn_code icode = optab_handler (and_optab, GET_MODE (x));

  gcc_assert (insn_operand_matches (icode, 0, x));
  gcc_assert (insn_operand_matches (icode, 1, x));
  gcc_assert (insn_operand_matches (icode, 2, y));

  return GEN_FCN (icode) (x, x, y);
}
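
/* This is what the prologue uses to realign the stack, e.g.

     emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));

   ANDing the stack pointer with the negated alignment rounds it down to
   the requested power-of-two boundary.  */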
/* Expand the prologue into a bunch of separate insns.  */

void
ix86_expand_prologue (void)
{
  struct machine_function *m = cfun->machine;
  rtx insn, t;
  HOST_WIDE_INT allocate;
  bool int_registers_saved;
  bool sse_registers_saved;
  bool save_stub_call_needed;
  rtx static_chain = NULL_RTX;

  if (ix86_function_naked (current_function_decl))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  ix86_finalize_stack_frame_flags ();

  /* DRAP should not coexist with stack_realign_fp.  */
  gcc_assert (!(crtl->drap_reg && stack_realign_fp));

  memset (&m->fs, 0, sizeof (m->fs));

  /* Initialize CFA state for before the prologue.  */
  m->fs.cfa_reg = stack_pointer_rtx;
  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;

  /* Track SP offset to the CFA.  We continue tracking this after we've
     swapped the CFA register away from SP.  In the case of re-alignment
     this is fudged; we're interested in offsets within the local frame.  */
  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;

  const struct ix86_frame &frame = cfun->machine->frame;

  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    {
      /* We should have already generated an error for any use of
	 ms_hook on a nested function.  */
      gcc_checking_assert (!ix86_static_chain_on_stack);

      /* Check if profiling is active and we shall use profiling before
	 prologue variant.  If so sorry.  */
      if (crtl->profile && flag_fentry != 0)
	sorry ("%<ms_hook_prologue%> attribute is not compatible "
	       "with %<-mfentry%> for 32-bit");

      /* In ix86_asm_output_function_label we emitted:
	 8b ff     movl.s %edi,%edi
	 55        push   %ebp
	 8b ec     movl.s %esp,%ebp

	 This matches the hookable function prologue in Win32 API
	 functions in Microsoft Windows XP Service Pack 2 and newer.
	 Wine uses this to enable Windows apps to hook the Win32 API
	 functions provided by Wine.

	 What that means is that we've already set up the frame pointer.  */

      if (frame_pointer_needed
	  && !(crtl->drap_reg && crtl->stack_realign_needed))
	{
	  rtx push, mov;

	  /* We've decided to use the frame pointer already set up.
	     Describe this to the unwinder by pretending that both
	     push and mov insns happen right here.

	     Putting the unwind info here at the end of the ms_hook
	     is done so that we can make absolutely certain we get
	     the required byte sequence at the start of the function,
	     rather than relying on an assembler that can produce
	     the exact encoding required.

	     However it does mean (in the unpatched case) that we have
	     a 1 insn window where the asynchronous unwind info is
	     incorrect.  However, if we placed the unwind info at
	     its correct location we would have incorrect unwind info
	     in the patched case.  Which is probably all moot since
	     I don't expect Wine generates dwarf2 unwind info for the
	     system libraries that use this feature.  */

	  insn = emit_insn (gen_blockage ());

	  push = gen_push (hard_frame_pointer_rtx);
	  mov = gen_rtx_SET (hard_frame_pointer_rtx,
			     stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (push) = 1;
	  RTX_FRAME_RELATED_P (mov) = 1;

	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));

	  /* Note that gen_push incremented m->fs.cfa_offset, even
	     though we didn't emit the push insn here.  */
	  m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.cfa_offset;
	  m->fs.fp_valid = true;
	}
      else
	{
	  /* The frame pointer is not needed so pop %ebp again.
	     This leaves us with a pristine state.  */
	  emit_insn (gen_pop (hard_frame_pointer_rtx));
	}
    }

  /* The first insn of a function that accepts its static chain on the
     stack is to push the register that would be filled in by a direct
     call.  This insn will be skipped by the trampoline.  */
  else if (ix86_static_chain_on_stack)
    {
      static_chain = ix86_static_chain (cfun->decl, false);
      insn = emit_insn (gen_push (static_chain));
      emit_insn (gen_blockage ());

      /* We don't want to interpret this push insn as a register save,
	 only as a stack adjustment.  The real copy of the register as
	 a save will be done later, if needed.  */
      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
      t = gen_rtx_SET (stack_pointer_rtx, t);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Emit prologue code to adjust stack alignment and setup DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
  if (stack_realign_drap)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;

      /* Can't use DRAP in interrupt function.  */
      if (cfun->machine->func_type != TYPE_NORMAL)
	sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
	       "in interrupt service routine.  This may be worked "
	       "around by avoiding functions with aggregate return.");

      /* Only need to push parameter pointer reg if it is caller saved.  */
      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	{
	  /* Push arg pointer reg.  */
	  insn = emit_insn (gen_push (crtl->drap_reg));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Grab the argument pointer.  */
      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
      insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
      RTX_FRAME_RELATED_P (insn) = 1;
      m->fs.cfa_reg = crtl->drap_reg;
      m->fs.cfa_offset = 0;

      /* Align the stack.  */
      insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
				       GEN_INT (-align_bytes)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Replicate the return address on the stack so that return
	 address can be reached via (argp - 1) slot.  This is needed
	 to implement macro RETURN_ADDR_RTX and intrinsic function
	 expand_builtin_return_addr etc.  */
      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
      t = gen_frame_mem (word_mode, t);
      insn = emit_insn (gen_push (t));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* For the purposes of frame and register save area addressing,
	 we've started over with a new frame.  */
      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
      m->fs.realigned = true;

      if (static_chain)
	{
	  /* Replicate static chain on the stack so that static chain
	     can be reached via (argp - 2) slot.  This is needed for
	     nested function with stack realignment.  */
	  insn = emit_insn (gen_push (static_chain));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  int_registers_saved = (frame.nregs == 0);
  sse_registers_saved = (frame.nsseregs == 0);
  save_stub_call_needed = (m->call_ms2sysv);
  gcc_assert (sse_registers_saved || !save_stub_call_needed);

  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
	 slower on all targets.  Also sdb didn't like it.  */
      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push registers now, before setting the frame pointer
	 on SEH target.  */
      if (!int_registers_saved
	  && TARGET_SEH
	  && !frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
	{
	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  if (m->fs.cfa_reg == stack_pointer_rtx)
	    m->fs.cfa_reg = hard_frame_pointer_rtx;
	  m->fs.fp_offset = m->fs.sp_offset;
	  m->fs.fp_valid = true;
	}
    }

  if (!int_registers_saved)
    {
      /* If saving registers via PUSH, do so now.  */
      if (!frame.save_regs_using_mov)
	{
	  ix86_emit_save_regs ();
	  int_registers_saved = true;
	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
	}

      /* When using red zone we may start register saving before allocating
	 the stack frame saving one cycle of the prologue.  However, avoid
	 doing this if we have to probe the stack; at least on x86_64 the
	 stack probe can turn into a call that clobbers a red zone location. */
      else if (ix86_using_red_zone ()
	       && (! TARGET_STACK_PROBE
		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
	{
	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
	  int_registers_saved = true;
	}
    }

  if (stack_realign_fp)
    {
      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);

      /* Record last valid frame pointer offset.  */
      m->fs.sp_realigned_fp_last = frame.reg_save_offset;

      /* The computation of the size of the re-aligned stack frame means
	 that we must allocate the size of the register save area before
	 performing the actual alignment.  Otherwise we cannot guarantee
	 that there's enough storage above the realignment point.  */
      allocate = frame.reg_save_offset - m->fs.sp_offset
		 + frame.stack_realign_allocate;
      if (allocate)
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-allocate), -1, false);

      /* Align the stack.  */
      emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
      m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
      m->fs.sp_realigned_offset = m->fs.sp_offset
				  - frame.stack_realign_allocate;
      /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
	 Beyond this point, stack access should be done via choose_baseaddr or
	 by using sp_valid_at and fp_valid_at to determine the correct base
	 register.  Henceforth, any CFA offset should be thought of as logical
	 and not physical.  */
      gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
      gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
      m->fs.sp_realigned = true;

      /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
	 is needed to describe where a register is saved using a realigned
	 stack pointer, so we need to invalidate the stack pointer for that
	 target.  */
      if (TARGET_SEH)
	m->fs.sp_valid = false;

      /* If SP offset is non-immediate after allocation of the stack frame,
	 then emit SSE saves or stub call prior to allocating the rest of the
	 stack frame.  This is less efficient for the out-of-line stub because
	 we can't combine allocations across the call barrier, but it's better
	 than using a scratch register.  */
      else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
						   - m->fs.sp_realigned_offset),
					  Pmode))
	{
	  if (!sse_registers_saved)
	    {
	      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
	      sse_registers_saved = true;
	    }
	  else if (save_stub_call_needed)
	    {
	      ix86_emit_outlined_ms2sysv_save (frame);
	      save_stub_call_needed = false;
	    }
	}
    }

  allocate = frame.stack_pointer_offset - m->fs.sp_offset;

  if (flag_stack_usage_info)
    {
      /* We start to count from ARG_POINTER.  */
      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;

      /* If it was realigned, take into account the fake frame.  */
      if (stack_realign_drap)
	{
	  if (ix86_static_chain_on_stack)
	    stack_size += UNITS_PER_WORD;

	  if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
	    stack_size += UNITS_PER_WORD;

	  /* This over-estimates by 1 minimal-stack-alignment-unit but
	     mitigates that by counting in the new return address slot.  */
	  current_function_dynamic_stack_size
	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
	}

      current_function_static_stack_size = stack_size;
    }

  /* On SEH target with very large frame size, allocate an area to save
     SSE registers (as the very large allocation won't be described).  */
  if (TARGET_SEH
      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
      && !sse_registers_saved)
    {
      HOST_WIDE_INT sse_size
	= frame.sse_reg_save_offset - frame.reg_save_offset;

      gcc_assert (int_registers_saved);

      /* No need to do stack checking as the area will be immediately
	 written.  */
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-sse_size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      allocate -= sse_size;
      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
      sse_registers_saved = true;
    }

  /* If stack clash protection is requested, then probe the stack.  */
  if (allocate >= 0 && flag_stack_clash_protection)
    {
      ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
      allocate = 0;
    }

  /* The stack has already been decremented by the instruction calling us
     so probe if the size is non-negative to preserve the protection area.  */
  else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      const HOST_WIDE_INT probe_interval = get_probe_interval ();

      if (STACK_CHECK_MOVING_SP)
	{
	  if (crtl->is_leaf
	      && !cfun->calls_alloca
	      && allocate <= probe_interval)
	    ;
	  else
	    {
	      ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
	      allocate = 0;
	    }
	}
      else
	{
	  HOST_WIDE_INT size = allocate;

	  if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
	    size = 0x80000000 - get_stack_check_protect () - 1;

	  if (TARGET_STACK_PROBE)
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > probe_interval)
		    ix86_emit_probe_stack_range (0, size, int_registers_saved);
		}
	      else
		ix86_emit_probe_stack_range (0,
					     size + get_stack_check_protect (),
					     int_registers_saved);
	    }
	  else
	    {
	      if (crtl->is_leaf && !cfun->calls_alloca)
		{
		  if (size > probe_interval
		      && size > get_stack_check_protect ())
		    ix86_emit_probe_stack_range (get_stack_check_protect (),
						 (size
						  - get_stack_check_protect ()),
						 int_registers_saved);
		}
	      else
		ix86_emit_probe_stack_range (get_stack_check_protect (), size,
					     int_registers_saved);
	    }
	}
    }

  if (allocate == 0)
    ;
  else if (!ix86_target_stack_probe ()
	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-allocate), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
    }
  else
    {
      rtx eax = gen_rtx_REG (Pmode, AX_REG);
      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
      bool eax_live = ix86_eax_live_at_start_p ();
      bool r10_live = false;

      if (TARGET_64BIT)
	r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);

      if (eax_live)
	{
	  insn = emit_insn (gen_push (eax));
	  allocate -= UNITS_PER_WORD;
	  /* Note that SEH directives need to continue tracking the stack
	     pointer even after the frame pointer has been set up.  */
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      if (r10_live)
	{
	  rtx r10 = gen_rtx_REG (Pmode, R10_REG);
	  insn = emit_insn (gen_push (r10));
	  allocate -= UNITS_PER_WORD;
	  if (sp_is_cfa_reg || TARGET_SEH)
	    {
	      if (sp_is_cfa_reg)
		m->fs.cfa_offset += UNITS_PER_WORD;
	      RTX_FRAME_RELATED_P (insn) = 1;
	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			    gen_rtx_SET (stack_pointer_rtx,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							-UNITS_PER_WORD)));
	    }
	}

      emit_move_insn (eax, GEN_INT (allocate));
      emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));

      /* Use the fact that AX still contains ALLOCATE.  */
      insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
			(Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));

      if (sp_is_cfa_reg || TARGET_SEH)
	{
	  if (sp_is_cfa_reg)
	    m->fs.cfa_offset += allocate;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -allocate)));
	}
      m->fs.sp_offset += allocate;

      /* Use stack_pointer_rtx for relative addressing so that code works for
	 realigned stack.  But this means that we need a blockage to prevent
	 stores based on the frame pointer from being scheduled before.  */
      if (r10_live && eax_live)
	{
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
			  gen_frame_mem (word_mode, t));
	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
			  gen_frame_mem (word_mode, t));
	  emit_insn (gen_memory_blockage ());
	}
      else if (eax_live || r10_live)
	{
	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
	  emit_move_insn (gen_rtx_REG (word_mode,
				       (eax_live ? AX_REG : R10_REG)),
			  gen_frame_mem (word_mode, t));
	  emit_insn (gen_memory_blockage ());
	}
    }
  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);

  /* If we haven't already set up the frame pointer, do so now.  */
  if (frame_pointer_needed && !m->fs.fp_valid)
    {
      insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (frame.stack_pointer_offset
				     - frame.hard_frame_pointer_offset));
      insn = emit_insn (insn);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);

      if (m->fs.cfa_reg == stack_pointer_rtx)
	m->fs.cfa_reg = hard_frame_pointer_rtx;
      m->fs.fp_offset = frame.hard_frame_pointer_offset;
      m->fs.fp_valid = true;
    }

  if (!int_registers_saved)
    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
  if (!sse_registers_saved)
    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
  else if (save_stub_call_needed)
    ix86_emit_outlined_ms2sysv_save (frame);

  /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
     in the prologue.  */
  if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
    {
      rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
      insn = emit_insn (gen_set_got (pic));
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
      emit_insn (gen_prologue_use (pic));
      /* Delete an already emitted SET_GOT if it exists and was allocated
	 to REAL_PIC_OFFSET_TABLE_REGNUM.  */
      ix86_elim_entry_set_got (pic);
    }

  if (crtl->drap_reg && !crtl->stack_realign_needed)
    {
      /* vDRAP is set up, but after reload it turns out stack realignment
	 isn't necessary; here we emit the prologue to set up DRAP
	 without the stack realignment adjustment.  */
      t = choose_baseaddr (0, NULL);
      emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    }

  /* Prevent instructions from being scheduled into register save push
     sequence when access to the redzone area is done through frame pointer.
     The offset between the frame pointer and the stack pointer is calculated
     relative to the value of the stack pointer at the end of the function
     prologue, and moving instructions that access redzone area via frame
     pointer inside push sequence violates this assumption.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  /* SEH requires that the prologue end within 256 bytes of the start of
     the function.  Prevent instruction schedules that would extend that.
     Further, prevent alloca modifications to the stack pointer from being
     combined with prologue modifications.  */
  if (TARGET_SEH)
    emit_insn (gen_prologue_use (stack_pointer_rtx));
}
/* Emit code to restore REG using a POP insn.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (reg));

  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
         like *(%ebp - 8).  We've just popped that value from
         the stack, which means we need to reset the CFA to
         the drap register.  This will remain until we restore
         the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
        {
          m->fs.cfa_reg = stack_pointer_rtx;
          m->fs.cfa_offset -= UNITS_PER_WORD;

          add_reg_note (insn, REG_CFA_DEF_CFA,
                        plus_constant (Pmode, stack_pointer_rtx,
                                       m->fs.cfa_offset));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
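
/* For example, when the CFA is the stack pointer, restoring %rbx with
   a pop is annotated so the unwinder sees both effects of the insn:

        (insn (set (reg:DI bx) (mem:DI (post_inc (reg:DI sp)))))
          REG_CFA_ADJUST_CFA: (set (reg sp) (plus (reg sp) (const_int 8)))

   i.e. the register restore and the word-sized CFA adjustment are
   carried on the same insn.  (Schematic RTL, not a literal dump.)  */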
/* Emit code to restore saved registers using POP insns.  */

static void
ix86_emit_restore_regs_using_pop (void)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
}
/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
   the emit is omitted and only the notes are attached.  */

static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;

  if (!insn)
    insn = emit_insn (gen_leave (word_mode));

  ix86_add_queued_cfa_restore_notes (insn);

  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (Pmode, stack_pointer_rtx,
                                   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
                             m->fs.fp_offset);
}
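
/* The LEAVE instruction is equivalent to

        mov  %rbp, %rsp
        pop  %rbp

   which is why the code above revalidates SP from FP_OFFSET and
   invalidates the frame pointer in a single step.  */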
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
        rtx reg = gen_rtx_REG (word_mode, regno);
        rtx mem;
        rtx_insn *insn;

        mem = choose_baseaddr (cfa_offset, NULL);
        mem = gen_frame_mem (word_mode, mem);
        insn = emit_move_insn (reg, mem);

        if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
          {
            /* Previously we'd represented the CFA as an expression
               like *(%ebp - 8).  We've just popped that value from
               the stack, which means we need to reset the CFA to
               the drap register.  This will remain until we restore
               the stack pointer.  */
            add_reg_note (insn, REG_CFA_DEF_CFA, reg);
            RTX_FRAME_RELATED_P (insn) = 1;

            /* This means that the DRAP register is valid for addressing.  */
            m->fs.drap_valid = true;
          }
        else
          ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

        cfa_offset -= UNITS_PER_WORD;
      }
}
/* Emit code to restore saved registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  */

static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
        rtx reg = gen_rtx_REG (V4SFmode, regno);
        rtx mem;
        unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);

        mem = choose_baseaddr (cfa_offset, &align);
        mem = gen_rtx_MEM (V4SFmode, mem);

        /* The location alignment depends upon the base register.  */
        align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
        gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
        set_mem_align (mem, align);
        emit_insn (gen_rtx_SET (reg, mem));

        ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

        cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
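
/* Each iteration above emits a plain V4SF load, e.g.

        movaps 0x20(%rsp), %xmm6

   with the MEM alignment clamped so that an aligned move can be used
   whenever the base register guarantees it.  (Illustrative offset and
   register; the actual operands follow the frame layout.)  */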
static void
ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
                                    bool use_call, int style)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
                          + m->call_ms2sysv_extra_regs;
  rtvec v;
  unsigned int elems_needed, align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, tmp;
  rtx rsi = gen_rtx_REG (word_mode, SI_REG);
  rtx r10 = NULL_RTX;
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
  HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
  HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
  rtx rsi_frame_load = NULL_RTX;
  HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
  enum xlogue_stub stub;

  gcc_assert (!m->fs.fp_valid || frame_pointer_needed);

  /* If using a realigned stack, we should never start with padding.  */
  gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());

  /* Set up RSI as the stub's base pointer.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rsi, tmp));

  /* Get a symbol for the stub.  */
  if (frame_pointer_needed)
    stub = use_call ? XLOGUE_STUB_RESTORE_HFP
                    : XLOGUE_STUB_RESTORE_HFP_TAIL;
  else
    stub = use_call ? XLOGUE_STUB_RESTORE
                    : XLOGUE_STUB_RESTORE_TAIL;
  sym = xlogue.get_stub_rtx (stub);

  elems_needed = ncregs;
  if (use_call)
    elems_needed += 1;
  else
    elems_needed += frame_pointer_needed ? 5 : 3;
  v = rtvec_alloc (elems_needed);

  /* We call the epilogue stub when we need to pop incoming args or we are
     doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
     epilogue stub and it is the tail-call.  */
  if (use_call)
    RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
  else
    {
      RTVEC_ELT (v, vi++) = ret_rtx;
      RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
      if (frame_pointer_needed)
        {
          rtx rbp = gen_rtx_REG (DImode, BP_REG);
          gcc_assert (m->fs.fp_valid);
          gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);

          tmp = plus_constant (DImode, rbp, 8);
          RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
          RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
          tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
          RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
        }
      else
        {
          /* If no hard frame pointer, we set R10 to the SP restore value.  */
          gcc_assert (!m->fs.fp_valid);
          gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
          gcc_assert (m->fs.sp_valid);

          r10 = gen_rtx_REG (DImode, R10_REG);
          tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
          emit_insn (gen_rtx_SET (r10, tmp));

          RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
        }
    }

  /* Generate frame load insns and restore notes.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
      rtx reg, frame_load;

      reg = gen_rtx_REG (mode, r.regno);
      frame_load = gen_frame_load (reg, rsi, r.offset);

      /* Save RSI frame load insn & note to add last.  */
      if (r.regno == SI_REG)
        {
          gcc_assert (!rsi_frame_load);
          rsi_frame_load = frame_load;
          rsi_restore_offset = r.offset;
        }
      else
        {
          RTVEC_ELT (v, vi++) = frame_load;
          ix86_add_cfa_restore_note (NULL, reg, r.offset);
        }
    }

  /* Add RSI frame load & restore note at the end.  */
  gcc_assert (rsi_frame_load);
  gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
  RTVEC_ELT (v, vi++) = rsi_frame_load;
  ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
                             rsi_restore_offset);

  /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
  if (!use_call && !frame_pointer_needed)
    {
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);

      /* At this point, R10 should point to frame.stack_realign_offset.  */
      if (m->fs.cfa_reg == stack_pointer_rtx)
        m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
      m->fs.sp_offset = frame.stack_realign_offset;
    }

  gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
  tmp = gen_rtx_PARALLEL (VOIDmode, v);
  if (use_call)
    insn = emit_insn (tmp);
  else
    {
      insn = emit_jump_insn (tmp);
      JUMP_LABEL (insn) = ret_rtx;

      if (frame_pointer_needed)
        ix86_emit_leave (insn);
      else
        {
          /* Need CFA adjust note.  */
          tmp = gen_rtx_SET (stack_pointer_rtx, r10);
          add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
        }
    }

  RTX_FRAME_RELATED_P (insn) = true;
  ix86_add_queued_cfa_restore_notes (insn);

  /* If we're not doing a tail-call, we need to adjust the stack.  */
  if (use_call && m->fs.sp_valid)
    {
      HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (dealloc), style,
                                 m->fs.cfa_reg == stack_pointer_rtx);
    }
}
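
/* Schematically, the tail-call form of the stub sequence built above is
   a single jump insn wrapping a PARALLEL along the lines of

        (parallel [(return)
                   (use (symbol_ref <restore stub>))
                   (set (reg sp) ...)           ; SP restore
                   (set (reg ...) (mem ...))    ; one frame load per
                   ...])                        ;  clobbered register

   The stub symbol shown is a placeholder; the real one comes from
   xlogue_layout::get_stub_rtx.  */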
/* Restore function stack, frame, and registers.  */

void
ix86_expand_epilogue (int style)
{
  struct machine_function *m = cfun->machine;
  struct machine_frame_state frame_state_save = m->fs;
  bool restore_regs_via_mov;
  bool using_drap;
  bool restore_stub_is_tail = false;

  if (ix86_function_naked (current_function_decl))
    {
      /* The program should not reach this point.  */
      emit_insn (gen_ud2 ());
      return;
    }

  ix86_finalize_stack_frame_flags ();
  const struct ix86_frame &frame = cfun->machine->frame;

  m->fs.sp_realigned = stack_realign_fp;
  m->fs.sp_valid = stack_realign_fp
                   || !frame_pointer_needed
                   || crtl->sp_is_unchanging;
  gcc_assert (!m->fs.sp_valid
              || m->fs.sp_offset == frame.stack_pointer_offset);

  /* The FP must be valid if the frame pointer is present.  */
  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
  gcc_assert (!m->fs.fp_valid
              || m->fs.fp_offset == frame.hard_frame_pointer_offset);

  /* We must have *some* valid pointer to the stack frame.  */
  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);

  /* The DRAP is never valid at this point.  */
  gcc_assert (!m->fs.drap_valid);

  /* See the comment about red zone and frame
     pointer usage in ix86_expand_prologue.  */
  if (frame_pointer_needed && frame.red_zone_size)
    emit_insn (gen_memory_blockage ());

  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);

  /* Determine the CFA offset of the end of the red-zone.  */
  m->fs.red_zone_offset = 0;
  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
    {
      /* The red-zone begins below the return address and the error code
         in the exception handler.  */
      m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;

      /* When the register save area is in the aligned portion of
         the stack, determine the maximum runtime displacement that
         matches up with the aligned frame.  */
      if (stack_realign_drap)
        m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
                                  + UNITS_PER_WORD);
    }

  HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;

  /* Special care must be taken for the normal return case of a function
     using eh_return: the eax and edx registers are marked as saved, but
     not restored along this path.  Adjust the save location to match.  */
  if (crtl->calls_eh_return && style != 2)
    reg_save_offset -= 2 * UNITS_PER_WORD;

  /* EH_RETURN requires the use of moves to function properly.  */
  if (crtl->calls_eh_return)
    restore_regs_via_mov = true;
  /* SEH requires the use of pops to identify the epilogue.  */
  else if (TARGET_SEH)
    restore_regs_via_mov = false;
  /* If we're only restoring one register and sp cannot be used then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.  */
  else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
    restore_regs_via_mov = true;
  else if (TARGET_EPILOGUE_USING_MOVE
           && cfun->machine->use_fast_prologue_epilogue
           && (frame.nregs > 1
               || m->fs.sp_offset != reg_save_offset))
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && !frame.nregs
           && m->fs.sp_offset != reg_save_offset)
    restore_regs_via_mov = true;
  else if (frame_pointer_needed
           && TARGET_USE_LEAVE
           && cfun->machine->use_fast_prologue_epilogue
           && frame.nregs == 1)
    restore_regs_via_mov = true;
  else
    restore_regs_via_mov = false;
  if (restore_regs_via_mov || frame.nsseregs)
    {
      /* Ensure that the entire register save area is addressable via
         the stack pointer, if we will restore SSE regs via sp.  */
      if (TARGET_64BIT
          && m->fs.sp_offset > 0x7fffffff
          && sp_valid_at (frame.stack_realign_offset + 1)
          && (frame.nsseregs + frame.nregs) != 0)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - frame.sse_reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }
    }

  /* If there are any SSE registers to restore, then we have to do it
     via moves, since there's obviously no pop for SSE regs.  */
  if (frame.nsseregs)
    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
                                          style == 2);

  if (m->call_ms2sysv)
    {
      int pop_incoming_args = crtl->args.pops_args && crtl->args.size;

      /* We cannot use a tail-call for the stub if:
         1. We have to pop incoming args,
         2. We have additional int regs to restore, or
         3. A sibling call will be the tail-call, or
         4. We are emitting an eh_return_internal epilogue.

         TODO: Item 4 has not yet been tested!

         If any of the above are true, we will call the stub rather than
         jump to it directly.  */
      restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
      ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
    }

  /* If using an out-of-line stub that is a tail-call, then...  */
  if (m->call_ms2sysv && restore_stub_is_tail)
    {
      /* TODO: paranoid tests.  (Remove eventually.)  */
      gcc_assert (m->fs.sp_valid);
      gcc_assert (!m->fs.sp_realigned);
      gcc_assert (!m->fs.fp_valid);
      gcc_assert (!m->fs.realigned);
      gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
      gcc_assert (!crtl->drap_reg);
      gcc_assert (!frame.nregs);
    }
  else if (restore_regs_via_mov)
    {
      rtx t;

      if (frame.nregs)
        ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);

      /* eh_return epilogues need %ecx added to the stack pointer.  */
      if (style == 2)
        {
          rtx sa = EH_RETURN_STACKADJ_RTX;
          rtx_insn *insn;

          /* %ecx can't be used for both DRAP register and eh_return.  */
          if (crtl->drap_reg)
            gcc_assert (REGNO (crtl->drap_reg) != CX_REG);

          /* regparm nested functions don't work with eh_return.  */
          gcc_assert (!ix86_static_chain_on_stack);

          if (frame_pointer_needed)
            {
              t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
              emit_insn (gen_rtx_SET (sa, t));

              /* NB: eh_return epilogues must restore the frame pointer
                 in word_mode since the upper 32 bits of RBP register
                 can have any values.  */
              t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
              rtx frame_reg = gen_rtx_REG (word_mode,
                                           HARD_FRAME_POINTER_REGNUM);
              insn = emit_move_insn (frame_reg, t);

              /* Note that we use SA as a temporary CFA, as the return
                 address is at the proper place relative to it.  We
                 pretend this happens at the FP restore insn because
                 prior to this insn the FP would be stored at the wrong
                 offset relative to SA, and after this insn we have no
                 other reasonable register to use for the CFA.  We don't
                 bother resetting the CFA to the SP for the duration of
                 the return insn, unless the control flow instrumentation
                 is done.  In this case the SP is used later and we have
                 to reset CFA to SP.  */
              add_reg_note (insn, REG_CFA_DEF_CFA,
                            plus_constant (Pmode, sa, UNITS_PER_WORD));
              ix86_add_queued_cfa_restore_notes (insn);
              add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
              RTX_FRAME_RELATED_P (insn) = 1;

              m->fs.cfa_reg = sa;
              m->fs.cfa_offset = UNITS_PER_WORD;
              m->fs.fp_valid = false;

              pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
                                         const0_rtx, style,
                                         flag_cf_protection);
            }
          else
            {
              t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
              t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
              insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
              ix86_add_queued_cfa_restore_notes (insn);

              gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
              if (m->fs.cfa_offset != UNITS_PER_WORD)
                {
                  m->fs.cfa_offset = UNITS_PER_WORD;
                  add_reg_note (insn, REG_CFA_DEF_CFA,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               UNITS_PER_WORD));
                  RTX_FRAME_RELATED_P (insn) = 1;
                }
            }
          m->fs.sp_offset = UNITS_PER_WORD;
          m->fs.sp_valid = true;
          m->fs.sp_realigned = false;
        }
    }
  else
    {
      /* SEH requires that the function end with (1) a stack adjustment
         if necessary, (2) a sequence of pops, and (3) a return or
         jump instruction.  Prevent insns from the function body from
         being scheduled into this sequence.  */
      if (TARGET_SEH)
        {
          /* Prevent a catch region from being adjacent to the standard
             epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
             nor several other flags that would be interesting to test are
             set up yet.  */
          if (flag_non_call_exceptions)
            emit_insn (gen_nops (const1_rtx));
          else
            emit_insn (gen_blockage ());
        }

      /* First step is to deallocate the stack frame so that we can
         pop the registers.  If the stack pointer was realigned, it needs
         to be restored now.  Also do it on SEH target for very large
         frame as the emitted instructions aren't allowed by the ABI
         in epilogues.  */
      if (!m->fs.sp_valid || m->fs.sp_realigned
          || (TARGET_SEH
              && (m->fs.sp_offset - reg_save_offset
                  >= SEH_MAX_FRAME_SIZE)))
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
                                     GEN_INT (m->fs.fp_offset
                                              - reg_save_offset),
                                     style, false);
        }
      else if (m->fs.sp_offset != reg_save_offset)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (m->fs.sp_offset
                                              - reg_save_offset),
                                     style,
                                     m->fs.cfa_reg == stack_pointer_rtx);
        }

      ix86_emit_restore_regs_using_pop ();
    }

  /* If we used a stack pointer and haven't already got rid of it,
     then do so now.  */
  if (m->fs.fp_valid)
    {
      /* If the stack pointer is valid and pointing at the frame
         pointer store address, then we only need a pop.  */
      if (sp_valid_at (frame.hfp_save_offset)
          && m->fs.sp_offset == frame.hfp_save_offset)
        ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
      /* Leave results in shorter dependency chains on CPUs that are
         able to grok it fast.  */
      else if (TARGET_USE_LEAVE
               || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
               || !cfun->machine->use_fast_prologue_epilogue)
        ix86_emit_leave (NULL);
      else
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx,
                                     hard_frame_pointer_rtx,
                                     const0_rtx, style, !using_drap);
          ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
        }
    }
  if (using_drap)
    {
      int param_ptr_offset = UNITS_PER_WORD;
      rtx_insn *insn;

      gcc_assert (stack_realign_drap);

      if (ix86_static_chain_on_stack)
        param_ptr_offset += UNITS_PER_WORD;
      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
        param_ptr_offset += UNITS_PER_WORD;

      insn = emit_insn (gen_rtx_SET
                        (stack_pointer_rtx,
                         plus_constant (Pmode, crtl->drap_reg,
                                        -param_ptr_offset)));
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = param_ptr_offset;
      m->fs.sp_offset = param_ptr_offset;
      m->fs.realigned = false;

      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (Pmode, stack_pointer_rtx,
                                   param_ptr_offset));
      RTX_FRAME_RELATED_P (insn) = 1;

      if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
        ix86_emit_restore_reg_using_pop (crtl->drap_reg);
    }
  /* At this point the stack pointer must be valid, and we must have
     restored all of the registers.  We may not have deallocated the
     entire stack frame.  We've delayed this until now because it may
     be possible to merge the local stack deallocation with the
     deallocation forced by ix86_static_chain_on_stack.  */
  gcc_assert (m->fs.sp_valid);
  gcc_assert (!m->fs.sp_realigned);
  gcc_assert (!m->fs.fp_valid);
  gcc_assert (!m->fs.realigned);
  if (m->fs.sp_offset != UNITS_PER_WORD)
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
                                 style, true);
    }
  else
    ix86_add_queued_cfa_restore_notes (get_last_insn ());

  /* Sibcall epilogues don't want a return instruction.  */
  if (style == 0)
    {
      m->fs = frame_state_save;
      return;
    }

  if (cfun->machine->func_type != TYPE_NORMAL)
    emit_jump_insn (gen_interrupt_return ());
  else if (crtl->args.pops_args && crtl->args.size)
    {
      rtx popc = GEN_INT (crtl->args.pops_args);

      /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
         address, do an explicit add, and jump indirectly to the caller.  */

      if (crtl->args.pops_args >= 65536)
        {
          rtx ecx = gen_rtx_REG (SImode, CX_REG);
          rtx_insn *insn;

          /* There is no "pascal" calling convention in any 64bit ABI.  */
          gcc_assert (!TARGET_64BIT);

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
          x = gen_rtx_SET (stack_pointer_rtx, x);
          add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
          add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     popc, -1, true);
          emit_jump_insn (gen_simple_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_simple_return_pop_internal (popc));
    }
  else if (!m->call_ms2sysv || !restore_stub_is_tail)
    {
      /* In case of return from EH a simple return cannot be used
         as a return address will be compared with a shadow stack
         return address.  Use indirect jump instead.  */
      if (style == 2 && flag_cf_protection)
        {
          /* Register used in indirect jump must be in word_mode.  But
             Pmode may not be the same as word_mode for x32.  */
          rtx ecx = gen_rtx_REG (word_mode, CX_REG);
          rtx_insn *insn;

          insn = emit_insn (gen_pop (ecx));
          m->fs.cfa_offset -= UNITS_PER_WORD;
          m->fs.sp_offset -= UNITS_PER_WORD;

          rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
          x = gen_rtx_SET (stack_pointer_rtx, x);
          add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
          add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
          RTX_FRAME_RELATED_P (insn) = 1;

          emit_jump_insn (gen_simple_return_indirect_internal (ecx));
        }
      else
        emit_jump_insn (gen_simple_return_internal ());
    }

  /* Restore the state back to the state from the prologue,
     so that it's correct for the next epilogue.  */
  m->fs = frame_state_save;
}
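
/* As an example of the crtl->args.pops_args >= 65536 path above, the
   emitted 32-bit epilogue tail looks roughly like

        pop     %ecx            # return address
        add     $N, %esp        # pop the arguments explicitly
        jmp     *%ecx           # return to the caller

   instead of the usual "ret $N", whose immediate is limited to 16 bits.  */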
/* Reset from the function's potential modifications.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
         it looks like we might want one, take special action.
         First, collect any sequence of deleted debug labels.  */
      while (insn
             && NOTE_P (insn)
             && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
        {
          /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
             notes only, instead set their CODE_LABEL_NUMBER to -1,
             otherwise there would be code generation differences
             in between -g and -g0.  */
          if (NOTE_P (insn) && NOTE_KIND (insn)
              == NOTE_INSN_DELETED_DEBUG_LABEL)
            deleted_debug_label = insn;
          insn = PREV_INSN (insn);
        }

      /* If a label follows a barrier at the end of the function,
         then this needs to be detected, so skip past the barrier.  */
      if (insn && BARRIER_P (insn))
        insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
        {
          if (LABEL_P (insn)
              || (NOTE_P (insn)
                  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
            /* Trailing label.  */
            fputs ("\tnop\n", file);
          else if (cfun && ! cfun->is_thunk)
            {
              /* See if we have a completely empty function body, skipping
                 the special case of the picbase thunk emitted as asm.  */
              while (insn && ! INSN_P (insn))
                insn = PREV_INSN (insn);
              /* If we don't find any insns, we've got an empty function body;
                 i.e. completely empty - without a return or branch.  This is
                 taken as the case where a function body has been removed
                 because it contains an inline __builtin_unreachable().  GCC
                 declares that reaching __builtin_unreachable() means UB so
                 we're not obliged to do anything special; however, we want
                 non-zero-sized function bodies.  To meet this, and help the
                 user out, let's trap the case.  */
              if (insn == NULL)
                fputs ("\tud2\n", file);
            }
        }
      else if (deleted_debug_label)
        for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
          if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
            CODE_LABEL_NUMBER (insn) = -1;
    }
}
/* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY.  */

static void
ix86_print_patchable_function_entry (FILE *file,
                                     unsigned HOST_WIDE_INT patch_area_size,
                                     bool record_p)
{
  if (cfun->machine->function_label_emitted)
    {
      /* NB: When ix86_print_patchable_function_entry is called after
         the function table has been emitted, we have inserted or queued
         a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
         place.  There is nothing to do here.  */
      return;
    }

  default_print_patchable_function_entry (file, patch_area_size,
                                          record_p);
}
/* Output patchable area.  NB: default_print_patchable_function_entry
   isn't available in i386.md.  */

void
ix86_output_patchable_area (unsigned int patch_area_size, bool record_p)
{
  default_print_patchable_function_entry (asm_out_file,
                                          patch_area_size, record_p);
}
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It is the first
   instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */

static unsigned int
split_stack_prologue_scratch_regno (void)
{
  if (TARGET_64BIT)
    return R11_REG;
  else
    {
      bool is_fastcall, is_thiscall;
      int regparm;

      is_fastcall = (lookup_attribute ("fastcall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      is_thiscall = (lookup_attribute ("thiscall",
                                       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
                     != NULL);
      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);

      if (is_fastcall)
        {
          if (DECL_STATIC_CHAIN (cfun->decl))
            {
              sorry ("%<-fsplit-stack%> does not support fastcall with "
                     "nested function");
              return INVALID_REGNUM;
            }
          return AX_REG;
        }
      else if (is_thiscall)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return DX_REG;
          return AX_REG;
        }
      else if (regparm < 3)
        {
          if (!DECL_STATIC_CHAIN (cfun->decl))
            return CX_REG;
          else
            {
              if (regparm >= 2)
                {
                  sorry ("%<-fsplit-stack%> does not support 2 register "
                         "parameters for a nested function");
                  return INVALID_REGNUM;
                }
              return DX_REG;
            }
        }
      else
        {
          /* FIXME: We could make this work by pushing a register
             around the addition and comparison.  */
          sorry ("%<-fsplit-stack%> does not support 3 register parameters");
          return INVALID_REGNUM;
        }
    }
}
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more-stack function when using the large
   model.  */

static GTY(()) rtx split_stack_fn_large;

/* Return the location of the stack guard value in the TLS block.  */

rtx
ix86_split_stack_guard (void)
{
  int offset;
  addr_space_t as = DEFAULT_TLS_SEG_REG;
  rtx r;

  gcc_assert (flag_split_stack);

#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
  offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
#else
  offset = 0;
#endif

  r = GEN_INT (offset);
  r = gen_const_mem (Pmode, r);
  set_mem_addr_space (r, as);

  return r;
}
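
/* On GNU/Linux targets TARGET_THREAD_SPLIT_STACK_OFFSET is a small
   TLS-segment offset, so the MEM returned above typically prints as
   something like %fs:0x70 in 64-bit mode.  The split-stack prologue
   can then compare against the guard directly from the TCB without a
   separate load.  (Offset shown for illustration; it is defined by
   the target headers.)  */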
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  */

void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_fusage;
  rtx_insn *call_insn;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  if (allocate < SPLIT_STACK_AVAILABLE)
    current = stack_pointer_rtx;
  else
    {
      unsigned int scratch_regno;
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
         the required frame size.  Since this is the very start of the
         function, the scratch register can be any caller-saved
         register which is not used for parameters.  */
      offset = GEN_INT (- allocate);
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
        return;
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
        {
          /* We don't use gen_add in this case because it will
             want to split to lea, but when not optimizing the insn
             will not be split after this point.  */
          emit_insn (gen_rtx_SET (scratch_reg,
                                  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                                offset)));
        }
      else
        {
          emit_move_insn (scratch_reg, offset);
          emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
        }
      current = scratch_reg;
    }

  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());

  if (split_stack_fn == NULL_RTX)
    {
      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
    }
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
  call_fusage = NULL_RTX;
  rtx pop = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (Pmode, R10_REG);
      reg11 = gen_rtx_REG (Pmode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
         Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
        {
          rtx rax;

          rax = gen_rtx_REG (word_mode, AX_REG);
          emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
          use_reg (&call_fusage, rax);
        }

      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
          && !TARGET_PECOFF)
        {
          HOST_WIDE_INT argval;

          gcc_assert (Pmode == DImode);
          /* When using the large model we need to load the address
             into a register, and we've run out of registers.  So we
             switch to a different calling convention, and we call a
             different function: __morestack_large.  We pass the
             argument size in the upper 32 bits of r10 and pass the
             frame size in the lower 32 bits.  */
          gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
          gcc_assert ((args_size & 0xffffffff) == args_size);

          if (split_stack_fn_large == NULL_RTX)
            {
              split_stack_fn_large
                = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
              SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
            }
          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx_code_label *label;
              rtx x;

              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              emit_insn (gen_set_rip_rex64 (reg10, label));
              emit_insn (gen_set_got_offset_rex64 (reg11, label));
              emit_insn (gen_add2_insn (reg10, reg11));
              x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
                                  UNSPEC_GOT);
              x = gen_rtx_CONST (Pmode, x);
              emit_move_insn (reg11, x);
              x = gen_rtx_PLUS (Pmode, reg10, reg11);
              x = gen_const_mem (Pmode, x);
              emit_move_insn (reg11, x);
            }
          else
            emit_move_insn (reg11, split_stack_fn_large);

          fn = reg11;

          argval = ((args_size << 16) << 16) + allocate;
          emit_move_insn (reg10, GEN_INT (argval));
        }
      else
        {
          emit_move_insn (reg10, allocate_rtx);
          emit_move_insn (reg11, GEN_INT (args_size));
          use_reg (&call_fusage, reg11);
        }

      use_reg (&call_fusage, reg10);
    }
  else
    {
      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }
  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
                                GEN_INT (UNITS_PER_WORD), constm1_rtx,
                                pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  rtx_insn *ret_insn
    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  if ((flag_cf_protection & CF_BRANCH))
    {
      /* Insert ENDBR since __morestack will jump back here via an
         indirect call.  */
      rtx cet_eb = gen_nop_endbr ();
      emit_insn_after (cet_eb, ret_insn);
    }

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      unsigned int scratch_regno;
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
         fp -> old fp value
               return address within this function
               return address of caller of this function
               stack arguments
         So we add three words to get to the stack arguments.

         32-bit:
         fp -> old fp value
               return address within this function
               first argument to __morestack
               second argument to __morestack
               return address of caller of this function
               stack arguments
         So we add five words to get to the stack arguments.  */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (scratch_reg,
                              plus_constant (Pmode, frame_reg,
                                             words * UNITS_PER_WORD)));

      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             UNITS_PER_WORD)));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
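
/* Putting it together, the split-stack prologue emitted above compares
   against the TCB guard and only rarely calls __morestack; e.g. in
   64-bit mode with a large frame and scratch register %r11 it is
   shaped roughly like

        lea     -$frame(%rsp), %r11
        cmp     <guard in TLS>, %r11
        jae     1f                      # enough stack: skip the call
        mov     $allocate, %r10
        mov     $args_size, %r11
        call    __morestack
        ret                             # consumed by __morestack
   1:

   This is a sketch; the precise registers and operands follow the
   choices made above.  */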
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a scratch register.  */

static void
ix86_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
    }
}
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but still used for computing the length of a lea
   instruction.  */

int
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  int retval = 1;
  addr_space_t seg = ADDR_SPACE_GENERIC;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
          && GET_MODE (XEXP (addr, 0)) == SImode)
        {
          addr = XEXP (addr, 0);
          if (CONST_INT_P (addr))
            return 0;
        }
      else if (GET_CODE (addr) == AND
               && const_32bit_mask (XEXP (addr, 1), DImode))
        {
          addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
          if (addr == NULL_RTX)
            return 0;

          if (CONST_INT_P (addr))
            return 0;
        }
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (SUBREG_P (addr)
          && GET_MODE (SUBREG_REG (addr)) == DImode)
        {
          addr = SUBREG_REG (addr);
          if (CONST_INT_P (addr))
            return 0;
        }
    }

  if (REG_P (addr))
    base = addr;
  else if (SUBREG_P (addr))
    {
      if (REG_P (SUBREG_REG (addr)))
        base = addr;
      else
        return 0;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      op = addr;
      do
        {
          if (n >= 4)
            return 0;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return 0;
      addends[n] = op;

      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case ASHIFT:
              if (index)
                return 0;
              index = XEXP (op, 0);
              tmp = XEXP (op, 1);
              if (!CONST_INT_P (tmp))
                return 0;
              scale = INTVAL (tmp);
              if ((unsigned HOST_WIDE_INT) scale > 3)
                return 0;
              scale = 1 << scale;
              break;

            case ZERO_EXTEND:
              op = XEXP (op, 0);
              if (GET_CODE (op) != UNSPEC)
                return 0;
              /* FALLTHRU */

            case UNSPEC:
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == ADDR_SPACE_GENERIC)
                seg = DEFAULT_TLS_SEG_REG;
              else
                return 0;
              break;

            case SUBREG:
              if (!REG_P (SUBREG_REG (op)))
                return 0;
              /* FALLTHRU */

            case REG:
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return 0;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return 0;
              disp = op;
              break;

            default:
              return 0;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);		/* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
        return 0;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return 0;
      scale = 1 << scale;
      retval = -1;
    }
  else
    disp = addr;			/* displacement */

  if (index)
    {
      if (REG_P (index))
        ;
      else if (SUBREG_P (index)
               && REG_P (SUBREG_REG (index)))
        ;
      else
        return 0;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
        return 0;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
  index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
  if (base_reg && index_reg && scale == 1
      && (REGNO (index_reg) == ARG_POINTER_REGNUM
          || REGNO (index_reg) == FRAME_POINTER_REGNUM
          || REGNO (index_reg) == SP_REG))
    {
      std::swap (base, index);
      std::swap (base_reg, index_reg);
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp && base_reg
      && (REGNO (base_reg) == ARG_POINTER_REGNUM
          || REGNO (base_reg) == FRAME_POINTER_REGNUM
          || REGNO (base_reg) == BP_REG
          || REGNO (base_reg) == R13_REG))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return retval;
}
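
/* For example, the address

        (plus:SI (mult:SI (reg:SI ax) (const_int 4))
                 (plus:SI (reg:SI bx) (const_int 12)))

   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the operand that prints as 12(%ebx,%eax,4).  */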
/* Return cost of the memory address x.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */

static int
ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
{
  struct ix86_address parts;
  int cost = 1;
  int ok = ix86_decompose_address (x, &parts);

  gcc_assert (ok);

  if (parts.base && SUBREG_P (parts.base))
    parts.base = SUBREG_REG (parts.base);
  if (parts.index && SUBREG_P (parts.index))
    parts.index = SUBREG_REG (parts.index);

  /* Attempt to minimize number of registers in the address by increasing
     address cost for each used register.  We don't increase address cost
     for "pic_offset_table_rtx".  When a memop with "pic_offset_table_rtx"
     is not invariant itself it most likely means that base or index is not
     invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
     which is not profitable for x86.  */
  if (parts.base
      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
          || !pic_offset_table_rtx
          || !REG_P (parts.base)
          || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
    cost++;

  if (parts.index
      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
      && (current_pass->type == GIMPLE_PASS
          || !pic_offset_table_rtx
          || !REG_P (parts.index)
          || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
    cost++;

  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoded.  Increase cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse such addresses
     at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last case may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */

  if (TARGET_K6
      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
          || (parts.disp && !parts.base && parts.index && parts.scale != 1)
          || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
    cost += 10;

  return cost;
}
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   effect.  */

static bool
darwin_local_data_pic (rtx disp)
{
  return (GET_CODE (disp) == UNSPEC
          && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
}

/* True if operand X should be loaded from GOT.  */

bool
ix86_force_load_from_GOT_p (rtx x)
{
  return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
          && !TARGET_PECOFF && !TARGET_MACHO
          && !flag_pic
          && ix86_cmodel != CM_LARGE
          && GET_CODE (x) == SYMBOL_REF
          && SYMBOL_REF_FUNCTION_P (x)
          && (!flag_plt
              || (SYMBOL_REF_DECL (x)
                  && lookup_attribute ("noplt",
                                       DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
          && !SYMBOL_REF_LOCAL_P (x));
}
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (!CONST_INT_P (XEXP (x, 1)))
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (GET_CODE (x) == LABEL_REF)
        return true;
      if (GET_CODE (x) != SYMBOL_REF)
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
          && SYMBOL_REF_DLLIMPORT_P (x))
        return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
        return machopic_symbol_defined_p (x);
#endif

      /* External function address should be loaded
         via the GOT slot to avoid PLT.  */
      if (ix86_force_load_from_GOT_p (x))
        return false;

      break;

    CASE_CONST_SCALAR_INT:
      if (ix86_endbr_immediate_operand (x, VOIDmode))
        return false;

      switch (mode)
        {
        case E_TImode:
          if (TARGET_64BIT)
            break;
          /* FALLTHRU */
        case E_OImode:
        case E_XImode:
          if (!standard_sse_constant_p (x, mode))
            return false;
        default:
          break;
        }
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x, mode))
        return false;
      break;

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
/* Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
ix86_cannot_force_const_mem (machine_mode mode, rtx x)
{
  /* We can put any immediate constant in memory.  */
  switch (GET_CODE (x))
    {
    CASE_CONST_ANY:
      return false;

    default:
      break;
    }

  return !ix86_legitimate_constant_p (mode, x);
}

/* Nonzero if the symbol is marked as dllimport, or as stub-variable,
   otherwise zero.  */

static bool
is_imported_p (rtx x)
{
  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
      || GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
}
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && CONST_INT_P (XEXP (inner, 1)))
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_MACHOPIC_OFFSET:
            return legitimate_pic_address_disp_p (x);
          default:
            return false;
          }
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      return true;
    }
}
/* Determine if a given CONST RTX is a valid memory displacement
   in PIC mode.  */

bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1))
            break;
          if (GET_CODE (op0) == UNSPEC
              && (XINT (op0, 1) == UNSPEC_DTPOFF
                  || XINT (op0, 1) == UNSPEC_NTPOFF)
              && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
            return true;
          if (INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (GET_CODE (op0) == LABEL_REF)
            return true;
          if (GET_CODE (op0) == CONST
              && GET_CODE (XEXP (op0, 0)) == UNSPEC
              && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) != SYMBOL_REF)
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.
             The dllimported symbol needs always to be resolved.  */
          if (SYMBOL_REF_TLS_MODEL (op0)
              || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
                  && SYMBOL_REF_DLLIMPORT_P (op0)))
            return false;

          if (TARGET_PECOFF)
            {
              if (is_imported_p (op0))
                return true;

              if (SYMBOL_REF_FAR_ADDR_P (op0)
                  || !SYMBOL_REF_LOCAL_P (op0))
                break;

              /* Function-symbols need to be resolved only for
                 the large-model.
                 For the small-model we don't need to resolve anything
                 here.  */
              if ((ix86_cmodel != CM_LARGE_PIC
                   && SYMBOL_REF_FUNCTION_P (op0))
                  || ix86_cmodel == CM_SMALL_PIC)
                return true;
              /* Non-external symbols don't need to be resolved for
                 the large and medium models.  */
              if ((ix86_cmodel == CM_LARGE_PIC
                   || ix86_cmodel == CM_MEDIUM_PIC)
                  && !SYMBOL_REF_EXTERNAL_P (op0))
                return true;
            }
          else if (!SYMBOL_REF_FAR_ADDR_P (op0)
                   && (SYMBOL_REF_LOCAL_P (op0)
                       || (HAVE_LD_PIE_COPYRELOC
                           && flag_pie
                           && !SYMBOL_REF_WEAK (op0)
                           && !SYMBOL_REF_FUNCTION_P (op0)))
                   && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* It is unsafe to allow PLUS expressions here, as that would
         extend the allowed distance of GOT references.  We should not
         need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
          && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
        return false;
      return true;
    }

  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
              || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI specifies a 32bit relocation as well, we don't
         produce it in the small PIC model at all.  */
      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
           || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
          && !TARGET_64BIT)
        return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    }

  return false;
}
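
/* As a concrete 64-bit example, a GOT-relative reference such as

        (const:DI (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_GOTPCREL))

   is accepted above and eventually prints as foo@GOTPCREL(%rip),
   while a bare dynamic symbol used as a displacement is rejected.  */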
/* Determine if op is suitable RTX for an address register.
   Return the naked register if a register or a register subreg is
   found, otherwise return NULL_RTX.  */

static rtx
ix86_validate_address_register (rtx op)
{
  machine_mode mode = GET_MODE (op);

  /* Only SImode or DImode registers can form the address.  */
  if (mode != SImode && mode != DImode)
    return NULL_RTX;

  if (REG_P (op))
    return op;
  else if (SUBREG_P (op))
    {
      rtx reg = SUBREG_REG (op);

      if (!REG_P (reg))
        return NULL_RTX;

      mode = GET_MODE (reg);

      /* Don't allow SUBREGs that span more than a word.  It can
         lead to spill failures when the register is one word out
         of a two word structure.  */
      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return NULL_RTX;

      /* Allow only SUBREGs of non-eliminable hard registers.  */
      if (register_no_elim_operand (reg, mode))
        return op;
    }

  /* Op is not a register.  */
  return NULL_RTX;
}
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.  */

static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, &parts) <= 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (base);

      if (reg == NULL_RTX)
	return false;

      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
	/* Base is not valid.  */
	return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (index);

      if (reg == NULL_RTX)
	return false;

      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
	/* Index is not valid.  */
	return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
	  || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
	/* Scale without index.  */
	return false;

      if (scale != 2 && scale != 4 && scale != 8)
	/* Scale is not a valid multiplier.  */
	return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (ix86_endbr_immediate_operand (disp, VOIDmode))
	return false;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
	switch (XINT (XEXP (disp, 0), 1))
	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP relative instead.
	     Allow GOT in 32bit mode for both PIC and non-PIC if symbol
	     should be loaded via GOT.  */
	  case UNSPEC_GOT:
	    if (!TARGET_64BIT
		&& ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_GOTOFF:
	    gcc_assert (flag_pic);
	    if (!TARGET_64BIT)
	      goto is_legitimate_pic;

	    /* 64bit address unspec.  */
	    return false;

	  case UNSPEC_GOTPCREL:
	    if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	      goto is_legitimate_pic;
	    /* FALLTHRU */
	  case UNSPEC_PCREL:
	    gcc_assert (flag_pic);
	    goto is_legitimate_pic;

	  case UNSPEC_GOTTPOFF:
	  case UNSPEC_GOTNTPOFF:
	  case UNSPEC_INDNTPOFF:
	  case UNSPEC_NTPOFF:
	  case UNSPEC_DTPOFF:
	    break;

	  default:
	    /* Invalid address unspec.  */
	    return false;
	  }

      else if (SYMBOLIC_CONST (disp)
	       && (flag_pic
#if TARGET_MACHO
		   || (MACHOPIC_INDIRECT
		       && !machopic_operand_p (disp))
#endif
		   ))
	{
	is_legitimate_pic:
	  if (TARGET_64BIT && (index || base))
	    {
	      /* foo@dtpoff(%rX) is ok.  */
	      if (GET_CODE (disp) != CONST
		  || GET_CODE (XEXP (disp, 0)) != PLUS
		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
		/* Non-constant pic memory reference.  */
		return false;
	    }
	  else if ((!TARGET_MACHO || flag_pic)
		   && ! legitimate_pic_address_disp_p (disp))
	    /* Displacement is an invalid pic construct.  */
	    return false;
#if TARGET_MACHO
	  else if (MACHO_DYNAMIC_NO_PIC_P
		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* Displacement must be referenced via a non-lazy pointer.  */
	    return false;
#endif

	  /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code.  Such code is nonsensical, but results in addressing
	     the GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea in the
	     case the output register differs from the input.  While this
	     could be handled by a separate addsi pattern for this case
	     that never results in lea, disabling this test seems to be
	     the easier and correct fix for the crash.  */
	}
      else if (GET_CODE (disp) != LABEL_REF
	       && !CONST_INT_P (disp)
	       && (GET_CODE (disp) != CONST
		   || !ix86_legitimate_constant_p (Pmode, disp))
	       && (GET_CODE (disp) != SYMBOL_REF
		   || !ix86_legitimate_constant_p (Pmode, disp)))
	/* Displacement is not constant.  */
	return false;
      else if (TARGET_64BIT
	       && !x86_64_immediate_operand (disp, VOIDmode))
	/* Displacement is out of range.  */
	return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
	 we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
	       && CONST_INT_P (disp)
	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
	return false;
    }

  /* Everything looks valid.  */
  return true;
}
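/* A worked example (illustrative only, not generated by the code above):
   the AT&T operand "4(%ebx,%ecx,2)" decomposes into base %ebx, index
   %ecx, scale 2 and displacement 4, each of which passes the checks
   above, so the whole address is legitimate.  */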
/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
}

/* Return a unique alias set for the GOT.  */

alias_set_type
ix86_GOT_alias_set (void)
{
  static alias_set_type set = -1;
  if (set == -1)
    set = new_alias_set ();
  return set;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      used as the address.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */
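/* Illustrative only -- on 32-bit ELF the two reference types typically
   assemble to something like:

     movl foo@GOT(%ebx), %eax      | type 1: address loaded from the GOT
     leal bar@GOTOFF(%ebx), %eax   | type 2: offset from the GOT base

   The exact form chosen depends on the cases handled below.  */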
rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
	reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;
    }

  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if ((!TARGET_64BIT
	    || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
	   && !TARGET_PECOFF
	   && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
	 from the PIC base address (@GOTOFF).  */
      if (GET_CODE (addr) == CONST)
	addr = XEXP (addr, 0);

      if (GET_CODE (addr) == PLUS)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
				    UNSPEC_GOTOFF);
	  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
	}
      else
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
	new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
      else if (reg != 0)
	{
	  gcc_assert (REG_P (reg));
	  new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
					 new_rtx, reg, 1, OPTAB_DIRECT);
	}
      else
	new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
	   /* We can't use @GOTOFF for text labels
	      on VxWorks, see gotoff_operand.  */
	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
    {
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
	return tmp;

      /* For x64 PE-COFF there is no GOT table,
	 so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	}
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
	{
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
				    UNSPEC_GOTPCREL);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}
      else
	{
	  /* This symbol must be referenced via a load
	     from the Global Offset Table (@GOT).  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  if (TARGET_64BIT)
	    new_rtx = force_reg (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
	}

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      if (CONST_INT_P (addr)
	  && !x86_64_immediate_operand (addr, VOIDmode))
	new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
	{
	  addr = XEXP (addr, 0);

	  /* We must match stuff we generate before.  Assume the only
	     unspecs that can get here are ours.  Not that we could do
	     anything with them anyway....  */
	  if (GET_CODE (addr) == UNSPEC
	      || (GET_CODE (addr) == PLUS
		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
	    return orig;
	  gcc_assert (GET_CODE (addr) == PLUS);
	}

      if (GET_CODE (addr) == PLUS)
	{
	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

	  /* Check first to see if this is a constant
	     offset from a @GOTOFF symbol reference.  */
	  if (!TARGET_PECOFF
	      && gotoff_operand (op0, Pmode)
	      && CONST_INT_P (op1))
	    {
	      if (!TARGET_64BIT)
		{
		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
					    UNSPEC_GOTOFF);
		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

		  if (reg != 0)
		    {
		      gcc_assert (REG_P (reg));
		      new_rtx = expand_simple_binop (Pmode, PLUS,
						     pic_offset_table_rtx,
						     new_rtx, reg, 1,
						     OPTAB_DIRECT);
		    }
		  else
		    new_rtx
		      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
		}
	      else
		{
		  if (INTVAL (op1) < -16*1024*1024
		      || INTVAL (op1) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (op1, Pmode))
			op1 = force_reg (Pmode, op1);

		      new_rtx
			= gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
		    }
		}
	    }
	  else
	    {
	      rtx base = legitimize_pic_address (op0, reg);
	      machine_mode mode = GET_MODE (base);
	      new_rtx
		= legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

	      if (CONST_INT_P (new_rtx))
		{
		  if (INTVAL (new_rtx) < -16*1024*1024
		      || INTVAL (new_rtx) >= 16*1024*1024)
		    {
		      if (!x86_64_immediate_operand (new_rtx, mode))
			new_rtx = force_reg (mode, new_rtx);

		      new_rtx
			= gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
		    }
		  else
		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
		}
	      else
		{
		  /* For %rip addressing, we have to use
		     just disp32, not base nor index.  */
		  if (TARGET_64BIT
		      && (GET_CODE (base) == SYMBOL_REF
			  || GET_CODE (base) == LABEL_REF))
		    base = force_reg (mode, base);
		  if (GET_CODE (new_rtx) == PLUS
		      && CONSTANT_P (XEXP (new_rtx, 1)))
		    {
		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
		      new_rtx = XEXP (new_rtx, 1);
		    }
		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
		}
	    }
	}
    }
  return new_rtx;
}
/* Load the thread pointer.  If TO_REG is true, force it into a register.  */

static rtx
get_thread_pointer (machine_mode tp_mode, bool to_reg)
{
  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);

  if (GET_MODE (tp) != tp_mode)
    {
      gcc_assert (GET_MODE (tp) == SImode);
      gcc_assert (tp_mode == DImode);

      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
    }

  if (to_reg)
    tp = copy_to_mode_reg (tp_mode, tp);

  return tp;
}
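/* For reference (illustrative, GNU/Linux conventions): the UNSPEC_TP
   built above is what ultimately prints as the TLS segment base, e.g.
   "movl %gs:0, %eax" in 32-bit code or "movq %fs:0, %rax" in 64-bit
   code when the thread pointer has to live in a register.  */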
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx ix86_tls_symbol;

static rtx
ix86_tls_get_addr (void)
{
  if (!ix86_tls_symbol)
    {
      const char *sym
	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
	   ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
				   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
			   gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}
/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */

static GTY(()) rtx ix86_tls_module_base_symbol;

rtx
ix86_tls_module_base (void)
{
  if (!ix86_tls_module_base_symbol)
    {
      ix86_tls_module_base_symbol
	= gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");

      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
    }

  return ix86_tls_module_base_symbol;
}
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   false if we expect this to be used for a memory address and true if
   we expect to load the address into a register.  */
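/* A rough map of the models handled below, as illustrative 32-bit GNU
   sequences only (the 64-bit and GNU2/TLSDESC variants differ):

     global dynamic: leal x@tlsgd(,%ebx,1), %eax; call ___tls_get_addr
     local dynamic:  leal x@tlsldm(%ebx), %eax; call ___tls_get_addr
     initial exec:   movl %gs:0, %eax; addl x@gotntpoff(%ebx), %eax
     local exec:     movl %gs:0, %eax; leal x@ntpoff(%eax), %eax  */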
static rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Fall back to global dynamic model if the toolchain cannot support
     local dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic && !TARGET_PECOFF)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  dest = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	  if (GET_MODE (dest) != Pmode)
	    dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  dest = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      if (GET_MODE (x) != Pmode)
		x = gen_rtx_ZERO_EXTEND (Pmode, x);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, dest, rax, x);
	    }
	  else
	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
	}
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (!TARGET_64BIT)
	{
	  if (flag_pic)
	    pic = pic_offset_table_rtx;
	  else
	    {
	      pic = gen_reg_rtx (Pmode);
	      emit_insn (gen_set_got (pic));
	    }
	}

      if (TARGET_GNU2_TLS)
	{
	  rtx tmp = ix86_tls_module_base ();

	  base = gen_reg_rtx (ptr_mode);
	  if (TARGET_64BIT)
	    emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
	  else
	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

	  tp = get_thread_pointer (ptr_mode, true);
	  if (GET_MODE (base) != Pmode)
	    base = gen_rtx_ZERO_EXTEND (Pmode, base);
	  base = force_reg (Pmode, base);
	}
      else
	{
	  rtx caddr = ix86_tls_get_addr ();

	  base = gen_reg_rtx (Pmode);
	  if (TARGET_64BIT)
	    {
	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
	      rtx_insn *insns;
	      rtx eqv;

	      start_sequence ();
	      emit_call_insn
		(gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
	      insns = get_insns ();
	      end_sequence ();

	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
		 share the LD_BASE result with other LD model accesses.  */
	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				    UNSPEC_TLS_LD_BASE);

	      RTL_CONST_CALL_P (insns) = 1;
	      emit_libcall_block (insns, base, rax, eqv);
	    }
	  else
	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
	}

      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
	{
	  if (GET_MODE (tp) != Pmode)
	    {
	      dest = lowpart_subreg (ptr_mode, dest, Pmode);
	      dest = gen_rtx_PLUS (ptr_mode, tp, dest);
	      dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
	    }
	  else
	    dest = gen_rtx_PLUS (Pmode, tp, dest);
	  dest = force_reg (Pmode, dest);

	  if (GET_MODE (x) != Pmode)
	    x = gen_rtx_ZERO_EXTEND (Pmode, x);

	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
	}
      break;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
	{
	  if (TARGET_SUN_TLS && !TARGET_X32)
	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as destination of the
		 initial executable code sequence.  */

	      dest = gen_reg_rtx (DImode);
	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
	      return dest;
	    }

	  /* Generate DImode references to avoid %fs:(%reg32)
	     problems and linker IE->LE relaxation bug.  */
	  tp_mode = DImode;
	  pic = NULL;
	  type = UNSPEC_GOTNTPOFF;
	}
      else if (flag_pic)
	{
	  pic = pic_offset_table_rtx;
	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
	}
      else if (!TARGET_ANY_GNU_TLS)
	{
	  pic = gen_reg_rtx (Pmode);
	  emit_insn (gen_set_got (pic));
	  type = UNSPEC_GOTTPOFF;
	}
      else
	{
	  pic = NULL;
	  type = UNSPEC_INDNTPOFF;
	}

      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
	off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, ix86_GOT_alias_set ());

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (tp_mode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  off = force_reg (tp_mode, off);
	  dest = gen_rtx_PLUS (tp_mode, base, off);
	  if (tp_mode != Pmode)
	    dest = convert_to_mode (Pmode, dest, 1);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
	{
	  base = get_thread_pointer (Pmode,
				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
	  return gen_rtx_PLUS (Pmode, base, off);
	}
      else
	{
	  base = get_thread_pointer (Pmode, true);
	  dest = gen_reg_rtx (Pmode);
	  emit_insn (gen_sub3_insn (dest, base, off));
	}
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
/* Return true if OP refers to a TLS address.  */

bool
ix86_tls_address_pattern_p (rtx op)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
    {
      rtx op = *iter;
      if (MEM_P (op))
	{
	  rtx *x = &XEXP (op, 0);
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    return true;
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }

  return false;
}
/* Rewrite *LOC so that it refers to a default TLS address space.  */

static void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      rtx *loc = *iter;
      if (MEM_P (*loc))
	{
	  rtx addr = XEXP (*loc, 0);
	  rtx *x = &addr;
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    {
		      addr_space_t as = DEFAULT_TLS_SEG_REG;

		      *x = XEXP (*x, 1 - i);

		      *loc = replace_equiv_address_nv (*loc, addr, true);
		      set_mem_addr_space (*loc, as);
		      return;
		    }
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }
}
/* Rewrite an instruction pattern involving a TLS address
   so that it refers to a default TLS address space.  */

rtx
ix86_rewrite_tls_address (rtx pattern)
{
  pattern = copy_insn (pattern);
  ix86_rewrite_tls_address_1 (&pattern);
  return pattern;
}
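/* E.g. (illustrative RTL shape): a pattern containing
   (mem (plus (unspec [(const_int 0)] UNSPEC_TP) (reg R))) is rewritten
   so that the UNSPEC_TP disappears and the MEM instead carries the
   DEFAULT_TLS_SEG_REG address space, printing as a %fs:/%gs:-relative
   access.  */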
/* Create or return the unique __imp_DECL dllimport symbol corresponding
   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
   unique refptr-DECL symbol corresponding to symbol DECL.  */

struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
{
  static inline hashval_t hash (tree_map *m) { return m->hash; }

  static inline bool
  equal (tree_map *a, tree_map *b)
  {
    return a->base.from == b->base.from;
  }

  static int
  keep_cache_entry (tree_map *&m)
  {
    return ggc_marked_p (m->base.from);
  }
};

static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;

static tree
get_dllimport_decl (tree decl, bool beimport)
{
  struct tree_map *h, in;
  const char *name;
  const char *prefix;
  size_t namelen, prefixlen;
  char *imp_name;
  tree to;
  rtx rtl;

  if (!dllimport_map)
    dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);

  in.hash = htab_hash_pointer (decl);
  in.base.from = decl;
  tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
  h = *loc;
  if (h)
    return h->to;

  *loc = h = ggc_alloc<tree_map> ();
  h->hash = in.hash;
  h->base.from = decl;
  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
			   VAR_DECL, NULL, ptr_type_node);
  DECL_ARTIFICIAL (to) = 1;
  DECL_IGNORED_P (to) = 1;
  DECL_EXTERNAL (to) = 1;
  TREE_READONLY (to) = 1;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = targetm.strip_name_encoding (name);
  if (beimport)
    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
	     ? "*__imp_" : "*__imp__";
  else
    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
  namelen = strlen (name);
  prefixlen = strlen (prefix);
  imp_name = (char *) alloca (namelen + prefixlen + 1);
  memcpy (imp_name, prefix, prefixlen);
  memcpy (imp_name + prefixlen, name, namelen + 1);

  name = ggc_alloc_string (imp_name, namelen + prefixlen);
  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
  SET_SYMBOL_REF_DECL (rtl, to);
  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
  if (!beimport)
    {
      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
#ifdef SUB_TARGET_RECORD_STUB
      SUB_TARGET_RECORD_STUB (name);
#endif
    }

  rtl = gen_const_mem (Pmode, rtl);
  set_mem_alias_set (rtl, ix86_GOT_alias_set ());

  SET_DECL_RTL (to, rtl);
  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));

  return to;
}
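/* Illustrative: for a dllimport'ed symbol "foo", references then go
   through the "__imp_foo" pointer built above, e.g. a 64-bit access
   becomes "movq __imp_foo(%rip), %rax" followed by a dereference,
   instead of referencing foo directly.  */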
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result be a register.  */

static rtx
legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}

/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result be a register.  */

static rtx
legitimize_dllimport_symbol (rtx symbol, bool want_reg)
{
  tree imp_decl;
  rtx x;

  gcc_assert (SYMBOL_REF_DECL (symbol));
  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);

  x = DECL_RTL (imp_decl);
  if (want_reg)
    x = force_reg (Pmode, x);
  return x;
}
/* Expand SYMBOL into its corresponding dllimport or refptr symbol.  WANT_REG
   is true if we require the result be a register.  */

rtx
legitimize_pe_coff_symbol (rtx addr, bool inreg)
{
  if (!TARGET_PECOFF)
    return NULL_RTX;

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
	return legitimize_dllimport_symbol (addr, inreg);
      if (GET_CODE (addr) == CONST
	  && GET_CODE (XEXP (addr, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
	{
	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0),
					       inreg);
	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
	}
    }

  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
    return NULL_RTX;

  if (GET_CODE (addr) == SYMBOL_REF
      && !is_imported_p (addr)
      && SYMBOL_REF_EXTERNAL_P (addr)
      && SYMBOL_REF_DECL (addr))
    return legitimize_pe_coff_extern_decl (addr, inreg);

  if (GET_CODE (addr) == CONST
      && GET_CODE (XEXP (addr, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
    {
      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0),
					      inreg);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
    }

  return NULL_RTX;
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.c.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.c for details.  */
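/* For instance, the canonicalizations below rewrite
   (plus (ashift (reg) (const_int 2)) (reg)) into
   (plus (mult (reg) (const_int 4)) (reg)), which matches the
   scaled-index form that ix86_legitimate_address_p accepts.  */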
static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
	return tmp;
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode,
						 XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode,
						 XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1),
						    0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    case CONST_INT:
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      fputs ("-", file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
      break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void ATTRIBUTE_UNUSED
i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  fputs (ASM_LONG, file);
  output_addr_const (file, x);
  fputs ("@dtpoff", file);
  switch (size)
    {
    case 4:
      break;
    case 8:
      fputs (", 0", file);
      break;
    default:
      gcc_unreachable ();
   }
}
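/* So for size 4 this emits e.g. ".long foo@dtpoff"; for size 8 the same
   32-bit relocation is padded with ", 0" to fill the upper half.  */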
/* Return true if X is a representation of the PIC register.  This copes
   with calls from ix86_find_base_term, where the register might have
   been replaced by a cselib value.  */

static bool
ix86_pic_register_p (rtx x)
{
  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
    return (pic_offset_table_rtx
	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
    return true;
  else if (!REG_P (x))
    return false;
  else if (pic_offset_table_rtx)
    {
      if (REGNO (x) == REGNO (pic_offset_table_rtx))
	return true;
      if (HARD_REGISTER_P (x)
	  && !HARD_REGISTER_P (pic_offset_table_rtx)
	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
	return true;
      return false;
    }
  else
    return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
}
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (GET_CODE (x) == SYMBOL_REF);
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize PIC+GOTOFF and turn it back
   into a direct symbol reference.

   On Darwin, this is necessary to avoid a crash, because Darwin
   has a different PIC label for each routine but the DWARF debugging
   information is not associated with any particular routine, so it's
   necessary to remove references to the PIC label from RTL stored by
   the DWARF output code.

   This helper is used in the normal ix86_delegitimize_address
   entrypoint (e.g. used in the target delegitimization hook) and
   in ix86_find_base_term.  As compile time memory optimization, we
   avoid allocating rtxes that will not change anything on the outcome
   of the callers (find_base_value and find_base_term).  */

static rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_MODE (XEXP (x, 0)) == Pmode
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
	  && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
	{
	  /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
	     base.  A CONST can't be arg_pointer_rtx based.  */
	  if (base_term_p && MEM_P (orig_x))
	    return orig_x;
	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
	  if (MEM_P (orig_x))
	    x = replace_equiv_address_nv (orig_x, x);
	  return x;
	}

      if (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == UNSPEC
	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
	{
	  x = XVECEXP (XEXP (x, 0), 0, 0);
	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
	    {
	      x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
	      if (x == NULL_RTX)
		return orig_x;
	    }
	  return x;
	}

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
	return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
	 and -mcmodel=medium -fpic.  */
    }

  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
	reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
	reg_addend = XEXP (reg_addend, 0);
      else
	{
	  reg_addend = NULL_RTX;
	  addend = XEXP (x, 0);
	}
    }
  else
    addend = XEXP (x, 0);

  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
	      && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result,
						 const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
	 for code like:
	 leal (%ebx, %ecx, 4), %ecx
	 ...
	 movl foo@GOTOFF(%ecx), %edx
	 in which case we return (%ecx - %ebx) + foo
	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
	 and reload has completed.  Don't do the latter for debug,
	 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
      if (pic_offset_table_rtx
	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
	result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode,
						     copy_rtx (addend),
						     pic_offset_table_rtx),
			       result);
      else if (base_term_p
	       && pic_offset_table_rtx
	       && !TARGET_MACHO
	       && !TARGET_VXWORKS_RTP)
	{
	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
	  result = gen_rtx_PLUS (Pmode, tmp, result);
	}
      else
	return orig_x;
    }
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
      if (result == NULL_RTX)
	return orig_x;
    }
  return result;
}

/* The normal instantiation of the above template.  */

static rtx
ix86_delegitimize_address (rtx x)
{
  return ix86_delegitimize_address_1 (x, false);
}
/* If X is a machine specific address (i.e. a symbol or label being
   referenced as a displacement from the GOT implemented using an
   UNSPEC), then return the base term.  Otherwise return X.  */

rtx
ix86_find_base_term (rtx x)
{
  rtx term;

  if (TARGET_64BIT)
    {
      if (GET_CODE (x) != CONST)
	return x;
      term = XEXP (x, 0);
      if (GET_CODE (term) == PLUS
	  && CONST_INT_P (XEXP (term, 1)))
	term = XEXP (term, 0);
      if (GET_CODE (term) != UNSPEC
	  || (XINT (term, 1) != UNSPEC_GOTPCREL
	      && XINT (term, 1) != UNSPEC_PCREL))
	return x;

      return XVECEXP (term, 0, 0);
    }

  return ix86_delegitimize_address_1 (x, true);
}
/* Return true if X shouldn't be emitted into the debug info.
   Disallow UNSPECs other than @gotoff - we can't emit the
   _GLOBAL_OFFSET_TABLE_ symbol easily into the .debug_info section,
   so we need not delegitimize, but instead assemble as @gotoff.
   Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   assembles that as the _GLOBAL_OFFSET_TABLE_-. expression.  */

static bool
ix86_const_not_ok_for_debug_p (rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
    return true;

  if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
    return true;

  return false;
}
static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      suffix = "e";
      break;

    case NE:
      gcc_assert (mode != CCGZmode);
      suffix = "ne";
      break;

    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;

    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else
	gcc_unreachable ();
      break;

    case LT:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "s";
      else if (mode == CCmode || mode == CCGCmode || mode == CCGZmode)
	suffix = "l";
      else
	gcc_unreachable ();
      break;

    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b";
      else if (mode == CCCmode)
	suffix = fp ? "b" : "c";
      else
	gcc_unreachable ();
      break;

    case GE:
      if (mode == CCNOmode || mode == CCGOCmode)
	suffix = "ns";
      else if (mode == CCmode || mode == CCGCmode || mode == CCGZmode)
	suffix = "ge";
      else
	gcc_unreachable ();
      break;

    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb";
      else if (mode == CCCmode)
	suffix = fp ? "nb" : "nc";
      else
	gcc_unreachable ();
      break;

    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;

    case LEU:
      if (mode == CCmode)
	suffix = "be";
      else
	gcc_unreachable ();
      break;

    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;

    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;

    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
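/* E.g. (EQ, CCmode) prints "e" and (GTU, CCmode) prints "a", so an
   insn template like "j%C1\t%l0" (illustrative) ends up as "je" or
   "ja"; with REVERSE set, the complementary suffix is printed.  */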
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'g', pretend the mode is V16SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
   If CODE is 'V', print naked full integer register name without %.  */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;
  unsigned int regno;
  bool duplicated;

  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc ('%', file);

  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs ("st(0)", file);
      return;
    }

  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'", reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output");
      return;
    }

  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
      else
	error ("%<V%> modifier on non-integer register");
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register");
      /* FALLTHRU */
    case 4:
      if (LEGACY_INT_REGNO_P (regno))
	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      break;
    case 0:
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      if (SSE_REGNO_P (regno))
	{
	  gcc_assert (!duplicated);
	  putc (msize == 32 ? 'y' : 'z', file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);

  /* Irritatingly, AMD extended registers use a
     different naming convention: "r%d[bwd]".  */
  if (REX_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
	{
	case 0:
	  error ("extended registers have no high halves");
	  break;
	case 1:
	  putc ('b', file);
	  break;
	case 2:
	  putc ('w', file);
	  break;
	case 4:
	  putc ('d', file);
	  break;
	case 8:
	  /* no suffix */
	  break;
	default:
	  error ("unsupported operand size for extended register");
	  break;
	}
      return;
    }

  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
/* Meaning of CODE:
   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   C -- print opcode suffix for set/cmov insn.
   c -- like C, but print reversed condition
   F,f -- likewise, but for floating-point.
   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
	otherwise nothing
   R -- print embedded rounding and sae.
   r -- print only sae.
   z -- print the opcode suffix for the size of the current operand.
   Z -- likewise, with special suffixes for x87 instructions.
   * -- print a star (in certain assembler syntax)
   A -- print an absolute memory reference.
   E -- print address with DImode register names if TARGET_64BIT.
   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assemblers argument
	delimiter.
   b -- print the QImode name of the register for the indicated operand.
	%b0 would print %al if operands[0] is reg 0.
   w -- likewise, print the HImode name of the register.
   k -- likewise, print the SImode name of the register.
   q -- likewise, print the DImode name of the register.
   x -- likewise, print the V4SFmode name of the register.
   t -- likewise, print the V8SFmode name of the register.
   g -- likewise, print the V16SFmode name of the register.
   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   y -- print "st(0)" instead of "st" as a register.
   d -- print duplicated register operand for AVX instruction.
   D -- print condition for SSE cmp instruction.
   P -- if PIC, print an @PLT suffix.
   p -- print raw symbol name.
   X -- don't print any sort of PIC '@' suffix for a symbol.
   & -- print some in-use local-dynamic symbol name.
   H -- print a memory address offset by 8; used for sse high-parts
   Y -- print condition for XOP pcom* instruction.
   V -- print naked full integer register name without %.
   + -- print a branch hint as 'cs' or 'ds' prefix
   ; -- print a semicolon (after prefixes due to bug in older gas).
   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
   M -- print addr32 prefix for TARGET_X32 with VSIB address.
   ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
   N -- print maskz if it's constant 0 operand.
 */
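/* For example, with operands[0] in register ax, the template fragment
   "%b0" prints "%al", "%w0" prints "%ax" and "%k0" prints "%eax" in
   AT&T dialect; the list above documents each such override.  */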
12727 ix86_print_operand (FILE *file
, rtx x
, int code
)
12734 switch (ASSEMBLER_DIALECT
)
12741 /* Intel syntax. For absolute addresses, registers should not
12742 be surrounded by braces. */
12746 ix86_print_operand (file
, x
, 0);
12753 gcc_unreachable ();
12756 ix86_print_operand (file
, x
, 0);
12760 /* Wrap address in an UNSPEC to declare special handling. */
12762 x
= gen_rtx_UNSPEC (DImode
, gen_rtvec (1, x
), UNSPEC_LEA_ADDR
);
12764 output_address (VOIDmode
, x
);
12768 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12773 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12778 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12783 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12788 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12793 if (ASSEMBLER_DIALECT
== ASM_ATT
)
12798 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12799 if (ASSEMBLER_DIALECT
!= ASM_ATT
)
12802 switch (GET_MODE_SIZE (GET_MODE (x
)))
12817 output_operand_lossage ("invalid operand size for operand "
12827 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
12829 /* Opcodes don't get size suffixes if using Intel opcodes. */
12830 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12833 switch (GET_MODE_SIZE (GET_MODE (x
)))
12852 output_operand_lossage ("invalid operand size for operand "
12858 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
12859 warning (0, "non-integer operand used with operand code %<z%>");
12863 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12864 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
12867 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
12869 switch (GET_MODE_SIZE (GET_MODE (x
)))
12872 #ifdef HAVE_AS_IX86_FILDS
12882 #ifdef HAVE_AS_IX86_FILDQ
12885 fputs ("ll", file
);
12893 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
12895 /* 387 opcodes don't get size suffixes
12896 if the operands are registers. */
12897 if (STACK_REG_P (x
))
12900 switch (GET_MODE_SIZE (GET_MODE (x
)))
12921 output_operand_lossage ("invalid operand type used with "
12922 "operand code 'Z'");
12926 output_operand_lossage ("invalid operand size for operand code 'Z'");
12946 if (CONST_INT_P (x
) || ! SHIFT_DOUBLE_OMITS_COUNT
)
12948 ix86_print_operand (file
, x
, 0);
12949 fputs (", ", file
);
12954 switch (GET_CODE (x
))
12957 fputs ("neq", file
);
12960 fputs ("eq", file
);
12964 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "ge" : "unlt", file
);
12968 fputs (INTEGRAL_MODE_P (GET_MODE (x
)) ? "gt" : "unle", file
);
12972 fputs ("le", file
);
12976 fputs ("lt", file
);
12979 fputs ("unord", file
);
12982 fputs ("ord", file
);
12985 fputs ("ueq", file
);
12988 fputs ("nlt", file
);
12991 fputs ("nle", file
);
12994 fputs ("ule", file
);
12997 fputs ("ult", file
);
13000 fputs ("une", file
);
13003 output_operand_lossage ("operand is not a condition code, "
13004 "invalid operand code 'Y'");
13010 /* Little bit of braindamage here. The SSE compare instructions
13011 does use completely different names for the comparisons that the
13012 fp conditional moves. */
13013 switch (GET_CODE (x
))
13018 fputs ("eq_us", file
);
13023 fputs ("eq", file
);
13028 fputs ("nge", file
);
13033 fputs ("lt", file
);
13038 fputs ("ngt", file
);
13043 fputs ("le", file
);
13046 fputs ("unord", file
);
13051 fputs ("neq_oq", file
);
13056 fputs ("neq", file
);
13061 fputs ("ge", file
);
13066 fputs ("nlt", file
);
13071 fputs ("gt", file
);
13076 fputs ("nle", file
);
13079 fputs ("ord", file
);
13082 output_operand_lossage ("operand is not a condition code, "
13083 "invalid operand code 'D'");
13090 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13091 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13093 gcc_fallthrough ();
13098 if (!COMPARISON_P (x
))
13100 output_operand_lossage ("operand is not a condition code, "
13101 "invalid operand code '%c'", code
);
13104 put_condition_code (GET_CODE (x
), GET_MODE (XEXP (x
, 0)),
13105 code
== 'c' || code
== 'f',
13106 code
== 'F' || code
== 'f',
13111 if (!offsettable_memref_p (x
))
13113 output_operand_lossage ("operand is not an offsettable memory "
13114 "reference, invalid operand code 'H'");
13117 /* It doesn't actually matter what mode we use here, as we're
13118 only going to use this for printing. */
13119 x
= adjust_address_nv (x
, DImode
, 8);
13120 /* Output 'qword ptr' for intel assembler dialect. */
13121 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13126 if (!CONST_INT_P (x
))
13128 output_operand_lossage ("operand is not an integer, invalid "
13129 "operand code 'K'");
13133 if (INTVAL (x
) & IX86_HLE_ACQUIRE
)
13134 #ifdef HAVE_AS_IX86_HLE
13135 fputs ("xacquire ", file
);
13137 fputs ("\n" ASM_BYTE
"0xf2\n\t", file
);
13139 else if (INTVAL (x
) & IX86_HLE_RELEASE
)
13140 #ifdef HAVE_AS_IX86_HLE
13141 fputs ("xrelease ", file
);
13143 fputs ("\n" ASM_BYTE
"0xf3\n\t", file
);
13145 /* We do not want to print value of the operand. */
13149 if (x
== const0_rtx
|| x
== CONST0_RTX (GET_MODE (x
)))
13150 fputs ("{z}", file
);
13154 if (!CONST_INT_P (x
) || INTVAL (x
) != ROUND_SAE
)
13156 output_operand_lossage ("operand is not a specific integer, "
13157 "invalid operand code 'r'");
13161 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13162 fputs (", ", file
);
13164 fputs ("{sae}", file
);
13166 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13167 fputs (", ", file
);
13172 if (!CONST_INT_P (x
))
13174 output_operand_lossage ("operand is not an integer, invalid "
13175 "operand code 'R'");
13179 if (ASSEMBLER_DIALECT
== ASM_INTEL
)
13180 fputs (", ", file
);
13182 switch (INTVAL (x
))
13184 case ROUND_NEAREST_INT
| ROUND_SAE
:
13185 fputs ("{rn-sae}", file
);
13187 case ROUND_NEG_INF
| ROUND_SAE
:
13188 fputs ("{rd-sae}", file
);
13190 case ROUND_POS_INF
| ROUND_SAE
:
13191 fputs ("{ru-sae}", file
);
13193 case ROUND_ZERO
| ROUND_SAE
:
13194 fputs ("{rz-sae}", file
);
13197 output_operand_lossage ("operand is not a specific integer, "
13198 "invalid operand code 'R'");
13201 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13202 fputs (", ", file
);
13207 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13213 const char *name
= get_some_local_dynamic_name ();
13215 output_operand_lossage ("'%%&' used without any "
13216 "local dynamic TLS references");
13218 assemble_name (file
, name
);
13227 || optimize_function_for_size_p (cfun
)
13228 || !TARGET_BRANCH_PREDICTION_HINTS
)
13231 x
= find_reg_note (current_output_insn
, REG_BR_PROB
, 0);
13234 int pred_val
= profile_probability::from_reg_br_prob_note
13235 (XINT (x
, 0)).to_reg_br_prob_base ();
13237 if (pred_val
< REG_BR_PROB_BASE
* 45 / 100
13238 || pred_val
> REG_BR_PROB_BASE
* 55 / 100)
13240 bool taken
= pred_val
> REG_BR_PROB_BASE
/ 2;
13242 = final_forward_branch_p (current_output_insn
) == 0;
13244 /* Emit hints only in the case default branch prediction
13245 heuristics would fail. */
13246 if (taken
!= cputaken
)
13248 /* We use 3e (DS) prefix for taken branches and
13249 2e (CS) prefix for not taken branches. */
13251 fputs ("ds ; ", file
);
13253 fputs ("cs ; ", file
);
13261 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13267 putc (TARGET_AVX2
? 'i' : 'f', file
);
13273 /* NB: 32-bit indices in VSIB address are sign-extended
13274 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
13275 sign-extended to 0xfffffffff7fa3010 which is invalid
13276 address. Add addr32 prefix if there is no base
13277 register nor symbol. */
13279 struct ix86_address parts
;
13280 ok
= ix86_decompose_address (x
, &parts
);
13281 gcc_assert (ok
&& parts
.index
== NULL_RTX
);
13282 if (parts
.base
== NULL_RTX
13283 && (parts
.disp
== NULL_RTX
13284 || !symbolic_operand (parts
.disp
,
13285 GET_MODE (parts
.disp
))))
13286 fputs ("addr32 ", file
);
13291 if (TARGET_64BIT
&& Pmode
!= word_mode
)
13292 fputs ("addr32 ", file
);
13296 if (ix86_notrack_prefixed_insn_p (current_output_insn
))
13297 fputs ("notrack ", file
);
13301 output_operand_lossage ("invalid operand code '%c'", code
);
13306 print_reg (x
, code
, file
);
13308 else if (MEM_P (x
))
13310 rtx addr
= XEXP (x
, 0);
13312 /* No `byte ptr' prefix for call instructions ... */
13313 if (ASSEMBLER_DIALECT
== ASM_INTEL
&& code
!= 'X' && code
!= 'P')
13315 machine_mode mode
= GET_MODE (x
);
13318 /* Check for explicit size override codes. */
13321 else if (code
== 'w')
13323 else if (code
== 'k')
13325 else if (code
== 'q')
13327 else if (code
== 'x')
13329 else if (code
== 't')
13331 else if (code
== 'g')
13333 else if (mode
== BLKmode
)
13334 /* ... or BLKmode operands, when not overridden. */
13337 switch (GET_MODE_SIZE (mode
))
13339 case 1: size
= "BYTE"; break;
13340 case 2: size
= "WORD"; break;
13341 case 4: size
= "DWORD"; break;
13342 case 8: size
= "QWORD"; break;
13343 case 12: size
= "TBYTE"; break;
13345 if (mode
== XFmode
)
13350 case 32: size
= "YMMWORD"; break;
13351 case 64: size
= "ZMMWORD"; break;
13353 gcc_unreachable ();
13357 fputs (size
, file
);
13358 fputs (" PTR ", file
);
13362 if (this_is_asm_operands
&& ! address_operand (addr
, VOIDmode
))
13363 output_operand_lossage ("invalid constraints for operand");
13365 ix86_print_operand_address_as
13366 (file
, addr
, MEM_ADDR_SPACE (x
), code
== 'p' || code
== 'P');
13369 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == SFmode
)
13373 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
13375 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13377 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13379 fprintf (file
, "0x%08" HOST_LONG_LONG_FORMAT
"x",
13380 (unsigned long long) (int) l
);
13382 fprintf (file
, "0x%08x", (unsigned int) l
);
13385 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == DFmode
)
13389 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
13391 if (ASSEMBLER_DIALECT
== ASM_ATT
)
13393 fprintf (file
, "0x%lx%08lx", l
[1] & 0xffffffff, l
[0] & 0xffffffff);
13396 /* These float cases don't actually occur as immediate operands. */
13397 else if (CONST_DOUBLE_P (x
) && GET_MODE (x
) == XFmode
)
13401 real_to_decimal (dstr
, CONST_DOUBLE_REAL_VALUE (x
), sizeof (dstr
), 0, 1);
13402 fputs (dstr
, file
);
  /* Print bcst_mem_operand.  */
  else if (GET_CODE (x) == VEC_DUPLICATE)
    {
      machine_mode vmode = GET_MODE (x);
      /* Must be bcst_memory_operand.  */
      gcc_assert (bcst_mem_operand (x, vmode));

      rtx mem = XEXP (x, 0);
      ix86_print_operand (file, mem, 0);

      switch (vmode)
	{
	case E_V2DImode:
	case E_V2DFmode:
	  fputs ("{1to2}", file);
	  break;
	case E_V4SImode:
	case E_V4SFmode:
	case E_V4DImode:
	case E_V4DFmode:
	  fputs ("{1to4}", file);
	  break;
	case E_V8SImode:
	case E_V8SFmode:
	case E_V8DFmode:
	case E_V8DImode:
	  fputs ("{1to8}", file);
	  break;
	case E_V16SFmode:
	case E_V16SImode:
	  fputs ("{1to16}", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }
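  /* Illustrative example (not from the original source): for a
     V16SFmode VEC_DUPLICATE of a 4-byte memory operand this prints the
     EVEX embedded-broadcast form, e.g.

	vaddps	(%rax){1to16}, %zmm1, %zmm0

     where the {1to16} suffix follows the memory operand just
     printed.  */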
  else
    {
      /* We have patterns that allow zero sets of memory, for instance.
	 In 64-bit mode, we should probably support all 8-byte vectors,
	 since we can in fact encode that into an immediate.  */
      if (GET_CODE (x) == CONST_VECTOR)
	{
	  if (x != CONST0_RTX (GET_MODE (x)))
	    output_operand_lossage ("invalid vector immediate");
	  x = const0_rtx;
	}

      if (code != 'P' && code != 'p')
	{
	  if (CONST_INT_P (x))
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	    }
	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
		   || GET_CODE (x) == LABEL_REF)
	    {
	      if (ASSEMBLER_DIALECT == ASM_ATT)
		putc ('$', file);
	      else
		fputs ("OFFSET FLAT:", file);
	    }
	}
      if (CONST_INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
      else if (flag_pic || MACHOPIC_INDIRECT)
	output_pic_addr_const (file, x, code);
      else
	output_addr_const (file, x);
    }
}

static bool
ix86_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '*' || code == '+' || code == '&' || code == ';'
	  || code == '~' || code == '^' || code == '!');
}
/* Print a memory operand whose address is ADDR.  */

static void
ix86_print_operand_address_as (FILE *file, rtx addr,
			       addr_space_t as, bool no_rip)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  if (ADDR_SPACE_GENERIC_P (as))
    as = parts.seg;
  else
    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));

  if (!ADDR_SPACE_GENERIC_P (as))
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);

      switch (as)
	{
	case ADDR_SPACE_SEG_FS:
	  fputs ("fs:", file);
	  break;
	case ADDR_SPACE_SEG_GS:
	  fputs ("gs:", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index && !no_rip)
    {
      rtx symbol = disp;

      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      if (GET_CODE (symbol) == LABEL_REF
	  || (GET_CODE (symbol) == SYMBOL_REF
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      /* Load the external function address via the GOT slot to avoid PLT.  */
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	output_pic_addr_const (file, disp, 0);
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
	{
	  if (flag_checking)
	    {
	      gcc_assert (TARGET_64BIT);
	      switch (GET_CODE (addr))
		{
		case SUBREG:
		  gcc_assert (GET_MODE (addr) == SImode);
		  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
		  break;
		case ZERO_EXTEND:
		case AND:
		  gcc_assert (GET_MODE (addr) == DImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  gcc_assert (!code);
	  code = 'l';
	}
      else if (code == 0
	       && TARGET_X32
	       && disp
	       && CONST_INT_P (disp)
	       && INTVAL (disp) < -16*1024*1024)
	{
	  /* X32 runs in 64-bit mode, where displacement, DISP, in
	     address DISP(%r64), is encoded as 32-bit immediate sign-
	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
	     which is invalid for x32.  The correct address is %r64
	     - 0x40000300 == 0xf7ffdd64.  To properly encode
	     -0x40000300(%r64) for x32, we zero-extend negative
	     displacement by forcing addr32 prefix which truncates
	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
	     zero-extend all negative displacements, including -1(%rsp).
	     However, for small negative displacements, sign-extension
	     won't cause overflow.  We only zero-extend negative
	     displacements if they < -16*1024*1024, which is also used
	     to check legitimate address displacements for PIC.  */
	  code = 'k';
	}

      /* Since the upper 32 bits of RSP are always zero for x32,
	 we can encode %esp as %rsp to avoid 0x67 prefix if
	 there is no index register.  */
      if (TARGET_X32 && Pmode == SImode
	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)
	code = 'q';

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (GET_CODE (disp) == LABEL_REF)
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
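/* Illustrative summary of the two dialects handled above: an address
   with base %rbx, index %rcx, scale 4 and displacement 8 prints as

	8(%rbx,%rcx,4)		AT&T branch (ASM_ATT)
	[rbx+rcx*4+8]		Intel branch (ASM_INTEL)

   and a VSIB index is printed with code 0 so that its natural vector
   register name is used.  */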
static void
ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
{
  ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
}

/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTOFF:
      output_addr_const (file, op);
      fputs ("@gotoff", file);
      break;
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs ("@tpoff", file);
      else
	fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
	fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif
    default:
      return false;
    }

  return true;
}
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif
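/* Illustrative consequence (an assumption drawn from the comment
   above, not a statement from the original sources): on a
   SYSV386_COMPAT assembler, a reversed-form mnemonic such as

	fsub	%st, %st(1)

   is assembled to the opcode whose hardware meaning is the opposite
   subtraction direction, so the {AT&T|Intel} templates below choose
   the fsub/fsubr spelling per dialect that makes the emitted opcode
   match the operation the insn actually wants.  */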
const char *
output_387_binary_op (rtx_insn *insn, rtx *operands)
{
  static char buf[40];
  const char *p;
  bool is_sse
    = (SSE_REG_P (operands[0])
       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));

  if (is_sse)
    p = "%v";
  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
	   || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
    p = "fi";
  else
    p = "f";

  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      p = "add"; break;
    case MINUS:
      p = "sub"; break;
    case MULT:
      p = "mul"; break;
    case DIV:
      p = "div"; break;
    default:
      gcc_unreachable ();
    }

  strcat (buf, p);

  if (is_sse)
    {
      p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
      strcat (buf, p);

      if (TARGET_AVX)
	p = "\t{%2, %1, %0|%0, %1, %2}";
      else
	p = "\t{%2, %0|%0, %2}";

      strcat (buf, p);
      return buf;
    }

  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (flag_checking)
    {
      if (STACK_REG_P (operands[0])
	  && ((REG_P (operands[1])
	       && REGNO (operands[0]) == REGNO (operands[1])
	       && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
	      || (REG_P (operands[2])
		  && REGNO (operands[0]) == REGNO (operands[2])
		  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
	  && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
	; /* ok */
      else
	gcc_unreachable ();
    }

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
	std::swap (operands[1], operands[2]);

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
	  if (STACK_TOP_P (operands[0]))
	    /* How is it that we are storing to a dead operand[2]?
	       Well, presumably operands[1] is dead too.  We can't
	       store the result to st(0) as st(0) gets popped on this
	       instruction.  Instead store to operands[2] (which I
	       think has to be st(1)).  st(1) will be popped later.
	       gcc <= 2.8.1 didn't have this check and generated
	       assembly code that the Unixware assembler rejected.  */
	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
      else
	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      if (MEM_P (operands[1]))
	{
	  p = "r%Z1\t%1";
	  break;
	}

      if (MEM_P (operands[2]))
	{
	  p = "%Z2\t%2";
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
	{
#if SYSV386_COMPAT
	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
	     derived assemblers, confusingly reverse the direction of
	     the operation for fsub{r} and fdiv{r} when the
	     destination register is not st(0).  The Intel assembler
	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
	     figure out what the hardware really does.  */
	  if (STACK_TOP_P (operands[0]))
	    p = "{p\t%0, %2|rp\t%2, %0}";
	  else
	    p = "{rp\t%2, %0|p\t%0, %2}";
#else
	  if (STACK_TOP_P (operands[0]))
	    /* As above for fmul/fadd, we can't store to st(0).  */
	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
	  else
	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
#endif
	  break;
	}

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
#if SYSV386_COMPAT
	  if (STACK_TOP_P (operands[0]))
	    p = "{rp\t%0, %1|p\t%1, %0}";
	  else
	    p = "{p\t%1, %0|rp\t%0, %1}";
#else
	  if (STACK_TOP_P (operands[0]))
	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
	  else
	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
#endif
	  break;
	}

      if (STACK_TOP_P (operands[0]))
	{
	  if (STACK_TOP_P (operands[1]))
	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
	  else
	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
	  break;
	}
      else if (STACK_TOP_P (operands[1]))
	{
#if SYSV386_COMPAT
	  p = "{\t%1, %0|r\t%0, %1}";
#else
	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
#endif
	  break;
	}
      else
	{
#if SYSV386_COMPAT
	  p = "{r\t%2, %0|\t%0, %2}";
#else
	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
#endif
	  break;
	}

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
/* Return needed mode for entity in optimize_mode_switching pass.  */

static int
ix86_dirflag_mode_needed (rtx_insn *insn)
{
  if (CALL_P (insn))
    {
      if (cfun->machine->func_type == TYPE_NORMAL)
	return X86_DIRFLAG_ANY;
      else
	/* No need to emit CLD in interrupt handler for TARGET_CLD.  */
	return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
    }

  if (recog_memoized (insn) < 0)
    return X86_DIRFLAG_ANY;

  if (get_attr_type (insn) == TYPE_STR)
    {
      /* Emit cld instruction if stringops are used in the function.  */
      if (cfun->machine->func_type == TYPE_NORMAL)
	return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
      else
	return X86_DIRFLAG_RESET;
    }

  return X86_DIRFLAG_ANY;
}

/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP.  */

static bool
ix86_check_avx_upper_register (const_rtx exp)
{
  return (SSE_REG_P (exp)
	  && !EXT_REX_SSE_REG_P (exp)
	  && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
}

/* Return needed mode for entity in optimize_mode_switching pass.  */

static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_CLEAN if there are
	 no 256bit or 512bit modes used in function arguments.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx_upper_register (arg))
		return AVX_U128_DIRTY;
	    }
	}

      /* If the function is known to preserve some SSE registers,
	 RA and previous passes can legitimately rely on that for
	 modes wider than 256 bits.  It's only safe to issue a
	 vzeroupper if all SSE registers are clobbered.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
				  abi.mode_clobbers (V4DImode)))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving optimal insertion
     point above eventual read from 256bit or 512 bit register.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
    if (ix86_check_avx_upper_register (*iter))
      return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
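/* Context note (illustrative, not from the original sources): these
   AVX_U128_* modes drive automatic vzeroupper insertion.  A function
   whose body dirties %ymm0 and then makes a call with no 256-bit or
   512-bit arguments transitions DIRTY -> CLEAN by emitting

	vzeroupper

   ahead of the call, avoiding AVX/SSE transition stalls in a callee
   compiled for SSE.  */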
/* Return mode that i387 must be switched into
   prior to the execution of insn.  */

static int
ix86_i387_mode_needed (int entity, rtx_insn *insn)
{
  enum attr_i387_cw mode;

  /* The mode UNINITIALIZED is used to store control word after a
     function call or ASM pattern.  The mode ANY specify that function
     has no requirements on the control word and make no changes in the
     bits we are interested in.  */

  if (CALL_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (asm_noperands (PATTERN (insn)) >= 0
	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
    return I387_CW_UNINITIALIZED;

  if (recog_memoized (insn) < 0)
    return I387_CW_ANY;

  mode = get_attr_i387_cw (insn);

  switch (entity)
    {
    case I387_ROUNDEVEN:
      if (mode == I387_CW_ROUNDEVEN)
	return mode;
      break;

    case I387_TRUNC:
      if (mode == I387_CW_TRUNC)
	return mode;
      break;

    case I387_FLOOR:
      if (mode == I387_CW_FLOOR)
	return mode;
      break;

    case I387_CEIL:
      if (mode == I387_CW_CEIL)
	return mode;
      break;

    default:
      gcc_unreachable ();
    }

  return I387_CW_ANY;
}

/* Return mode that entity must be switched into
   prior to the execution of insn.  */

static int
ix86_mode_needed (int entity, rtx_insn *insn)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return ix86_dirflag_mode_needed (insn);
    case AVX_U128:
      return ix86_avx_u128_mode_needed (insn);
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      return ix86_i387_mode_needed (entity, insn);
    default:
      gcc_unreachable ();
    }
  return 0;
}

/* Check if a 256bit or 512bit AVX register is referenced in stores.  */

static void
ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
{
  if (ix86_check_avx_upper_register (dest))
    {
      bool *used = (bool *) data;
      *used = true;
    }
}

/* Calculate mode of upper 128bit AVX registers after the insn.  */

static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  if (vzeroupper_pattern (pat, VOIDmode)
      || vzeroall_pattern (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit or 512bit registers used in the function return register.  */
  if (CALL_P (insn))
    {
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);

      return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit or 512bit registers, the mode was already
     changed to DIRTY from MODE_NEEDED.  */

  return mode;
}

/* Return the mode that an insn results in.  */

static int
ix86_mode_after (int entity, int mode, rtx_insn *insn)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return mode;
    case AVX_U128:
      return ix86_avx_u128_mode_after (mode, insn);
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      return mode;
    default:
      gcc_unreachable ();
    }
}

static int
ix86_dirflag_mode_entry (void)
{
  /* For TARGET_CLD or in the interrupt handler we can't assume
     direction flag state at function entry.  */
  if (TARGET_CLD
      || cfun->machine->func_type != TYPE_NORMAL)
    return X86_DIRFLAG_ANY;

  return X86_DIRFLAG_RESET;
}

static int
ix86_avx_u128_mode_entry (void)
{
  tree arg;

  /* Entry mode is set to AVX_U128_DIRTY if there are
     256bit or 512bit modes used in function arguments.  */
  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
       arg = TREE_CHAIN (arg))
    {
      rtx incoming = DECL_INCOMING_RTL (arg);

      if (incoming && ix86_check_avx_upper_register (incoming))
	return AVX_U128_DIRTY;
    }

  return AVX_U128_CLEAN;
}

/* Return a mode that ENTITY is assumed to be
   switched to at function entry.  */

static int
ix86_mode_entry (int entity)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return ix86_dirflag_mode_entry ();
    case AVX_U128:
      return ix86_avx_u128_mode_entry ();
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}

static int
ix86_avx_u128_mode_exit (void)
{
  rtx reg = crtl->return_rtx;

  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
     or 512 bit modes used in the function return register.  */
  if (reg && ix86_check_avx_upper_register (reg))
    return AVX_U128_DIRTY;

  /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
     modes used in function arguments, otherwise return AVX_U128_CLEAN.  */
  return ix86_avx_u128_mode_entry ();
}

/* Return a mode that ENTITY is assumed to be
   switched to at function exit.  */

static int
ix86_mode_exit (int entity)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      return X86_DIRFLAG_ANY;
    case AVX_U128:
      return ix86_avx_u128_mode_exit ();
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      return I387_CW_ANY;
    default:
      gcc_unreachable ();
    }
}

static int
ix86_mode_priority (int, int n)
{
  return n;
}
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to current control word,
   while NEW_MODE is set to new control word.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  switch (mode)
    {
    case I387_CW_ROUNDEVEN:
      /* round to nearest */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      slot = SLOT_CW_ROUNDEVEN;
      break;

    case I387_CW_TRUNC:
      /* round toward zero (truncate) */
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
      slot = SLOT_CW_TRUNC;
      break;

    case I387_CW_FLOOR:
      /* round down toward -oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
      slot = SLOT_CW_FLOOR;
      break;

    case I387_CW_CEIL:
      /* round up toward +oo */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
      slot = SLOT_CW_CEIL;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
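/* The masks above target bits 10-11 of the x87 control word, the
   rounding-control (RC) field:

	RC=00  round to nearest even	and ~0x0c00
	RC=01  round down toward -inf	| 0x0400
	RC=10  round up toward +inf	| 0x0800
	RC=11  round toward zero	| 0x0c00

   Each case clears and/or sets the field so that exactly the desired
   encoding remains.  */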
/* Generate one or more insns to set ENTITY to MODE.  */

static void
ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
		    HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
{
  switch (entity)
    {
    case X86_DIRFLAG:
      if (mode == X86_DIRFLAG_RESET)
	emit_insn (gen_cld ());
      break;
    case AVX_U128:
      if (mode == AVX_U128_CLEAN)
	emit_insn (gen_avx_vzeroupper ());
      break;
    case I387_ROUNDEVEN:
    case I387_TRUNC:
    case I387_FLOOR:
    case I387_CEIL:
      if (mode != I387_CW_ANY
	  && mode != I387_CW_UNINITIALIZED)
	emit_i387_cw_initialization (mode);
      break;
    default:
      gcc_unreachable ();
    }
}

/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  */

const char *
output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
  bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
  bool dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  static char buf[40];
  const char *p;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    return "fisttp%Z0\t%0";

  strcpy (buf, "fist");

  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%3", operands);

  p = "p%Z0\t%0";
  strcat (buf, p + !(stack_top_dies || dimode_p));

  output_asm_insn (buf, operands);

  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%2", operands);

  return "";
}
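/* Illustrative sequence (an assumption about typical output, not a
   template from this file): a DImode truncation with a non-default
   rounding mode expands roughly to

	fld	%st(0)		; keep a copy, hardware fistp pops
	fldcw	%3		; load the truncating control word
	fistpll	%0		; store and pop
	fldcw	%2		; restore the previous control word
 */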
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  return opno ? "fstp\t%y1" : "fstp\t%y0";
}

/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   should be used.  UNORDERED_P is true when fucom should be used.  */

const char *
output_fp_compare (rtx_insn *insn, rtx *operands,
		   bool eflags_p, bool unordered_p)
{
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  static char buf[40];
  const char *p;

  gcc_assert (STACK_TOP_P (xops[0]));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

  if (eflags_p)
    {
      p = unordered_p ? "fucomi" : "fcomi";
      strcpy (buf, p);

      p = "p\t{%y1, %0|%0, %y1}";
      strcat (buf, p + !stack_top_dies);

      return buf;
    }

  if (STACK_REG_P (xops[1])
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
    {
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If both the top of the 387 stack die, and the other operand
	 is also a stack register that dies, then this must be a
	 `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp";
      strcpy (buf, p);
    }
  else if (const0_operand (xops[1], VOIDmode))
    {
      gcc_assert (!unordered_p);
      strcpy (buf, "ftst");
    }
  else
    {
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
	{
	  gcc_assert (!unordered_p);
	  p = "ficom";
	}
      else
	p = unordered_p ? "fucom" : "fcom";

      strcpy (buf, p);

      p = "p%Z2\t%y2";
      strcat (buf, p + !stack_top_dies);
    }

  output_asm_insn (buf, operands);
  return "fnstsw\t%0";
}

void
ix86_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif

  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
}

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
    fprintf (file, "%s%s%d-%s%d\n",
	     directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
  else
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
		 GOT_SYMBOL_NAME, LPREFIX, value);
}
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)

/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
{
  df_ref def, use;

  if (!prev || !next)
    return distance + (distance & 1) + 2;

  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  FOR_EACH_INSN_USE (use, next)
    FOR_EACH_INSN_DEF (def, prev)
      if (!DF_REF_IS_ARTIFICIAL (def)
	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
	return distance + (distance & 1) + 2;

  return distance + 1;
}
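/* Worked example: distances are counted in half-cycles.  On a
   dependency, `distance + (distance & 1) + 2' first rounds an odd
   distance up to a cycle boundary and then adds one full cycle;
   without a dependency only a half-cycle is added.  E.g. distance 3
   becomes 3 + 1 + 2 = 6 with a dependency, 3 + 1 = 4 without.  */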
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

static bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
		  rtx_insn *insn)
{
  df_ref def;

  FOR_EACH_INSN_DEF (def, insn)
    if (DF_REF_REG_DEF_P (def)
	&& !DF_REF_IS_ARTIFICIAL (def)
	&& (regno1 == DF_REF_REGNO (def)
	    || regno2 == DF_REF_REGNO (def)))
      return true;

  return false;
}

/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref use;

  FOR_EACH_INSN_USE (use, insn)
    if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
      return true;

  return false;
}

/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
			       rtx_insn *insn, int distance,
			       rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

  *found = false;

  while (prev
	 && prev != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
	{
	  distance = increase_distance (prev, next, distance);
	  if (insn_defines_reg (regno1, regno2, prev))
	    {
	      if (recog_memoized (prev) < 0
		  || get_attr_type (prev) != TYPE_LEA)
		{
		  *found = true;
		  return distance;
		}
	    }

	  next = prev;
	}
      if (prev == BB_HEAD (bb))
	break;

      prev = PREV_INSN (prev);
    }

  return distance;
}

/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbor BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
			 rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
					      distance, PREV_INSN (insn),
					      &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_non_agu_define_in_bb (regno1, regno2,
						  insn, distance,
						  BB_END (bb), &found);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    {
	      int bb_dist
		= distance_non_agu_define_in_bb (regno1, regno2,
						 insn, distance,
						 BB_END (e->src),
						 &found_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  /* get_attr_type may modify recog data.  We want to make sure
     that recog data is valid for instruction INSN, on which
     distance_non_agu_define is called.  INSN is unchanged here.  */
  extract_insn_cached (insn);

  if (!found)
    return -1;

  return distance >> 1;
}

/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO0 is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
			rtx_insn *insn, int distance, rtx_insn *start,
			bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
	/* If insn and start belong to the same bb, set prev to insn,
	   so the call to increase_distance will increase the distance
	   between insns by 1.  */
	prev = insn;
    }

  while (next
	 && next != insn
	 && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
	{
	  distance = increase_distance(prev, next, distance);
	  if (insn_uses_reg_mem (regno, next))
	    {
	      /* Return DISTANCE if OP0 is used in memory
		 address in NEXT.  */
	      *found = true;
	      return distance;
	    }

	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
	    {
	      /* Return -1 if OP0 is set in NEXT.  */
	      *redefined = true;
	      return -1;
	    }

	  prev = next;
	}

      if (next == BB_END (bb))
	break;

      next = NEXT_INSN (next);
    }

  return distance;
}

/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
				       NEXT_INSN (insn),
				       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == bb)
	  {
	    simple_loop = true;
	    break;
	  }

      if (simple_loop)
	distance = distance_agu_use_in_bb (regno0, insn,
					   distance, BB_HEAD (bb),
					   &found, &redefined);
      else
	{
	  int shortest_dist = -1;
	  bool found_in_bb = false;
	  bool redefined_in_bb = false;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    {
	      int bb_dist
		= distance_agu_use_in_bb (regno0, insn,
					  distance, BB_HEAD (e->dest),
					  &found_in_bb, &redefined_in_bb);
	      if (found_in_bb)
		{
		  if (shortest_dist < 0)
		    shortest_dist = bb_dist;
		  else if (bb_dist > 0)
		    shortest_dist = MIN (bb_dist, shortest_dist);

		  found = true;
		}
	    }

	  distance = shortest_dist;
	}
    }

  if (!found || redefined)
    return -1;

  return distance >> 1;
}
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0

/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
		      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Atom processors newer than Bonnell, if using a 2-source or
     3-source LEA for non-destructive destination purposes, or due to
     wanting ability to use SCALE, the use of LEA is justified.  */
  if (!TARGET_BONNELL)
    {
      if (has_scale)
	return true;
      if (split_cost < 1)
	return false;
      if (regno0 == regno1 || regno0 == regno2)
	return false;
      return true;
    }

  rtx_insn *rinsn = recog_data.insn;

  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  /* distance_non_agu_define can call extract_insn_cached.  If this function
     is called from define_split conditions, that can break insn splitting,
     because split_insns works by clearing recog_data.insn and then modifying
     recog_data.operand array and match the various split conditions.  */
  if (recog_data.insn != rinsn)
    recog_data.insn = NULL;

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
	 operand usage and split cost is 0 then both lea
	 and non lea variants have same priority.  Currently
	 we prefer lea for 64 bit code and non lea on 32 bit
	 code.  */
      if (dist_use < 0 && split_cost == 0)
	return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
	return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
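/* Illustrative trade-off encoded by the heuristic above: on
   AGU-constrained cores such as Bonnell, the 3-component

	lea	0x4(%rax,%rbx,4), %rcx

   can stall if %rax/%rbx were just produced by non-AGU instructions,
   so it may lose to a split sequence along the lines of

	mov	%rbx, %rcx
	shl	$2, %rcx
	add	%rax, %rcx
	add	$0x4, %rcx

   unless %rcx itself feeds a nearby address (dist_use).  */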
/* Return true if it is legal to clobber flags by INSN and
   false otherwise.  */

static bool
ix86_ok_to_clobber_flags (rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  df_ref use;
  bitmap live;

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn))
	{
	  FOR_EACH_INSN_USE (use, insn)
	    if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
	      return false;

	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
	    return true;
	}

      if (insn == BB_END (bb))
	break;

      insn = NEXT_INSN (insn);
    }

  live = df_get_live_out(bb);
  return !REGNO_REG_SET_P (live, FLAGS_REG);
}

/* Return true if we need to split op0 = op1 + op2 into a sequence of
   move and add to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Check it is correct to split here.  */
  if (!ix86_ok_to_clobber_flags(insn))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);
  regno2 = true_regnum (operands[2]);

  /* We need to split only adds with non destructive
     destination operand.  */
  if (regno0 == regno1 || regno0 == regno2)
    return false;
  else
    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}

/* Return true if we should emit lea instruction instead of mov
   instruction.  */

bool
ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1;

  /* Check if we need to optimize.  */
  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  /* Use lea for reg to reg moves only.  */
  if (!REG_P (operands[0]) || !REG_P (operands[1]))
    return false;

  regno0 = true_regnum (operands[0]);
  regno1 = true_regnum (operands[1]);

  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls.  */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
	  && REG_P (XEXP (operands[1], 0))))
    return false;

  /* Check if it is OK to split here.  */
  if (!ix86_ok_to_clobber_flags (insn))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement.  */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For the processors like BONNELL, if
     the destination register of LEA holds an actual address which will
     be used soon, LEA is better and otherwise ADD is better.  */
  if (!TARGET_BONNELL
      && parts.scale == 1
      && (!parts.disp || parts.disp == const0_rtx)
      && (regno0 == regno1 || regno0 == regno2))
    return true;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
	 destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
	split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
	split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
	{
	  if (regno0 != regno1)
	    split_cost += 1;
	  else if (regno2 == regno0)
	    split_cost += 4;
	  else
	    split_cost += parts.scale;
	}

      /* Have to use add instruction with immediate if
	 disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
	split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
				parts.scale > 1);
}
/* Return true if it is ok to optimize an ADD operation to LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For the processors like BONNELL, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better and otherwise ADD is better.  */

bool
ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0 = true_regnum (operands[0]);
  unsigned int regno1 = true_regnum (operands[1]);
  unsigned int regno2 = true_regnum (operands[2]);

  /* If a = b + c, (a!=b && a!=c), must use lea form.  */
  if (regno0 != regno1 && regno0 != regno2)
    return true;

  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
    return false;

  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
	return false;
      break;
    case PARALLEL:
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
					  use_body))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
	if (ix86_dep_by_shift_count_body (set_body,
					  XVECEXP (use_body, 0, i)))
	  return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
	  || GET_CODE (shift_rtx) == LSHIFTRT
	  || GET_CODE (shift_rtx) == ASHIFTRT
	  || GET_CODE (shift_rtx) == ROTATE
	  || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
	{
	  /* Add check since it can be invoked before register
	     allocation in pre-reload schedule.  */
	  if (reload_completed
	      && true_regnum (set_dest) == true_regnum (shift_count))
	    return true;
	  else if (REGNO(set_dest) == REGNO(shift_count))
	    return true;
	}
    }

  return false;
}

/* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  */

bool
ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
{
  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
				       PATTERN (use_insn));
}

/* Return TRUE or FALSE depending on whether the unary operator meets the
   appropriate constraints.  */

bool
ix86_unary_operator_ok (enum rtx_code,
			machine_mode,
			rtx operands[2])
{
  /* If one of operands is memory, source and destination must match.  */
  if ((MEM_P (operands[0])
       || MEM_P (operands[1]))
      && ! rtx_equal_p (operands[0], operands[1]))
    return false;
  return true;
}

/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   are ok, keeping in mind the possible movddup alternative.  */

bool
ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
{
  if (MEM_P (operands[0]))
    return rtx_equal_p (operands[0], operands[1 + high]);
  if (MEM_P (operands[1]) && MEM_P (operands[2]))
    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
  return true;
}

/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */

rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V2SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  switch (mode)
    {
    case E_V2SFmode:
    case E_V4SFmode:
    case E_V8SFmode:
    case E_V16SFmode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V2DFmode:
    case E_V4DFmode:
    case E_V8DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    case E_SFmode:
      vec_mode = V4SFmode;
      imode = SImode;
      break;

    case E_DFmode:
      vec_mode = V2DFmode;
      imode = DImode;
      break;

    case E_TFmode:
      vec_mode = mode;
      imode = TImode;
      break;

    case E_XFmode:
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
			   GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
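/* Worked example (illustrative): for DFmode the constant built here
   is 0x8000000000000000, i.e. only the sign bit; with INVERT it is
   0x7fffffffffffffff.  XORing with the former negates, ANDing with
   the latter implements fabs, which is how the fp_absneg and
   copysign expanders consume this mask.  */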
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCNOmode:
      if (req_mode != CCNOmode
	  && (req_mode != CCmode
	      || XEXP (SET_SRC (set), 1) != const0_rtx))
	return false;
      break;
    case E_CCmode:
      if (req_mode == CCGCmode)
	return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
	return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
	return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    case E_CCGZmode:

    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
      if (set_mode != req_mode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return GET_MODE (SET_SRC (set)) == set_mode;
}

machine_mode
ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  if (SCALAR_FLOAT_MODE_P (mode))
    {
      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
      return CCFPmode;
    }

  switch (code)
    {
      /* Only zero flag is needed.  */
    case EQ:			/* ZF=0 */
    case NE:			/* ZF!=0 */
      return CCZmode;
      /* Codes needing carry flag.  */
    case GEU:			/* CF=0 */
    case LTU:			/* CF=1 */
      rtx geu;
      /* Detect overflow checks.  They need just the carry flag.  */
      if (GET_CODE (op0) == PLUS
	  && (rtx_equal_p (op1, XEXP (op0, 0))
	      || rtx_equal_p (op1, XEXP (op0, 1))))
	return CCCmode;
      /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
	 Match LTU of op0
	 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
	 and op1
	 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
	 where CC_CCC is either CC or CCC.  */
      else if (code == LTU
	       && GET_CODE (op0) == NEG
	       && GET_CODE (geu = XEXP (op0, 0)) == GEU
	       && REG_P (XEXP (geu, 0))
	       && (GET_MODE (XEXP (geu, 0)) == CCCmode
		   || GET_MODE (XEXP (geu, 0)) == CCmode)
	       && REGNO (XEXP (geu, 0)) == FLAGS_REG
	       && XEXP (geu, 1) == const0_rtx
	       && GET_CODE (op1) == LTU
	       && REG_P (XEXP (op1, 0))
	       && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
	       && REGNO (XEXP (op1, 0)) == FLAGS_REG
	       && XEXP (op1, 1) == const0_rtx)
	return CCCmode;
      else
	return CCmode;
    case GTU:			/* CF=0 & ZF=0 */
    case LEU:			/* CF=1 | ZF=1 */
      return CCmode;
      /* Codes possibly doable only with sign flag when
	 comparing against zero.  */
    case GE:			/* SF=OF   or   SF=0 */
    case LT:			/* SF<>OF  or   SF=1 */
      if (op1 == const0_rtx)
	return CCGOCmode;
      else
	/* For other cases Carry flag is not required.  */
	return CCGCmode;
      /* Codes doable only with sign flag when comparing
	 against zero, but we miss jump instruction for it
	 so we need to use relational tests against overflow
	 that thus needs to be zero.  */
    case GT:			/* ZF=0 & SF=OF */
    case LE:			/* ZF=1 | SF<>OF */
      if (op1 == const0_rtx)
	return CCNOmode;
      else
	return CCGCmode;
      /* strcmp pattern do (use flags) and combine may ask us for proper
	 mode.  */
    case USE:
      return CCmode;
    default:
      gcc_unreachable ();
    }
}
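/* Worked example (illustrative): for `if (a == 0)' the comparison
   reaches here as EQ against const0_rtx and gets CCZmode, so it can
   be carried by `test %eax, %eax' (only ZF matters); a signed
   `a < b' with b != 0 gets CCGCmode because both SF and OF must be
   valid for the jl/jge family.  */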
/* Return the fixed registers used for condition codes.  */

static bool
ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = FLAGS_REG;
  *p2 = INVALID_REGNUM;
  return true;
}

/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
	   && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
	{
	default:
	  gcc_unreachable ();

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGOCmode:
	case E_CCNOmode:
	case E_CCAmode:
	case E_CCCmode:
	case E_CCOmode:
	case E_CCPmode:
	case E_CCSmode:
	case E_CCZmode:
	  return CCmode;
	}

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
	 checked above.  */
      return VOIDmode;
    }
}
/* Return strategy to use for floating-point.  We assume that fcomi is always
   preferable where available, since that is also true when looking at size
   (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */

enum ix86_fpcmp_strategy
ix86_fp_comparison_strategy (enum rtx_code)
{
  /* Do fcomi/sahf based test when profitable.  */

  if (TARGET_CMOVE)
    return IX86_FPCMP_COMI;

  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
    return IX86_FPCMP_SAHF;

  return IX86_FPCMP_ARITH;
}

/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    case GT:
      return GTU;
    case GE:
      return GEU;
    case LT:
      return LTU;
    case LE:
      return LEU;
    case EQ:
      return EQ;
    case NE:
      return NE;
    case UNORDERED:
      return NE;
    case ORDERED:
      return EQ;
    default:
      return UNKNOWN;
    }
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
}

/* Return true if the function being called was marked with attribute
   "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
   to handle the non-PIC case in the backend because there is no easy
   interface for the front-end to force non-PLT calls to use the GOT.
   This is currently used only with 64-bit or 32-bit GOT32X ELF targets
   to call the function marked "noplt" indirectly.  */

static bool
ix86_nopic_noplt_attribute_p (rtx call_op)
{
  if (flag_pic || ix86_cmodel == CM_LARGE
      || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
      || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
      || SYMBOL_REF_LOCAL_P (call_op))
    return false;

  tree symbol_decl = SYMBOL_REF_DECL (call_op);

  if (!flag_plt
      || (symbol_decl != NULL_TREE
	  && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
    return true;

  return false;
}

/* Helper to output the jmp/call.  */
static void
ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
{
  if (thunk_name != NULL)
    {
      fprintf (asm_out_file, "\tjmp\t");
      assemble_name (asm_out_file, thunk_name);
      putc ('\n', asm_out_file);
    }
  else
    output_indirect_thunk (regno);
}
/* Output indirect branch via a call and return thunk.  CALL_OP is a
   register which contains the branch target.  XASM is the assembly
   template for CALL_OP.  Branch is a tail call if SIBCALL_P is true.
   A normal call is converted to:

	call __x86_indirect_thunk_reg

   and a tail call is converted to:

	jmp __x86_indirect_thunk_reg
 */

static void
ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (current_output_insn);
  int regno = REGNO (call_op);

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
	{
	  int i = regno;
	  if (i >= FIRST_REX_INT_REG)
	    i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
	  indirect_thunks_used |= 1 << i;
	}
      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  if (sibcall_p)
    ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
  else
    {
      if (thunk_name != NULL)
	{
	  fprintf (asm_out_file, "\tcall\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	  return;
	}

      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
				   INDIRECT_LABEL,
				   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
				   INDIRECT_LABEL,
				   indirectlabelno++);

      /* Jump.  */
      fputs ("\tjmp\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc ('\n', asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs ("\tcall\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc ('\n', asm_out_file);
    }
}
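/* Illustrative expansion (an assumption about typical output; the
   out-of-line thunk bodies themselves are emitted elsewhere by
   output_indirect_thunk): with -mindirect-branch=thunk, a
   `call *%rax' becomes

	call	__x86_indirect_thunk_rax

   where the thunk is the usual retpoline pattern:

	__x86_indirect_thunk_rax:
		call	.LIND1
	.LIND0:	pause
		lfence
		jmp	.LIND0
	.LIND1:	mov	%rax, (%rsp)
		ret
 */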
15783 /* Output indirect branch via a call and return thunk. CALL_OP is
15784 the branch target. XASM is the assembly template for CALL_OP.
15785 Branch is a tail call if SIBCALL_P is true. A normal call is
15791 jmp __x86_indirect_thunk
15795 and a tail call is converted to:
15798 jmp __x86_indirect_thunk
15802 ix86_output_indirect_branch_via_push (rtx call_op
, const char *xasm
,
15805 char thunk_name_buf
[32];
15808 enum indirect_thunk_prefix need_prefix
15809 = indirect_thunk_need_prefix (current_output_insn
);
15812 if (cfun
->machine
->indirect_branch_type
15813 != indirect_branch_thunk_inline
)
15815 if (cfun
->machine
->indirect_branch_type
== indirect_branch_thunk
)
15816 indirect_thunk_needed
= true;
15817 indirect_thunk_name (thunk_name_buf
, regno
, need_prefix
, false);
15818 thunk_name
= thunk_name_buf
;
15823 snprintf (push_buf
, sizeof (push_buf
), "push{%c}\t%s",
15824 TARGET_64BIT
? 'q' : 'l', xasm
);
15828 output_asm_insn (push_buf
, &call_op
);
15829 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15833 char indirectlabel1
[32];
15834 char indirectlabel2
[32];
15836 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1
,
15838 indirectlabelno
++);
15839 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2
,
15841 indirectlabelno
++);
15844 fputs ("\tjmp\t", asm_out_file
);
15845 assemble_name_raw (asm_out_file
, indirectlabel2
);
15846 fputc ('\n', asm_out_file
);
15848 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel1
);
15850 /* An external function may be called via GOT, instead of PLT. */
15851 if (MEM_P (call_op
))
15853 struct ix86_address parts
;
15854 rtx addr
= XEXP (call_op
, 0);
15855 if (ix86_decompose_address (addr
, &parts
)
15856 && parts
.base
== stack_pointer_rtx
)
15858 /* Since call will adjust stack by -UNITS_PER_WORD,
15859 we must convert "disp(stack, index, scale)" to
15860 "disp+UNITS_PER_WORD(stack, index, scale)". */
15863 addr
= gen_rtx_MULT (Pmode
, parts
.index
,
15864 GEN_INT (parts
.scale
));
15865 addr
= gen_rtx_PLUS (Pmode
, stack_pointer_rtx
,
15869 addr
= stack_pointer_rtx
;
15872 if (parts
.disp
!= NULL_RTX
)
15873 disp
= plus_constant (Pmode
, parts
.disp
,
15876 disp
= GEN_INT (UNITS_PER_WORD
);
15878 addr
= gen_rtx_PLUS (Pmode
, addr
, disp
);
15879 call_op
= gen_rtx_MEM (GET_MODE (call_op
), addr
);
15883 output_asm_insn (push_buf
, &call_op
);
15885 ix86_output_jmp_thunk_or_indirect (thunk_name
, regno
);
15887 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file
, indirectlabel2
);
15890 fputs ("\tcall\t", asm_out_file
);
15891 assemble_name_raw (asm_out_file
, indirectlabel1
);
15892 fputc ('\n', asm_out_file
);
15896 /* Output indirect branch via a call and return thunk. CALL_OP is
15897 the branch target. XASM is the assembly template for CALL_OP.
15898 Branch is a tail call if SIBCALL_P is true. */
15901 ix86_output_indirect_branch (rtx call_op
, const char *xasm
,
15904 if (REG_P (call_op
))
15905 ix86_output_indirect_branch_via_reg (call_op
, sibcall_p
);
15907 ix86_output_indirect_branch_via_push (call_op
, xasm
, sibcall_p
);
15910 /* Output indirect jump. CALL_OP is the jump target. */
15913 ix86_output_indirect_jmp (rtx call_op
)
15915 if (cfun
->machine
->indirect_branch_type
!= indirect_branch_keep
)
15917 /* We can't have red-zone since "call" in the indirect thunk
15918 pushes the return address onto stack, destroying red-zone. */
15919 if (ix86_red_zone_size
!= 0)
15920 gcc_unreachable ();
15922 ix86_output_indirect_branch (call_op
, "%0", true);
15926 return "%!jmp\t%A0";
15929 /* Output return instrumentation for current function if needed. */
15932 output_return_instrumentation (void)
15934 if (ix86_instrument_return
!= instrument_return_none
15936 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun
->decl
))
15938 if (ix86_flag_record_return
)
15939 fprintf (asm_out_file
, "1:\n");
15940 switch (ix86_instrument_return
)
15942 case instrument_return_call
:
15943 fprintf (asm_out_file
, "\tcall\t__return__\n");
15945 case instrument_return_nop5
:
15946 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15947 fprintf (asm_out_file
, ASM_BYTE
"0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15949 case instrument_return_none
:
15953 if (ix86_flag_record_return
)
15955 fprintf (asm_out_file
, "\t.section __return_loc, \"a\",@progbits\n");
15956 fprintf (asm_out_file
, "\t.%s 1b\n", TARGET_64BIT
? "quad" : "long");
15957 fprintf (asm_out_file
, "\t.previous\n");
/* Output function return.  CALL_OP is the jump target.  Add a REP
   prefix to RET if LONG_P is true and function return is kept.  */

const char *
ix86_output_function_return (bool long_p)
{
  output_return_instrumentation ();

  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
			       true);
	  indirect_return_needed |= need_thunk;
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (INVALID_REGNUM);

      return "";
    }

  if (!long_p)
    return "%!ret";

  return "rep%; ret";
}
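/* With -mfunction-return=thunk the code above thus replaces a plain
   "ret" by a "jmp" to the return thunk (e.g. "__x86_return_thunk";
   illustrative, the exact name is produced by indirect_thunk_name).  */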
/* Output indirect function return.  RET_OP is the function return
   value.  */

const char *
ix86_output_indirect_function_return (rtx ret_op)
{
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);
      unsigned int regno = REGNO (ret_op);
      gcc_assert (regno == CX_REG);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, regno, need_prefix, true);

	  if (need_thunk)
	    {
	      indirect_return_via_cx = true;
	      indirect_thunks_used |= 1 << CX_REG;
	    }
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (regno);

      return "";
    }
  else
    return "%!jmp\t%A0";
}
/* Output the assembly for a call instruction.  */

const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      output_return_instrumentation ();
      if (direct_p)
	{
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
		}
	    }
	  else
	    xasm = "%!jmp\t%P0";
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0";
      else
	{
	  if (output_indirect_p)
	    xasm = "%0";
	  else
	    xasm = "%!jmp\t%A0";
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, true);
      else
	output_asm_insn (xasm, &call_op);
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i; i = NEXT_INSN (i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P (i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  If non-
	     call-exceptions, we'll have done this during epilogue emission. */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  if (direct_p)
    {
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
	    }
	}
      else
	xasm = "%!call\t%P0";
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0";
      else
	xasm = "%!call\t%A0";
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
/* Return a MEM corresponding to a stack slot with mode MODE.
   Allocate a new slot if necessary.

   The RTL for a function can have several slots available: N is
   which slot to use.  */

rtx
assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
{
  struct stack_local_entry *s;

  gcc_assert (n < MAX_386_STACK_LOCALS);

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->mode == mode && s->n == n)
      return validize_mem (copy_rtx (s->rtl));

  s = ggc_alloc<stack_local_entry> ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return validize_mem (copy_rtx (s->rtl));
}

static void
ix86_instantiate_decls (void)
{
  struct stack_local_entry *s;

  for (s = ix86_stack_locals; s; s = s->next)
    if (s->rtl != NULL_RTX)
      instantiate_decl_rtl (s->rtl);
}
/* Check whether x86 address PARTS is a pc-relative address.  */

static bool
ix86_rip_relative_addr_p (struct ix86_address *parts)
{
  rtx base, index, disp;

  base = parts->base;
  index = parts->index;
  disp = parts->disp;

  if (disp && !base && !index)
    {
      if (TARGET_64BIT)
	{
	  rtx symbol = disp;

	  if (GET_CODE (disp) == CONST)
	    symbol = XEXP (disp, 0);
	  if (GET_CODE (symbol) == PLUS
	      && CONST_INT_P (XEXP (symbol, 1)))
	    symbol = XEXP (symbol, 0);

	  if (GET_CODE (symbol) == LABEL_REF
	      || (GET_CODE (symbol) == SYMBOL_REF
		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
	      || (GET_CODE (symbol) == UNSPEC
		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
		      || XINT (symbol, 1) == UNSPEC_PCREL
		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
	    return true;
	}
    }
  return false;
}
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /* If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
	  || (parts.base && GET_MODE (parts.base) == SImode)
	  || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 code.  */
      if (base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || REGNO (base) == SP_REG
	  || REGNO (base) == BP_REG
	  || REGNO (base) == R12_REG
	  || REGNO (base) == R13_REG)
	len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (!ix86_rip_relative_addr_p (&parts))
	len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
	{
	  if (base && satisfies_constraint_K (disp))
	    len += 1;
	  else
	    len += 4;
	}
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
	len++;

      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
	  || base == arg_pointer_rtx
	  || base == frame_pointer_rtx
	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
	len++;
    }

  return len;
}
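/* Illustrative values returned above (bytes beyond opcode/modrm):
   "(%eax)" -> 0, "(%esp)" -> 1 (SIB byte), "4(%eax)" -> 1 (disp8),
   an absolute disp32 -> 4, plus one more byte in 64-bit mode when a
   SIB byte is needed because mod=00/r/m=5 means disp32(%rip) there.  */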
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
	enum attr_mode mode = get_attr_mode (insn);

	gcc_assert (!len);
	if (shortform && CONST_INT_P (recog_data.operand[i]))
	  {
	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
	    switch (mode)
	      {
	      case MODE_QI:
		len = 1;
		continue;
	      case MODE_HI:
		ival = trunc_int_for_mode (ival, HImode);
		break;
	      case MODE_SI:
		ival = trunc_int_for_mode (ival, SImode);
		break;
	      default:
		break;
	      }
	    if (IN_RANGE (ival, -128, 127))
	      {
		len = 1;
		continue;
	      }
	  }
	switch (mode)
	  {
	  case MODE_QI:
	    len = 1;
	    break;
	  case MODE_HI:
	    len = 2;
	    break;
	  case MODE_SI:
	    len = 4;
	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32bit sign extended values.  */
	  case MODE_DI:
	    len = 4;
	    break;
	  default:
	    fatal_insn ("unknown insn mode", insn);
	  }
      }
  return len;
}
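/* Illustrative: "add $100, %eax" can use the sign-extended imm8
   alternative, so length_immediate is 1; "add $1000, %eax" needs a
   full imm32, so length_immediate is 4.  */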
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
	set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
	{
	  constrain_operands_cached (insn, reload_completed);
	  if (which_alternative != -1)
	    {
	      const char *constraints = recog_data.constraints[i];
	      int alt = which_alternative;

	      while (*constraints == '=' || *constraints == '+')
		constraints++;
	      while (alt-- > 0)
		while (*constraints++ != ',')
		  ;
	      /* Skip ignored operands.  */
	      if (*constraints == 'X')
		continue;
	    }

	  int len = memory_address_length (XEXP (op, 0), false);

	  /* Account for segment prefix for non-default addr spaces.  */
	  if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
	    len++;

	  return len;
	}
    }
  return 0;
}
/* Compute default value for "length_vex" attribute.  It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
			      bool has_vex_w)
{
  int i;

  /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
	/* REX.W bit uses 3 byte VEX prefix.  */
	if (GET_MODE (recog_data.operand[i]) == DImode
	    && GENERAL_REG_P (recog_data.operand[i]))
	  return 3 + 1;
      }
    else
      /* REX.X or REX.B bits use 3 byte VEX prefix.  */
      if (MEM_P (recog_data.operand[i])
	  && x86_extended_reg_mentioned_p (recog_data.operand[i]))
	return 3 + 1;

  return 2 + 1;
}
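/* Illustrative: a plain "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte
   VEX prefix (return value 2 + 1), while a DImode general register
   operand (needing VEX.W) or an extended register in a memory address
   (needing REX.X or REX.B) forces the 3-byte prefix (3 + 1).  */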
static bool ix86_class_likely_spilled_p (reg_class_t);

/* Returns true if lhs of insn is HW function argument register and set up
   is_spilled to true if it is likely spilled HW register.  */
static bool
insn_is_function_arg (rtx insn, bool *is_spilled)
{
  rtx dst;

  if (!NONDEBUG_INSN_P (insn))
    return false;
  /* Call instructions are not movable, ignore it.  */
  if (CALL_P (insn))
    return false;
  insn = PATTERN (insn);
  if (GET_CODE (insn) == PARALLEL)
    insn = XVECEXP (insn, 0, 0);
  if (GET_CODE (insn) != SET)
    return false;
  dst = SET_DEST (insn);
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && ix86_function_arg_regno_p (REGNO (dst)))
    {
      /* Is it likely spilled HW register?  */
      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
	*is_spilled = true;
      return true;
    }
  return false;
}
/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.  */
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
	return NULL;
      if (!NONDEBUG_INSN_P (last))
	continue;
      if (insn_is_function_arg (last, &is_spilled))
	break;
      return NULL;
    }

  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
	break;
      if (insn == head)
	break;
      if (!NONDEBUG_INSN_P (insn))
	{
	  last = insn;
	  continue;
	}
      if (insn_is_function_arg (insn, &is_spilled))
	{
	  /* Add output dependence between two function arguments if chain
	     of output arguments contains likely spilled HW registers.  */
	  if (is_spilled)
	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
	  first_arg = last = insn;
	}
      else
	break;
    }
  if (!is_spilled)
    return NULL;
  return first_arg;
}
/* Add output or anti dependency from insn to first_arg to restrict its code
   motion.  */
static void
avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
{
  rtx set;
  rtx tmp;

  set = single_set (insn);
  if (!set)
    return;
  tmp = SET_DEST (set);
  if (REG_P (tmp))
    {
      /* Add output dependency to the first function argument.  */
      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
      return;
    }
  /* Add anti dependency.  */
  add_dependence (first_arg, insn, REG_DEP_ANTI);
}
/* Avoid cross block motion of function argument through adding dependency
   from the first non-jump instruction in bb.  */
static void
add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
{
  rtx_insn *insn = BB_END (bb);

  while (insn)
    {
      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
	{
	  rtx set = single_set (insn);
	  if (set)
	    {
	      avoid_func_arg_motion (arg, insn);
	      return;
	    }
	}
      if (insn == BB_HEAD (bb))
	return;
      insn = PREV_INSN (insn);
    }
}
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  if (reload_completed)
    return;
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
	first_arg = add_parameter_dependencies (insn, head);
	if (first_arg)
	  {
	    /* Add dependee for first argument to predecessors if only
	       region contains more than one block.  */
	    basic_block bb = BLOCK_FOR_INSN (insn);
	    int rgn = CONTAINING_RGN (bb->index);
	    int nr_blks = RGN_NR_BLOCKS (rgn);
	    /* Skip trivial regions and region head blocks that can have
	       predecessors outside of region.  */
	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
	      {
		edge e;
		edge_iterator ei;

		/* Regions are SCCs with the exception of selective
		   scheduling with pipelining of outer blocks enabled.
		   So also check that immediate predecessors of a non-head
		   block are in the same region.  */
		FOR_EACH_EDGE (e, ei, bb->preds)
		  {
		    /* Avoid creating of loop-carried dependencies through
		       using topological ordering in the region.  */
		    if (rgn == CONTAINING_RGN (e->src->index)
			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
		      add_dependee_for_func_arg (first_arg, e->src);
		  }
	      }
	    insn = first_arg;
	    if (insn == head)
	      break;
	  }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
/* Hook for pre-reload schedule - set priority of moves from likely spilled
   HW registers to maximum, to schedule them as soon as possible.  These are
   moves from function argument registers at the top of the function entry
   and moves from function return value registers after call.  */
static int
ix86_adjust_priority (rtx_insn *insn, int priority)
{
  rtx set;

  if (reload_completed)
    return priority;

  if (!NONDEBUG_INSN_P (insn))
    return priority;

  set = single_set (insn);
  if (set)
    {
      rtx tmp = SET_SRC (set);
      if (REG_P (tmp)
	  && HARD_REGISTER_P (tmp)
	  && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
	return current_sched_info->sched_max_insns_priority;
    }

  return priority;
}
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for pre-reload schedule
	 to save compile time.  */
      if (reload_completed)
	{
	  ix86_core2i7_init_hooks ();
	  break;
	}
      /* Fall through.  */
    default:
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
/* Implement TARGET_STATIC_RTX_ALIGNMENT.  */

static HOST_WIDE_INT
ix86_static_rtx_alignment (machine_mode mode)
{
  if (mode == DFmode)
    return 64;
  if (ALIGN_MODE_128 (mode))
    return MAX (128, GET_MODE_ALIGNMENT (mode));
  return GET_MODE_ALIGNMENT (mode);
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
      || TREE_CODE (exp) == INTEGER_CST)
    {
      machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
      HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
      return MAX (mode_align, align);
    }
  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
}
/* Implement TARGET_EMPTY_RECORD_P.  */

static bool
ix86_is_empty_record (const_tree type)
{
  if (!TARGET_64BIT)
    return false;
  return default_is_empty_record (type);
}
/* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  */

static void
ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!cum->warn_empty)
    return;

  if (!TYPE_EMPTY_P (type))
    return;

  /* Don't warn if the function isn't visible outside of the TU.  */
  if (cum->decl && !TREE_PUBLIC (cum->decl))
    return;

  const_tree ctx = get_ultimate_context (cum->decl);
  if (ctx != NULL_TREE
      && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
    return;

  /* If the actual size of the type is zero, then there is no change
     in how objects of this size are passed.  */
  if (int_size_in_bytes (type) == 0)
    return;

  warning (OPT_Wabi, "empty class %qT parameter passing ABI "
	   "changes in %<-fabi-version=12%> (GCC 8)", type);

  /* Only warn once.  */
  cum->warn_empty = false;
}
/* This hook returns name of multilib ABI.  */

static const char *
ix86_get_multilib_abi_name (void)
{
  if (!(TARGET_64BIT_P (ix86_isa_flags)))
    return "i386";
  else if (TARGET_X32_P (ix86_isa_flags))
    return "x32";
  else
    return "x86_64";
}
/* Compute the alignment for a variable for Intel MCU psABI.  TYPE is
   the data type, and ALIGN is the alignment that the object would
   ordinarily have.  */

static int
iamcu_alignment (tree type, int align)
{
  machine_mode mode;

  if (align < 32 || TYPE_USER_ALIGN (type))
    return align;

  /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
     bytes.  */
  type = strip_array_types (type);
  if (TYPE_ATOMIC (type))
    return align;

  mode = TYPE_MODE (type);
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_INT:
    case MODE_COMPLEX_INT:
    case MODE_COMPLEX_FLOAT:
    case MODE_FLOAT:
    case MODE_DECIMAL_FLOAT:
      return 32;
    default:
      return align;
    }
}
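/* Illustrative: under the Intel MCU psABI a "double" or "long long",
   naturally 64-bit aligned elsewhere, is capped at 32-bit alignment
   by the function above.  */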
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  */

unsigned int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }

  if (!opt)
    return align;

  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
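/* Illustrative: on x86-64 a static array of 32 ints (128 bytes) gets
   at least 128-bit alignment from the rule above, and with a 64-byte
   prefetch block a cache-line-sized aggregate is aligned to 512 bits
   when optimizing.  */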
/* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT.  */
static void
ix86_lower_local_decl_alignment (tree decl)
{
  unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
						 DECL_ALIGN (decl), true);
  if (new_align < DECL_ALIGN (decl))
    SET_DECL_ALIGN (decl, new_align);
}
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align, bool may_lower)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (may_lower
      && !TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler cannot do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if ((TREE_CODE (type) == RECORD_TYPE
	    || TREE_CODE (type) == UNION_TYPE
	    || TREE_CODE (type) == QUAL_UNION_TYPE)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
/* Compute the minimum required alignment for dynamic stack realignment
   purposes for a local variable, parameter or a stack slot.  EXP is
   the data type or decl itself, MODE is its mode and ALIGN is the
   alignment that the object would ordinarily have.  */

unsigned int
ix86_minimum_alignment (tree exp, machine_mode mode,
			unsigned int align)
{
  tree type, decl;

  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
    return align;

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    {
      gcc_checking_assert (!TARGET_STV);
      return 32;
    }

  return align;
}
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.  */

static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
	{
	  fntype = TREE_TYPE (fndecl_or_type);
	  fndecl = fndecl_or_type;
	}
      else
	{
	  fntype = fndecl_or_type;
	  fndecl = NULL;
	}

      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	{
	  /* Fastcall functions use ecx/edx for arguments, which leaves
	     us with EAX for the static chain.
	     Thiscall functions use ecx for arguments, which also
	     leaves us with EAX for the static chain.  */
	  regno = AX_REG;
	}
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     We are using for abi-compatibility EAX.  */
	  regno = AX_REG;
	}
      else if (ix86_function_regparm (fntype, fndecl) == 3)
	{
	  /* For regparm 3, we have no free call-clobbered registers in
	     which to store the static chain.  In order to implement this,
	     we have the trampoline push the static chain to the stack.
	     However, we can't push a value below the return address when
	     we call the nested function directly, so we have to use an
	     alternate entry point.  For this we use ESI, and have the
	     alternate entry point push ESI, so that things appear the
	     same once we're executing the nested function.  */
	  if (incoming_p)
	    {
	      if (fndecl == current_function_decl
		  && !ix86_static_chain_on_stack)
		{
		  gcc_assert (!reload_completed);
		  ix86_static_chain_on_stack = true;
		}
	      return gen_frame_mem (SImode,
				    plus_constant (Pmode,
						   arg_pointer_rtx, -8));
	    }
	  regno = SI_REG;
	}
    }

  return gen_rtx_REG (Pmode, regno);
}
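/* Summary of the conventions implemented above (illustrative):
   64-bit code always uses %r10; 32-bit code uses %ecx by default,
   %eax for fastcall/thiscall, and for regparm-3 functions the chain
   is passed on the stack via an alternate %esi-pushing entry point.  */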
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;
  bool need_endbr = (flag_cf_protection & CF_BRANCH);

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      if (need_endbr)
	{
	  /* Insert ENDBR64.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
	  offset += 4;
	}

      /* Load the function address to r11.  Try to load address using
	 the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but kernel does not use trampolines at
	 the moment.  FNADDR is a 32bit address and may not be in
	 DImode when ptr_mode == SImode.  Always use movl in this
	 case.  */
      if (ptr_mode == SImode
	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
	{
	  fnaddr = copy_addr_to_reg (fnaddr);

	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

	  mem = adjust_address (m_tramp, SImode, offset + 2);
	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
	  offset += 6;
	}
      else
	{
	  mem = adjust_address (m_tramp, HImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

	  mem = adjust_address (m_tramp, DImode, offset + 2);
	  emit_move_insn (mem, fnaddr);

	  offset += 10;
	}

      /* Load static chain using movabs to r10.  Use the shorter movl
	 instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
	{
	  opcode = 0xba41;
	  size = 6;
	}
      else
	{
	  opcode = 0xba49;
	  size = 10;
	}

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
	 pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
	 with a constant, or push the constant to the stack.  All of the
	 instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
	{
	  switch (REGNO (chain))
	    {
	    case AX_REG:
	      opcode = 0xb8; break;
	    case CX_REG:
	      opcode = 0xb9; break;
	    default:
	      gcc_unreachable ();
	    }
	}
      else
	opcode = 0x68;

      if (need_endbr)
	{
	  /* Insert ENDBR32.  */
	  mem = adjust_address (m_tramp, SImode, offset);
	  emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
	  offset += 4;
	}

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
	 In the case in which the trampoline stores the static chain on
	 the stack, we need to skip the first insn which pushes the
	 (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      int skip = MEM_P (chain) ? 1 : 0;
      /* Skip ENDBR32 at the entry of the target function.  */
      if (need_endbr
	  && !cgraph_node::get (fndecl)->only_called_directly_p ())
	skip += 4;
      disp = expand_binop (SImode, sub_optab, fnaddr,
			   plus_constant (Pmode, XEXP (m_tramp, 0),
					  offset - skip),
			   NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
		     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
static bool
ix86_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !ix86_function_naked (current_function_decl);
}
static bool
ix86_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return !ix86_function_naked (decl);
}
/* Return the shift count of a vector by scalar shift builtin second argument
   ARG1.  */
static tree
ix86_vector_shift_count (tree arg1)
{
  if (tree_fits_uhwi_p (arg1))
    return arg1;
  else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
    {
      /* The count argument is weird, passed in as various 128-bit
	 (or 64-bit) vectors, the low 64 bits from it are the count.  */
      unsigned char buf[16];
      int len = native_encode_expr (arg1, buf, 16);
      if (len == 0)
	return NULL_TREE;
      tree t = native_interpret_expr (uint64_type_node, buf, len);
      if (t && tree_fits_uhwi_p (t))
	return t;
    }
  return NULL_TREE;
}

static tree
ix86_fold_builtin (tree fndecl, int n_args,
		   tree *args, bool ignore ATTRIBUTE_UNUSED)
{
  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
    {
      enum ix86_builtins fn_code
	= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
      enum rtx_code rcode;
      bool is_vshift;
      unsigned HOST_WIDE_INT mask;

      switch (fn_code)
	{
	case IX86_BUILTIN_CPU_IS:
	case IX86_BUILTIN_CPU_SUPPORTS:
	  gcc_assert (n_args == 1);
	  return fold_builtin_cpu (fndecl, args);

	case IX86_BUILTIN_NANQ:
	case IX86_BUILTIN_NANSQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    const char *str = c_getstr (*args);
	    int quiet = fn_code == IX86_BUILTIN_NANQ;
	    REAL_VALUE_TYPE real;

	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
	      return build_real (type, real);
	    return NULL_TREE;
	  }

	case IX86_BUILTIN_INFQ:
	case IX86_BUILTIN_HUGE_VALQ:
	  {
	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
	    REAL_VALUE_TYPE inf;
	    real_inf (&inf);
	    return build_real (type, inf);
	  }

	case IX86_BUILTIN_TZCNT16:
	case IX86_BUILTIN_CTZS:
	case IX86_BUILTIN_TZCNT32:
	case IX86_BUILTIN_TZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_TZCNT16
		  || fn_code == IX86_BUILTIN_CTZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CTZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_LZCNT16:
	case IX86_BUILTIN_CLZS:
	case IX86_BUILTIN_LZCNT32:
	case IX86_BUILTIN_LZCNT64:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == INTEGER_CST)
	    {
	      tree type = TREE_TYPE (TREE_TYPE (fndecl));
	      tree arg = args[0];
	      if (fn_code == IX86_BUILTIN_LZCNT16
		  || fn_code == IX86_BUILTIN_CLZS)
		arg = fold_convert (short_unsigned_type_node, arg);
	      if (integer_zerop (arg))
		return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
	      else
		return fold_const_call (CFN_CLZ, type, arg);
	    }
	  break;

	case IX86_BUILTIN_BEXTR32:
	case IX86_BUILTIN_BEXTR64:
	case IX86_BUILTIN_BEXTRI32:
	case IX86_BUILTIN_BEXTRI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
	      unsigned int start = tree_to_uhwi (args[1]);
	      unsigned int len = (start & 0xff00) >> 8;
	      start &= 0xff;
	      if (start >= prec || len == 0)
		res = 0;
	      else if (!tree_fits_uhwi_p (args[0]))
		break;
	      else
		res = tree_to_uhwi (args[0]) >> start;
	      if (len > prec)
		len = prec;
	      if (len < HOST_BITS_PER_WIDE_INT)
		res &= (HOST_WIDE_INT_1U << len) - 1;
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
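	  /* Illustrative: __builtin_ia32_bextr_u32 (0x12345678,
	     (8 << 8) | 4) extracts 8 bits starting at bit 4 and folds
	     to (0x12345678 >> 4) & 0xff == 0x67 here.  */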
	  break;

	case IX86_BUILTIN_BZHI32:
	case IX86_BUILTIN_BZHI64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[1]))
	    {
	      unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
	      if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
		return args[0];
	      if (idx == 0)
		return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
	      if (!tree_fits_uhwi_p (args[0]))
		break;
	      unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
	      res &= ~(HOST_WIDE_INT_M1U << idx);
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PDEP32:
	case IX86_BUILTIN_PDEP64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & k) != 0)
		      res |= m;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
	  break;

	case IX86_BUILTIN_PEXT32:
	case IX86_BUILTIN_PEXT64:
	  gcc_assert (n_args == 2);
	  if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
	    {
	      unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
	      unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
	      unsigned HOST_WIDE_INT res = 0;
	      unsigned HOST_WIDE_INT m, k = 1;
	      for (m = 1; m; m <<= 1)
		if ((mask & m) != 0)
		  {
		    if ((src & m) != 0)
		      res |= k;
		    k <<= 1;
		  }
	      return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
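	  /* Illustrative: pdep (0b11, 0b1010) deposits the low source
	     bits into the set mask positions, folding to 0b1010;
	     pext (0b1010, 0b1010) gathers them back, folding to 0b11.  */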
	  break;

	case IX86_BUILTIN_MOVMSKPS:
	case IX86_BUILTIN_PMOVMSKB:
	case IX86_BUILTIN_MOVMSKPD:
	case IX86_BUILTIN_PMOVMSKB128:
	case IX86_BUILTIN_MOVMSKPD256:
	case IX86_BUILTIN_MOVMSKPS256:
	case IX86_BUILTIN_PMOVMSKB256:
	  gcc_assert (n_args == 1);
	  if (TREE_CODE (args[0]) == VECTOR_CST)
	    {
	      HOST_WIDE_INT res = 0;
	      for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
		{
		  tree e = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
		    {
		      if (wi::neg_p (wi::to_wide (e)))
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
		    {
		      if (TREE_REAL_CST (e).sign)
			res |= HOST_WIDE_INT_1 << i;
		    }
		  else
		    return NULL_TREE;
		}
	      return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
	    }
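	  /* Illustrative: __builtin_ia32_movmskps on the constant
	     { -1.0f, 2.0f, -3.0f, 4.0f } collects the sign bits of
	     elements 0 and 2 and folds to 0b0101 == 5.  */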
	  break;

	case IX86_BUILTIN_PSLLD:
	case IX86_BUILTIN_PSLLD128:
	case IX86_BUILTIN_PSLLD128_MASK:
	case IX86_BUILTIN_PSLLD256:
	case IX86_BUILTIN_PSLLD256_MASK:
	case IX86_BUILTIN_PSLLD512:
	case IX86_BUILTIN_PSLLDI:
	case IX86_BUILTIN_PSLLDI128:
	case IX86_BUILTIN_PSLLDI128_MASK:
	case IX86_BUILTIN_PSLLDI256:
	case IX86_BUILTIN_PSLLDI256_MASK:
	case IX86_BUILTIN_PSLLDI512:
	case IX86_BUILTIN_PSLLQ:
	case IX86_BUILTIN_PSLLQ128:
	case IX86_BUILTIN_PSLLQ128_MASK:
	case IX86_BUILTIN_PSLLQ256:
	case IX86_BUILTIN_PSLLQ256_MASK:
	case IX86_BUILTIN_PSLLQ512:
	case IX86_BUILTIN_PSLLQI:
	case IX86_BUILTIN_PSLLQI128:
	case IX86_BUILTIN_PSLLQI128_MASK:
	case IX86_BUILTIN_PSLLQI256:
	case IX86_BUILTIN_PSLLQI256_MASK:
	case IX86_BUILTIN_PSLLQI512:
	case IX86_BUILTIN_PSLLW:
	case IX86_BUILTIN_PSLLW128:
	case IX86_BUILTIN_PSLLW128_MASK:
	case IX86_BUILTIN_PSLLW256:
	case IX86_BUILTIN_PSLLW256_MASK:
	case IX86_BUILTIN_PSLLW512_MASK:
	case IX86_BUILTIN_PSLLWI:
	case IX86_BUILTIN_PSLLWI128:
	case IX86_BUILTIN_PSLLWI128_MASK:
	case IX86_BUILTIN_PSLLWI256:
	case IX86_BUILTIN_PSLLWI256_MASK:
	case IX86_BUILTIN_PSLLWI512_MASK:
	  rcode = ASHIFT;
	  is_vshift = false;
	  goto do_shift;

	case IX86_BUILTIN_PSRAD:
	case IX86_BUILTIN_PSRAD128:
	case IX86_BUILTIN_PSRAD128_MASK:
	case IX86_BUILTIN_PSRAD256:
	case IX86_BUILTIN_PSRAD256_MASK:
	case IX86_BUILTIN_PSRAD512:
	case IX86_BUILTIN_PSRADI:
	case IX86_BUILTIN_PSRADI128:
	case IX86_BUILTIN_PSRADI128_MASK:
	case IX86_BUILTIN_PSRADI256:
	case IX86_BUILTIN_PSRADI256_MASK:
	case IX86_BUILTIN_PSRADI512:
	case IX86_BUILTIN_PSRAQ128_MASK:
	case IX86_BUILTIN_PSRAQ256_MASK:
	case IX86_BUILTIN_PSRAQ512:
	case IX86_BUILTIN_PSRAQI128_MASK:
	case IX86_BUILTIN_PSRAQI256_MASK:
	case IX86_BUILTIN_PSRAQI512:
	case IX86_BUILTIN_PSRAW:
	case IX86_BUILTIN_PSRAW128:
	case IX86_BUILTIN_PSRAW128_MASK:
	case IX86_BUILTIN_PSRAW256:
	case IX86_BUILTIN_PSRAW256_MASK:
	case IX86_BUILTIN_PSRAW512:
	case IX86_BUILTIN_PSRAWI:
	case IX86_BUILTIN_PSRAWI128:
	case IX86_BUILTIN_PSRAWI128_MASK:
	case IX86_BUILTIN_PSRAWI256:
	case IX86_BUILTIN_PSRAWI256_MASK:
	case IX86_BUILTIN_PSRAWI512:
	  rcode = ASHIFTRT;
	  is_vshift = false;
	  goto do_shift;

	case IX86_BUILTIN_PSRLD:
	case IX86_BUILTIN_PSRLD128:
	case IX86_BUILTIN_PSRLD128_MASK:
	case IX86_BUILTIN_PSRLD256:
	case IX86_BUILTIN_PSRLD256_MASK:
	case IX86_BUILTIN_PSRLD512:
	case IX86_BUILTIN_PSRLDI:
	case IX86_BUILTIN_PSRLDI128:
	case IX86_BUILTIN_PSRLDI128_MASK:
	case IX86_BUILTIN_PSRLDI256:
	case IX86_BUILTIN_PSRLDI256_MASK:
	case IX86_BUILTIN_PSRLDI512:
	case IX86_BUILTIN_PSRLQ:
	case IX86_BUILTIN_PSRLQ128:
	case IX86_BUILTIN_PSRLQ128_MASK:
	case IX86_BUILTIN_PSRLQ256:
	case IX86_BUILTIN_PSRLQ256_MASK:
	case IX86_BUILTIN_PSRLQ512:
	case IX86_BUILTIN_PSRLQI:
	case IX86_BUILTIN_PSRLQI128:
	case IX86_BUILTIN_PSRLQI128_MASK:
	case IX86_BUILTIN_PSRLQI256:
	case IX86_BUILTIN_PSRLQI256_MASK:
	case IX86_BUILTIN_PSRLQI512:
	case IX86_BUILTIN_PSRLW:
	case IX86_BUILTIN_PSRLW128:
	case IX86_BUILTIN_PSRLW128_MASK:
	case IX86_BUILTIN_PSRLW256:
	case IX86_BUILTIN_PSRLW256_MASK:
	case IX86_BUILTIN_PSRLW512:
	case IX86_BUILTIN_PSRLWI:
	case IX86_BUILTIN_PSRLWI128:
	case IX86_BUILTIN_PSRLWI128_MASK:
	case IX86_BUILTIN_PSRLWI256:
	case IX86_BUILTIN_PSRLWI256_MASK:
	case IX86_BUILTIN_PSRLWI512:
	  rcode = LSHIFTRT;
	  is_vshift = false;
	  goto do_shift;

	case IX86_BUILTIN_PSLLVV16HI:
	case IX86_BUILTIN_PSLLVV16SI:
	case IX86_BUILTIN_PSLLVV2DI:
	case IX86_BUILTIN_PSLLVV2DI_MASK:
	case IX86_BUILTIN_PSLLVV32HI:
	case IX86_BUILTIN_PSLLVV4DI:
	case IX86_BUILTIN_PSLLVV4DI_MASK:
	case IX86_BUILTIN_PSLLVV4SI:
	case IX86_BUILTIN_PSLLVV4SI_MASK:
	case IX86_BUILTIN_PSLLVV8DI:
	case IX86_BUILTIN_PSLLVV8HI:
	case IX86_BUILTIN_PSLLVV8SI:
	case IX86_BUILTIN_PSLLVV8SI_MASK:
	  rcode = ASHIFT;
	  is_vshift = true;
	  goto do_shift;

	case IX86_BUILTIN_PSRAVQ128:
	case IX86_BUILTIN_PSRAVQ256:
	case IX86_BUILTIN_PSRAVV16HI:
	case IX86_BUILTIN_PSRAVV16SI:
	case IX86_BUILTIN_PSRAVV32HI:
	case IX86_BUILTIN_PSRAVV4SI:
	case IX86_BUILTIN_PSRAVV4SI_MASK:
	case IX86_BUILTIN_PSRAVV8DI:
	case IX86_BUILTIN_PSRAVV8HI:
	case IX86_BUILTIN_PSRAVV8SI:
	case IX86_BUILTIN_PSRAVV8SI_MASK:
	  rcode = ASHIFTRT;
	  is_vshift = true;
	  goto do_shift;

	case IX86_BUILTIN_PSRLVV16HI:
	case IX86_BUILTIN_PSRLVV16SI:
	case IX86_BUILTIN_PSRLVV2DI:
	case IX86_BUILTIN_PSRLVV2DI_MASK:
	case IX86_BUILTIN_PSRLVV32HI:
	case IX86_BUILTIN_PSRLVV4DI:
	case IX86_BUILTIN_PSRLVV4DI_MASK:
	case IX86_BUILTIN_PSRLVV4SI:
	case IX86_BUILTIN_PSRLVV4SI_MASK:
	case IX86_BUILTIN_PSRLVV8DI:
	case IX86_BUILTIN_PSRLVV8HI:
	case IX86_BUILTIN_PSRLVV8SI:
	case IX86_BUILTIN_PSRLVV8SI_MASK:
	  rcode = LSHIFTRT;
	  is_vshift = true;
	  goto do_shift;

	do_shift:
	  gcc_assert (n_args >= 2);
	  if (TREE_CODE (args[0]) != VECTOR_CST)
	    break;
	  mask = HOST_WIDE_INT_M1U;
	  if (n_args > 2)
	    {
	      /* This is masked shift.  */
	      if (!tree_fits_uhwi_p (args[n_args - 1])
		  || TREE_SIDE_EFFECTS (args[n_args - 2]))
		break;
	      mask = tree_to_uhwi (args[n_args - 1]);
	      unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
	      mask |= HOST_WIDE_INT_M1U << elems;
	      if (mask != HOST_WIDE_INT_M1U
		  && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
		break;
	      if (mask == (HOST_WIDE_INT_M1U << elems))
		return args[n_args - 2];
	    }
	  if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
	    break;
	  if (tree tem = (is_vshift ? integer_one_node
			  : ix86_vector_shift_count (args[1])))
	    {
	      unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
	      unsigned HOST_WIDE_INT prec
		= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
	      if (count == 0 && mask == HOST_WIDE_INT_M1U)
		return args[0];
	      if (count >= prec)
		{
		  if (rcode == ASHIFTRT)
		    count = prec - 1;
		  else if (mask == HOST_WIDE_INT_M1U)
		    return build_zero_cst (TREE_TYPE (args[0]));
		}
	      tree countt = NULL_TREE;
	      if (!is_vshift)
		{
		  if (count >= prec)
		    countt = integer_zero_node;
		  else
		    countt = build_int_cst (integer_type_node, count);
		}
	      tree_vector_builder builder;
	      if (mask != HOST_WIDE_INT_M1U || is_vshift)
		builder.new_vector (TREE_TYPE (args[0]),
				    TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
				    1);
	      else
		builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
					     false);
	      unsigned int cnt = builder.encoded_nelts ();
	      for (unsigned int i = 0; i < cnt; ++i)
		{
		  tree elt = VECTOR_CST_ELT (args[0], i);
		  if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
		    return NULL_TREE;
		  tree type = TREE_TYPE (elt);
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (unsigned_type_for (type), elt);
		  if (is_vshift)
		    {
		      countt = VECTOR_CST_ELT (args[1], i);
		      if (TREE_CODE (countt) != INTEGER_CST
			  || TREE_OVERFLOW (countt))
			return NULL_TREE;
		      if (wi::neg_p (wi::to_wide (countt))
			  || wi::to_widest (countt) >= prec)
			{
			  if (rcode == ASHIFTRT)
			    countt = build_int_cst (TREE_TYPE (countt),
						    prec - 1);
			  else
			    {
			      elt = build_zero_cst (TREE_TYPE (elt));
			      countt = build_zero_cst (TREE_TYPE (countt));
			    }
			}
		    }
		  else if (count >= prec)
		    elt = build_zero_cst (TREE_TYPE (elt));
		  elt = const_binop (rcode == ASHIFT
				     ? LSHIFT_EXPR : RSHIFT_EXPR,
				     TREE_TYPE (elt), elt, countt);
		  if (!elt || TREE_CODE (elt) != INTEGER_CST)
		    return NULL_TREE;
		  if (rcode == LSHIFTRT)
		    elt = fold_convert (type, elt);
		  if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
		    {
		      elt = VECTOR_CST_ELT (args[n_args - 2], i);
		      if (TREE_CODE (elt) != INTEGER_CST
			  || TREE_OVERFLOW (elt))
			return NULL_TREE;
		    }
		  builder.quick_push (elt);
		}
	      return builder.build ();
	    }
	  break;

	default:
	  break;
	}
    }

#ifdef SUBTARGET_FOLD_BUILTIN
  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
#endif

  return NULL_TREE;
}
/* Fold a MD builtin (use ix86_fold_builtin for folding into
   constant) in GIMPLE.  */

static bool
ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
  int n_args = gimple_call_num_args (stmt);
  enum ix86_builtins fn_code
    = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
  tree decl = NULL_TREE;
  tree arg0, arg1, arg2;
  enum rtx_code rcode;
  unsigned HOST_WIDE_INT count;
  bool is_vshift;

  switch (fn_code)
    {
    case IX86_BUILTIN_TZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CTZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_TZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CTZLL);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT32:
      decl = builtin_decl_implicit (BUILT_IN_CLZ);
      goto fold_tzcnt_lzcnt;

    case IX86_BUILTIN_LZCNT64:
      decl = builtin_decl_implicit (BUILT_IN_CLZLL);
      goto fold_tzcnt_lzcnt;

    fold_tzcnt_lzcnt:
      gcc_assert (n_args == 1);
      arg0 = gimple_call_arg (stmt, 0);
      if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
	{
	  int prec = TYPE_PRECISION (TREE_TYPE (arg0));
	  /* If arg0 is provably non-zero, optimize into generic
	     __builtin_c[tl]z{,ll} function the middle-end handles
	     better.  */
	  if (!expr_not_equal_to (arg0, wi::zero (prec)))
	    break;

	  location_t loc = gimple_location (stmt);
	  gimple *g = gimple_build_call (decl, 1, arg0);
	  gimple_set_location (g, loc);
	  tree lhs = make_ssa_name (integer_type_node);
	  gimple_call_set_lhs (g, lhs);
	  gsi_insert_before (gsi, g, GSI_SAME_STMT);
	  g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
	  gimple_set_location (g, loc);
	  gsi_replace (gsi, g, false);
	  return true;
	}
:
17935 case IX86_BUILTIN_BZHI64
:
17936 gcc_assert (n_args
== 2);
17937 arg1
= gimple_call_arg (stmt
, 1);
17938 if (tree_fits_uhwi_p (arg1
) && gimple_call_lhs (stmt
))
17940 unsigned int idx
= tree_to_uhwi (arg1
) & 0xff;
17941 arg0
= gimple_call_arg (stmt
, 0);
17942 if (idx
< TYPE_PRECISION (TREE_TYPE (arg0
)))
17944 location_t loc
= gimple_location (stmt
);
17945 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17946 gimple_set_location (g
, loc
);
17947 gsi_replace (gsi
, g
, false);
17952 case IX86_BUILTIN_PDEP32
:
17953 case IX86_BUILTIN_PDEP64
:
17954 case IX86_BUILTIN_PEXT32
:
17955 case IX86_BUILTIN_PEXT64
:
17956 gcc_assert (n_args
== 2);
17957 arg1
= gimple_call_arg (stmt
, 1);
17958 if (integer_all_onesp (arg1
) && gimple_call_lhs (stmt
))
17960 location_t loc
= gimple_location (stmt
);
17961 arg0
= gimple_call_arg (stmt
, 0);
17962 gimple
*g
= gimple_build_assign (gimple_call_lhs (stmt
), arg0
);
17963 gimple_set_location (g
, loc
);
17964 gsi_replace (gsi
, g
, false);
17969 case IX86_BUILTIN_PSLLD
:
17970 case IX86_BUILTIN_PSLLD128
:
17971 case IX86_BUILTIN_PSLLD128_MASK
:
17972 case IX86_BUILTIN_PSLLD256
:
17973 case IX86_BUILTIN_PSLLD256_MASK
:
17974 case IX86_BUILTIN_PSLLD512
:
17975 case IX86_BUILTIN_PSLLDI
:
17976 case IX86_BUILTIN_PSLLDI128
:
17977 case IX86_BUILTIN_PSLLDI128_MASK
:
17978 case IX86_BUILTIN_PSLLDI256
:
17979 case IX86_BUILTIN_PSLLDI256_MASK
:
17980 case IX86_BUILTIN_PSLLDI512
:
17981 case IX86_BUILTIN_PSLLQ
:
17982 case IX86_BUILTIN_PSLLQ128
:
17983 case IX86_BUILTIN_PSLLQ128_MASK
:
17984 case IX86_BUILTIN_PSLLQ256
:
17985 case IX86_BUILTIN_PSLLQ256_MASK
:
17986 case IX86_BUILTIN_PSLLQ512
:
17987 case IX86_BUILTIN_PSLLQI
:
17988 case IX86_BUILTIN_PSLLQI128
:
17989 case IX86_BUILTIN_PSLLQI128_MASK
:
17990 case IX86_BUILTIN_PSLLQI256
:
17991 case IX86_BUILTIN_PSLLQI256_MASK
:
17992 case IX86_BUILTIN_PSLLQI512
:
17993 case IX86_BUILTIN_PSLLW
:
17994 case IX86_BUILTIN_PSLLW128
:
17995 case IX86_BUILTIN_PSLLW128_MASK
:
17996 case IX86_BUILTIN_PSLLW256
:
17997 case IX86_BUILTIN_PSLLW256_MASK
:
17998 case IX86_BUILTIN_PSLLW512_MASK
:
17999 case IX86_BUILTIN_PSLLWI
:
18000 case IX86_BUILTIN_PSLLWI128
:
18001 case IX86_BUILTIN_PSLLWI128_MASK
:
18002 case IX86_BUILTIN_PSLLWI256
:
18003 case IX86_BUILTIN_PSLLWI256_MASK
:
18004 case IX86_BUILTIN_PSLLWI512_MASK
:
18008 case IX86_BUILTIN_PSRAD
:
18009 case IX86_BUILTIN_PSRAD128
:
18010 case IX86_BUILTIN_PSRAD128_MASK
:
18011 case IX86_BUILTIN_PSRAD256
:
18012 case IX86_BUILTIN_PSRAD256_MASK
:
18013 case IX86_BUILTIN_PSRAD512
:
18014 case IX86_BUILTIN_PSRADI
:
18015 case IX86_BUILTIN_PSRADI128
:
18016 case IX86_BUILTIN_PSRADI128_MASK
:
18017 case IX86_BUILTIN_PSRADI256
:
18018 case IX86_BUILTIN_PSRADI256_MASK
:
18019 case IX86_BUILTIN_PSRADI512
:
18020 case IX86_BUILTIN_PSRAQ128_MASK
:
18021 case IX86_BUILTIN_PSRAQ256_MASK
:
18022 case IX86_BUILTIN_PSRAQ512
:
18023 case IX86_BUILTIN_PSRAQI128_MASK
:
18024 case IX86_BUILTIN_PSRAQI256_MASK
:
18025 case IX86_BUILTIN_PSRAQI512
:
18026 case IX86_BUILTIN_PSRAW
:
18027 case IX86_BUILTIN_PSRAW128
:
18028 case IX86_BUILTIN_PSRAW128_MASK
:
18029 case IX86_BUILTIN_PSRAW256
:
18030 case IX86_BUILTIN_PSRAW256_MASK
:
18031 case IX86_BUILTIN_PSRAW512
:
18032 case IX86_BUILTIN_PSRAWI
:
18033 case IX86_BUILTIN_PSRAWI128
:
18034 case IX86_BUILTIN_PSRAWI128_MASK
:
18035 case IX86_BUILTIN_PSRAWI256
:
18036 case IX86_BUILTIN_PSRAWI256_MASK
:
18037 case IX86_BUILTIN_PSRAWI512
:
18041 case IX86_BUILTIN_PSRLD
:
18042 case IX86_BUILTIN_PSRLD128
:
18043 case IX86_BUILTIN_PSRLD128_MASK
:
18044 case IX86_BUILTIN_PSRLD256
:
18045 case IX86_BUILTIN_PSRLD256_MASK
:
18046 case IX86_BUILTIN_PSRLD512
:
18047 case IX86_BUILTIN_PSRLDI
:
18048 case IX86_BUILTIN_PSRLDI128
:
18049 case IX86_BUILTIN_PSRLDI128_MASK
:
18050 case IX86_BUILTIN_PSRLDI256
:
18051 case IX86_BUILTIN_PSRLDI256_MASK
:
18052 case IX86_BUILTIN_PSRLDI512
:
18053 case IX86_BUILTIN_PSRLQ
:
18054 case IX86_BUILTIN_PSRLQ128
:
18055 case IX86_BUILTIN_PSRLQ128_MASK
:
18056 case IX86_BUILTIN_PSRLQ256
:
18057 case IX86_BUILTIN_PSRLQ256_MASK
:
18058 case IX86_BUILTIN_PSRLQ512
:
18059 case IX86_BUILTIN_PSRLQI
:
18060 case IX86_BUILTIN_PSRLQI128
:
18061 case IX86_BUILTIN_PSRLQI128_MASK
:
18062 case IX86_BUILTIN_PSRLQI256
:
18063 case IX86_BUILTIN_PSRLQI256_MASK
:
18064 case IX86_BUILTIN_PSRLQI512
:
18065 case IX86_BUILTIN_PSRLW
:
18066 case IX86_BUILTIN_PSRLW128
:
18067 case IX86_BUILTIN_PSRLW128_MASK
:
18068 case IX86_BUILTIN_PSRLW256
:
18069 case IX86_BUILTIN_PSRLW256_MASK
:
18070 case IX86_BUILTIN_PSRLW512
:
18071 case IX86_BUILTIN_PSRLWI
:
18072 case IX86_BUILTIN_PSRLWI128
:
18073 case IX86_BUILTIN_PSRLWI128_MASK
:
18074 case IX86_BUILTIN_PSRLWI256
:
18075 case IX86_BUILTIN_PSRLWI256_MASK
:
18076 case IX86_BUILTIN_PSRLWI512
:
18080 case IX86_BUILTIN_PSLLVV16HI
:
18081 case IX86_BUILTIN_PSLLVV16SI
:
18082 case IX86_BUILTIN_PSLLVV2DI
:
18083 case IX86_BUILTIN_PSLLVV2DI_MASK
:
18084 case IX86_BUILTIN_PSLLVV32HI
:
18085 case IX86_BUILTIN_PSLLVV4DI
:
18086 case IX86_BUILTIN_PSLLVV4DI_MASK
:
18087 case IX86_BUILTIN_PSLLVV4SI
:
18088 case IX86_BUILTIN_PSLLVV4SI_MASK
:
18089 case IX86_BUILTIN_PSLLVV8DI
:
18090 case IX86_BUILTIN_PSLLVV8HI
:
18091 case IX86_BUILTIN_PSLLVV8SI
:
18092 case IX86_BUILTIN_PSLLVV8SI_MASK
:
18096 case IX86_BUILTIN_PSRAVQ128
:
18097 case IX86_BUILTIN_PSRAVQ256
:
18098 case IX86_BUILTIN_PSRAVV16HI
:
18099 case IX86_BUILTIN_PSRAVV16SI
:
18100 case IX86_BUILTIN_PSRAVV32HI
:
18101 case IX86_BUILTIN_PSRAVV4SI
:
18102 case IX86_BUILTIN_PSRAVV4SI_MASK
:
18103 case IX86_BUILTIN_PSRAVV8DI
:
18104 case IX86_BUILTIN_PSRAVV8HI
:
18105 case IX86_BUILTIN_PSRAVV8SI
:
18106 case IX86_BUILTIN_PSRAVV8SI_MASK
:
18110 case IX86_BUILTIN_PSRLVV16HI
:
18111 case IX86_BUILTIN_PSRLVV16SI
:
18112 case IX86_BUILTIN_PSRLVV2DI
:
18113 case IX86_BUILTIN_PSRLVV2DI_MASK
:
18114 case IX86_BUILTIN_PSRLVV32HI
:
18115 case IX86_BUILTIN_PSRLVV4DI
:
18116 case IX86_BUILTIN_PSRLVV4DI_MASK
:
18117 case IX86_BUILTIN_PSRLVV4SI
:
18118 case IX86_BUILTIN_PSRLVV4SI_MASK
:
18119 case IX86_BUILTIN_PSRLVV8DI
:
18120 case IX86_BUILTIN_PSRLVV8HI
:
18121 case IX86_BUILTIN_PSRLVV8SI
:
18122 case IX86_BUILTIN_PSRLVV8SI_MASK
:
      gcc_assert (n_args >= 2);
      arg0 = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);
      if (n_args > 2)
        {
          /* This is a masked shift.  Only optimize if the mask is all ones.  */
          tree argl = gimple_call_arg (stmt, n_args - 1);
          if (!tree_fits_uhwi_p (argl))
            break;
          unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
          unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
          if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
            break;
        }
      if (is_vshift)
        {
          if (TREE_CODE (arg1) != VECTOR_CST)
            break;
          count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
          if (integer_zerop (arg1))
            count = 0;
          else if (rcode == ASHIFTRT)
            break;
          else
            for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
              {
                tree elt = VECTOR_CST_ELT (arg1, i);
                if (!wi::neg_p (wi::to_wide (elt))
                    && wi::to_widest (elt) < count)
                  count = 0;
              }
        }
      else
        {
          arg1 = ix86_vector_shift_count (arg1);
          if (!arg1)
            break;
          count = tree_to_uhwi (arg1);
        }
      if (count == 0)
        {
          /* Just return the first argument for shift by 0.  */
          location_t loc = gimple_location (stmt);
          gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
          gimple_set_location (g, loc);
          gsi_replace (gsi, g, false);
          return true;
        }
      if (rcode != ASHIFTRT
          && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
        {
          /* For shift counts equal or greater than precision, except for
             arithmetic right shift the result is zero.  */
          location_t loc = gimple_location (stmt);
          gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
                                           build_zero_cst (TREE_TYPE (arg0)));
          gimple_set_location (g, loc);
          gsi_replace (gsi, g, false);
          return true;
        }
      break;
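      /* A worked illustration of the two folds above (an illustrative
         sketch, not part of the original sources):

           __m128i a = _mm_srli_epi32 (v, 0);   /* count 0: folds to v */
           __m128i b = _mm_srli_epi32 (v, 32);  /* count >= 32: folds to zero */

         The first call becomes a plain assignment from ARG0, the second an
         assignment of build_zero_cst; an arithmetic right shift by a count
         >= precision is left alone, since it yields the sign mask rather
         than zero.  */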
    case IX86_BUILTIN_SHUFPD:
      arg2 = gimple_call_arg (stmt, 2);
      if (TREE_CODE (arg2) == INTEGER_CST)
        {
          location_t loc = gimple_location (stmt);
          unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
          arg0 = gimple_call_arg (stmt, 0);
          arg1 = gimple_call_arg (stmt, 1);
          tree itype = long_long_integer_type_node;
          tree vtype = build_vector_type (itype, 2); /* V2DI */
          tree_vector_builder elts (vtype, 2, 1);
          /* Ignore bits other than the lowest 2.  */
          elts.quick_push (build_int_cst (itype, imask & 1));
          imask >>= 1;
          elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
          tree omask = elts.build ();
          gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
                                           VEC_PERM_EXPR,
                                           arg0, arg1, omask);
          gimple_set_location (g, loc);
          gsi_replace (gsi, g, false);
          return true;
        }
      // Do not error yet, the constant could be propagated later?
      break;

    default:
      break;
    }

  return false;
}
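/* Worked example for the SHUFPD fold above (an illustrative sketch, not
   from the original sources): for __builtin_ia32_shufpd (a, b, 2) only
   the low two bits of the immediate matter; bit 0 picks result element 0
   from A and bit 1 picks result element 1 from B, so imask 2 becomes the
   VEC_PERM_EXPR selector {0, 3}.  */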
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_POW:
    CASE_CFN_TANH:
    CASE_CFN_TAN:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_SINH:
    CASE_CFN_SIN:
    CASE_CFN_ASINH:
    CASE_CFN_ASIN:
    CASE_CFN_COSH:
    CASE_CFN_COS:
    CASE_CFN_ACOSH:
    CASE_CFN_ACOS:
      if ((el_mode != DFmode || n != 2)
          && (el_mode != SFmode || n != 4))
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  */
  name[4] &= ~0x20;

  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
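/* Example of the SVML name mangling above (an illustrative sketch, not
   from the original sources): for a V4SF sinf, bname is "__builtin_sinf"
   and bname+10 is "sinf", so sprintf yields "vmlssinf", the trailing
   character is rewritten to give "vmlssin4", and uppercasing name[4]
   produces the library entry point "vmlsSin4".  logf and log bypass this
   scheme and map directly to "vmlsLn4" and "vmldLn2".  */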
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.  */

static tree
ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    CASE_CFN_SIN:
    CASE_CFN_COS:
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG2:
    CASE_CFN_LOG10:
      if (el_mode == DFmode && n == 2)
        {
          name[4] = 'd';
          name[5] = '2';
        }
      else if (el_mode == SFmode && n == 4)
        {
          name[4] = 's';
          name[5] = '4';
        }
      else
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + 7, "%s", bname+10);

  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
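/* Example of the ACML name mangling above (an illustrative sketch, not
   from the original sources): for a V2DF sin the template "__vr.._"
   becomes "__vrd2_" and bname+10 == "sin" is appended at offset 7,
   producing "__vrd2_sin"; the V4SF sinf variant likewise yields
   "__vrs4_sinf".  */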
/* Returns a decl of a function that implements scatter store with
   register type VECTYPE and index type INDEX_TYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
                                const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  if (!TARGET_AVX512F)
    return NULL_TREE;

  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (vectype))
    {
    case E_V8DFmode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
      break;
    case E_V8DImode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
      break;
    case E_V16SFmode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
      break;
    case E_V16SImode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
      break;
    case E_V4DFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
      else
        return NULL_TREE;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
      else
        return NULL_TREE;
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
      else
        return NULL_TREE;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
      else
        return NULL_TREE;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
      else
        return NULL_TREE;
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
      else
        return NULL_TREE;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
      else
        return NULL_TREE;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
      else
        return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return get_ix86_builtin (code);
}
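/* The power-of-two scale test above uses the usual bit trick:
   (scale & (scale - 1)) == 0 exactly when at most one bit of SCALE is
   set, so combined with the range check only 1, 2, 4 and 8 are
   accepted.  For instance, scale 6 gives 6 & 5 == 4 and is rejected.  */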
/* Return true if it is safe to use the rsqrt optabs to optimize
   1.0/sqrt.  */

static bool
use_rsqrt_p ()
{
  return (TARGET_SSE && TARGET_SSE_MATH
          && flag_finite_math_only
          && !flag_trapping_math
          && flag_unsafe_math_optimizations);
}
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case E_V8DFmode:
      /* In the 512-bit DFmode case, we can only move elements within
         a 128-bit lane.  First fill the second part of the mask,
         then fallthru.  */
      for (i = 4; i < 6; ++i)
        {
          if (ipar[i] < 4 || ipar[i] >= 6)
            return 0;
          mask |= (ipar[i] - 4) << i;
        }
      for (i = 6; i < 8; ++i)
        {
          if (ipar[i] < 6)
            return 0;
          mask |= (ipar[i] - 6) << i;
        }
      /* FALLTHRU */

    case E_V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (ipar[i] >= 2)
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (ipar[i] < 2)
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case E_V16SFmode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
         must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
        if (ipar[i] + 8 != ipar[i + 8])
          return 0;
      /* FALLTHRU */

    case E_V8SFmode:
      /* In 256 bit SFmode case, we have full freedom of
         movement within the low 128-bit lane, but the high 128-bit
         lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
         the elements from the source operand.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
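/* Worked example for the helper above (an illustrative sketch, not from
   the original sources): for V4SF the parallel [1 0 3 2] has nelt == 4,
   so each element contributes ipar[i] << (i * 2), giving the vpermilps
   immediate 0xb1; the function returns 0xb1 + 1 so that a valid mask of
   zero is distinguishable from the "no match" return of 0.  */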
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */

  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
        return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask.  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
        return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
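/* Worked example (an illustrative sketch, not from the original
   sources): for V4DF the parallel [2 3 4 5] selects the high lane of
   operand 0 (e == 2, e / nelt2 == 1) and the low lane of operand 1
   (e == 4, e / nelt2 == 2), so the rebuilt vperm2f128 immediate is
   1 | (2 << 4) == 0x21 and the function returns 0x22.  */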
/* Return a register priority for hard reg REGNO.  */

static int
ix86_register_priority (int hard_regno)
{
  /* ebp and r13 as the base always wants a displacement, r12 as the
     base always wants an index.  So discourage their usage in an
     address.  */
  if (hard_regno == R12_REG || hard_regno == R13_REG)
    return 0;
  if (hard_regno == BP_REG)
    return 1;
  /* New x86-64 int registers result in bigger code size.  Discourage
     them.  */
  if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
    return 2;
  /* New x86-64 SSE registers result in bigger code size.  Discourage
     them.  */
  if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
    return 2;
  if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
    return 1;
  /* Usage of AX register results in smaller code.  Prefer it.  */
  if (hard_regno == AX_REG)
    return 4;
  return 3;
}
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
          || MAYBE_SSE_CLASS_P (regclass)
          || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (CONST_DOUBLE_P (x))
    {
      /* General regs can load everything.  */
      if (INTEGER_CLASS_P (regclass))
        return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (IS_STACK_MODE (mode)
          && standard_80387_constant_p (x) > 0)
        {
          /* Limit class to FP regs.  */
          if (FLOAT_CLASS_P (regclass))
            return FLOAT_REGS;
        }

      return NO_REGS;
    }

  /* Prefer SSE regs only, if we can use them for math.  */
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS or ALL_MASK_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (Q_CLASS_P (regclass))
        return regclass;
      else if (reg_class_subset_p (Q_REGS, regclass))
        return Q_REGS;
      else if (MASK_CLASS_P (regclass))
        return regclass;
      else
        return NO_REGS;
    }

  return regclass;
}
/* Discourage putting floating-point values in SSE registers unless
   SSE math is being used, and likewise for the 387 registers.  */
static reg_class_t
ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
{
  /* Restrict the output reload class to the register bank that we are doing
     math on.  If we would like not to return a subset of CLASS, reject this
     alternative: if reload cannot do this, it will still use its choice.  */
  machine_mode mode = GET_MODE (x);
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;

  if (IS_STACK_MODE (mode))
    return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;

  return regclass;
}
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
                       machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      sri->icode = (in_p
                    ? CODE_FOR_reload_noff_load
                    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && ((!TARGET_64BIT && !in_p
           && INTEGER_CLASS_P (rclass)
           && MAYBE_NON_Q_CLASS_P (rclass))
          || (!TARGET_AVX512DQ
              && MAYBE_MASK_CLASS_P (rclass))))
    {
      int regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
        return Q_REGS;

      return NO_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
                                       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
        reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
        SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
        reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
        reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
        reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */

static bool
ix86_class_likely_spilled_p (reg_class_t rclass)
{
  switch (rclass)
    {
      case AREG:
      case DREG:
      case CREG:
      case BREG:
      case AD_REGS:
      case CLOBBERED_REGS:
      case Q_REGS:
      case SIREG:
      case DIREG:
      case SSE_FIRST_REG:
      case FP_TOP_REG:
      case FP_SECOND_REG:
        return true;

      default:
        break;
    }

  return false;
}
/* Return true if a set of DST by the expression SRC should be allowed.
   This prevents complex sets of likely_spilled hard regs before reload.  */

bool
ix86_hardreg_mov_ok (rtx dst, rtx src)
{
  /* Avoid complex sets of likely_spilled hard registers before reload.  */
  if (REG_P (dst) && HARD_REGISTER_P (dst)
      && !REG_P (src) && !MEM_P (src)
      && !x86_64_immediate_operand (src, GET_MODE (dst))
      && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
      && !reload_completed)
    return false;
  return true;
}
/* If we are copying between registers from different register sets
   (e.g. FP and integer), we may need a memory location.

   The function can't work reliably when one of the CLASSES is a class
   containing registers from multiple sets.  We avoid this by never combining
   different sets in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.

   To optimize register_move_cost performance, define inline variant.  */

static inline bool
inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
                                reg_class_t class2, int strict)
{
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;

  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
          || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;
    }

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
        return true;

      /* Between SSE and general, we have moves no larger than word size.  */
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
          || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)
          || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        return true;

      /* If the target says that inter-unit moves are more expensive
         than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
          || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
        return true;
    }

  return false;
}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */

static bool
ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
                              reg_class_t class2)
{
  return inline_secondary_memory_needed (mode, class1, class2, true);
}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens integral modes to BITS_PER_WORD.
   There is no need to emit full 64 bit move on 64 bit targets
   for integral modes that can be moved using 32 bit move.  */

static machine_mode
ix86_secondary_memory_needed_mode (machine_mode mode)
{
  if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
    return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
  return mode;
}
/* Implement the TARGET_CLASS_MAX_NREGS hook.

   On the 80386, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
{
  if (MAYBE_INTEGER_CLASS_P (rclass))
    {
      if (mode == XFmode)
        return (TARGET_64BIT ? 2 : 3);
      else if (mode == XCmode)
        return (TARGET_64BIT ? 4 : 6);
      else
        return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
  else
    {
      if (COMPLEX_MODE_P (mode))
        return 2;
      else
        return 1;
    }
}
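/* For example (illustrative): an XFmode value in integer registers needs
   CEIL (12, 4) == 3 registers on ia32 but only 2 on x86-64, while in an
   FP register it always fits in a single one, which is why the integer
   path above special-cases XFmode and XCmode instead of using the
   generic size computation.  */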
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
ix86_can_change_mode_class (machine_mode from, machine_mode to,
                            reg_class_t regclass)
{
  if (from == to)
    return true;

  /* x87 registers can't do subreg at all, as all values are reformatted
     to extended precision.  */
  if (MAYBE_FLOAT_CLASS_P (regclass))
    return false;

  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
    {
      /* Vector registers do not support QI or HImode loads.  If we don't
         disallow a change to these modes, reload will assume it's ok to
         drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
         the vec_dupv4hi pattern.  */
      if (GET_MODE_SIZE (from) < 4)
        return false;
    }

  return true;
}
/* Return index of MODE in the sse load/store tables.  */

static inline int
sse_store_index (machine_mode mode)
{
  switch (GET_MODE_SIZE (mode))
    {
      case 4:
        return 0;
      case 8:
        return 1;
      case 16:
        return 2;
      case 32:
        return 3;
      case 64:
        return 4;
      default:
        return -1;
    }
}
/* Return the cost of moving data of mode M between a
   register and memory.  A value of 2 is the default; this cost is
   relative to those in `REGISTER_MOVE_COST'.

   This function is used extensively by register_move_cost that is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return maximum of in and out move cost.

   If moving between registers and memory is more expensive than
   between two registers, you should define this macro to express the
   relative cost.

   Model also increased moving costs of QImode registers in non
   Q_REGS classes.  */

static inline int
inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
{
  int cost;

  if (FLOAT_CLASS_P (regclass))
    {
      int index;
      switch (mode)
        {
          case E_SFmode:
            index = 0;
            break;
          case E_DFmode:
            index = 1;
            break;
          case E_XFmode:
            index = 2;
            break;
          default:
            return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->hard_register.fp_load [index],
                    ix86_cost->hard_register.fp_store [index]);
      return in ? ix86_cost->hard_register.fp_load [index]
                : ix86_cost->hard_register.fp_store [index];
    }
  if (SSE_CLASS_P (regclass))
    {
      int index = sse_store_index (mode);
      if (index == -1)
        return 100;
      if (in == 2)
        return MAX (ix86_cost->hard_register.sse_load [index],
                    ix86_cost->hard_register.sse_store [index]);
      return in ? ix86_cost->hard_register.sse_load [index]
                : ix86_cost->hard_register.sse_store [index];
    }
  if (MASK_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
        case 1:
          index = 0;
          break;
        case 2:
          index = 1;
          break;
        /* DImode loads and stores assumed to cost the same as SImode.  */
        case 4:
        case 8:
          index = 2;
          break;
        default:
          return 100;
        }

      if (in == 2)
        return MAX (ix86_cost->hard_register.mask_load[index],
                    ix86_cost->hard_register.mask_store[index]);
      return in ? ix86_cost->hard_register.mask_load[2]
                : ix86_cost->hard_register.mask_store[2];
    }
  if (MMX_CLASS_P (regclass))
    {
      int index;
      switch (GET_MODE_SIZE (mode))
        {
          case 4:
            index = 0;
            break;
          case 8:
            index = 1;
            break;
          default:
            return 100;
        }
      if (in == 2)
        return MAX (ix86_cost->hard_register.mmx_load [index],
                    ix86_cost->hard_register.mmx_store [index]);
      return in ? ix86_cost->hard_register.mmx_load [index]
                : ix86_cost->hard_register.mmx_store [index];
    }
  switch (GET_MODE_SIZE (mode))
    {
      case 1:
        if (Q_CLASS_P (regclass) || TARGET_64BIT)
          {
            if (!in)
              return ix86_cost->hard_register.int_store[0];
            if (TARGET_PARTIAL_REG_DEPENDENCY
                && optimize_function_for_speed_p (cfun))
              cost = ix86_cost->hard_register.movzbl_load;
            else
              cost = ix86_cost->hard_register.int_load[0];
            if (in == 2)
              return MAX (cost, ix86_cost->hard_register.int_store[0]);
            return cost;
          }
        else
          {
            if (in == 2)
              return MAX (ix86_cost->hard_register.movzbl_load,
                          ix86_cost->hard_register.int_store[0] + 4);
            if (in)
              return ix86_cost->hard_register.movzbl_load;
            else
              return ix86_cost->hard_register.int_store[0] + 4;
          }
        break;
      case 2:
        if (in == 2)
          return MAX (ix86_cost->hard_register.int_load[1],
                      ix86_cost->hard_register.int_store[1]);
        return in ? ix86_cost->hard_register.int_load[1]
                  : ix86_cost->hard_register.int_store[1];
      default:
        if (in == 2)
          cost = MAX (ix86_cost->hard_register.int_load[2],
                      ix86_cost->hard_register.int_store[2]);
        else if (in)
          cost = ix86_cost->hard_register.int_load[2];
        else
          cost = ix86_cost->hard_register.int_store[2];
        /* Multiply with the number of GPR moves needed.  */
        return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
}
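/* Example of the trailing GPR scaling (illustrative): a TImode value on
   x86-64 uses the int_load[2]/int_store[2] entries multiplied by
   CEIL (16, 8) == 2, matching the two 64-bit moves actually emitted,
   whereas the same 16 bytes in an SSE register are a single
   sse_load[2]/sse_store[2] access.  */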
static int
ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass,
                                  in ? 1 : 0);
}
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
                         reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (mode, class1, class2, false))
    {
      int cost = 1;

      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
          && TARGET_MEMORY_MISMATCH_STALL
          && targetm.class_max_nregs (class1, mode)
             > targetm.class_max_nregs (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between MMX and non-MMX units require secondary memory.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    gcc_unreachable ();

  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return (SSE_CLASS_P (class1)
            ? ix86_cost->hard_register.sse_to_integer
            : ix86_cost->hard_register.integer_to_sse);

  /* Moves between mask register and GPR.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      return (MASK_CLASS_P (class1)
              ? ix86_cost->hard_register.mask_to_integer
              : ix86_cost->hard_register.integer_to_mask);
    }
  /* Moving between mask registers.  */
  if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
    return ix86_cost->hard_register.mask_move;

  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->hard_register.fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    {
      if (GET_MODE_BITSIZE (mode) <= 128)
        return ix86_cost->hard_register.xmm_move;
      if (GET_MODE_BITSIZE (mode) <= 256)
        return ix86_cost->hard_register.ymm_move;
      return ix86_cost->hard_register.zmm_move;
    }
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->hard_register.mmx_move;

  return 2;
}
/* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
   words of a value of mode MODE but can be less for certain modes in
   special long registers.

   Actually there are no two word move instructions for consecutive
   registers.  And only registers 0-3 may have mov byte instructions
   applied to them.  */

static unsigned int
ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (GENERAL_REGNO_P (regno))
    {
      if (mode == XFmode)
        return TARGET_64BIT ? 2 : 3;
      if (mode == XCmode)
        return TARGET_64BIT ? 4 : 6;
      return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
    }
  if (COMPLEX_MODE_P (mode))
    return 2;
  /* Register pair for mask registers.  */
  if (mode == P2QImode || mode == P2HImode)
    return 2;
  if (mode == V64SFmode || mode == V64SImode)
    return 4;
  return 1;
}
/* Implement REGMODE_NATURAL_SIZE(MODE).  */
unsigned int
ix86_regmode_natural_size (machine_mode mode)
{
  if (mode == P2HImode || mode == P2QImode)
    return GET_MODE_SIZE (mode) / 2;
  return UNITS_PER_WORD;
}
/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    {
      /* Register pair only starts at even register number.  */
      if ((mode == P2QImode || mode == P2HImode))
        return MASK_PAIR_REGNO_P(regno);

      return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
              || (TARGET_AVX512BW
                  && VALID_MASK_AVX512BW_MODE (mode)));
    }

  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;

  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  */

      /* For AVX-512 we allow, regardless of regno:
          - XI mode
          - any of 512-bit wide vector mode
          - any scalar mode.  */
      if (TARGET_AVX512F
          && (mode == XImode
              || VALID_AVX512F_REG_MODE (mode)
              || VALID_AVX512F_SCALAR_MODE (mode)))
        return true;

      /* For AVX-5124FMAPS or AVX-5124VNNIW
         allow V64SF and V64SI modes for special regnos.  */
      if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
          && (mode == V64SFmode || mode == V64SImode)
          && MOD4_SSE_REGNO_P (regno))
        return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
          && (mode == OImode
              || mode == TImode
              || VALID_AVX256_REG_MODE (mode)
              || VALID_AVX512VL_128_REG_MODE (mode)))
        return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
        return false;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
               && VALID_AVX256_REG_OR_OI_MODE (mode))
              || VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
        return true;
      if (!TARGET_PARTIAL_REG_STALL)
        return true;
      /* LRA checks if the hard register is OK for the given mode.
         QImode values can live in non-QI regs, so we allow all
         registers here.  */
      if (lra_in_progress)
        return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode))
    return true;
  else if (VALID_FP_MODE_P (mode))
    return true;
  else if (VALID_DFP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
   saves SSE registers across calls is Win64 (thus no need to check the
   current ABI here), and with AVX enabled Win64 only guarantees that
   the low 16 bytes are saved.  */

static bool
ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
                                     machine_mode mode)
{
  return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
}
/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   tieable integer mode.  */

static bool
ix86_tieable_integer_mode_p (machine_mode mode)
{
  switch (mode)
    {
    case E_HImode:
    case E_SImode:
      return true;

    case E_QImode:
      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;

    case E_DImode:
      return TARGET_64BIT;

    default:
      return false;
    }
}
/* Implement TARGET_MODES_TIEABLE_P.

   Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.  */

static bool
ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (mode1 == mode2)
    return true;

  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any other mode acceptable to SSE registers.  */
  if (GET_MODE_SIZE (mode2) == 64
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 64
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 32
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 32
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
  if (GET_MODE_SIZE (mode2) == 16
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 16
            && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  return false;
}
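/* For instance (illustrative): ix86_modes_tieable_p (SFmode, DFmode) is
   true because every class that can hold DFmode (387, SSE or general
   regs) can also hold SFmode, while nothing ties with TFmode through the
   XFmode rule since TFmode lives only in SSE registers.  */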
/* Return the cost of moving between two registers of mode MODE.  */

static int
ix86_set_reg_reg_cost (machine_mode mode)
{
  unsigned int units = UNITS_PER_WORD;

  switch (GET_MODE_CLASS (mode))
    {
    default:
      break;

    case MODE_CC:
      units = GET_MODE_SIZE (CCmode);
      break;

    case MODE_FLOAT:
      if ((TARGET_SSE && mode == TFmode)
          || (TARGET_80387 && mode == XFmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_COMPLEX_FLOAT:
      if ((TARGET_SSE && mode == TCmode)
          || (TARGET_80387 && mode == XCmode)
          || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
          || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
        units = GET_MODE_SIZE (mode);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
          || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
          || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
          || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
          || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
              && VALID_MMX_REG_MODE (mode)))
        units = GET_MODE_SIZE (mode);
      break;
    }

  /* Return the cost of moving between two registers of mode MODE,
     assuming that the move will be in pieces of at most UNITS bytes.  */
  return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
}
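/* Example (illustrative): a DImode register move on ia32 has units ==
   UNITS_PER_WORD == 4, so it costs COSTS_N_INSNS (CEIL (8, 4)) ==
   COSTS_N_INSNS (2), matching the two 32-bit moves needed, whereas a
   V4SFmode move with SSE enabled keeps units == 16 and costs a single
   insn.  */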
/* Return cost of vector operation in MODE given that scalar version has
   COST.  */

static int
ix86_vec_cost (machine_mode mode, int cost)
{
  if (!VECTOR_MODE_P (mode))
    return cost;

  if (GET_MODE_BITSIZE (mode) == 128
      && TARGET_SSE_SPLIT_REGS)
    return cost * 2;
  if (GET_MODE_BITSIZE (mode) > 128
      && TARGET_AVX256_SPLIT_REGS)
    return cost * GET_MODE_BITSIZE (mode) / 128;
  return cost;
}
/* Return cost of multiplication in MODE.  */

static int
ix86_multiplication_cost (const struct processor_costs *cost,
                          enum machine_mode mode)
{
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    return inner_mode == DFmode ? cost->mulsd : cost->mulss;
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fmul;
  else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
                          inner_mode == DFmode ? cost->mulsd : cost->mulss);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* vpmullq is used in this case. No emulation is needed.  */
      if (TARGET_AVX512DQ)
        return ix86_vec_cost (mode, cost->mulss);

      /* V*QImode is emulated with 7-13 insns.  */
      if (mode == V16QImode || mode == V32QImode)
        {
          int extra = 11;
          if (TARGET_XOP && mode == V16QImode)
            extra = 5;
          else if (TARGET_SSSE3)
            extra = 6;
          return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
        }
      /* V*DImode is emulated with 5-8 insns.  */
      else if (mode == V2DImode || mode == V4DImode)
        {
          if (TARGET_XOP && mode == V2DImode)
            return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
          else
            return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
        }
      /* Without sse4.1, we don't have PMULLD; it's emulated with 7
         insns, including two PMULUDQ.  */
      else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
        return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
      else
        return ix86_vec_cost (mode, cost->mulss);
    }
  else
    return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
}
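/* Example (illustrative): without AVX512DQ a V2DImode multiply is
   emulated from 32x32->64 bit multiplies plus shifts and adds, priced
   above as mulss * 3 + sse_op * 5; with XOP the V2DImode case is cheaper
   at mulss * 2 + sse_op * 3, and with AVX512DQ vpmullq needs no
   emulation at all.  */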
/* Return cost of division in MODE.  */

static int
ix86_division_cost (const struct processor_costs *cost,
                    enum machine_mode mode)
{
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    return inner_mode == DFmode ? cost->divsd : cost->divss;
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fdiv;
  else if (FLOAT_MODE_P (mode))
    return ix86_vec_cost (mode,
                          inner_mode == DFmode ? cost->divsd : cost->divss);
  else
    return cost->divide[MODE_INDEX (mode)];
}
#define COSTS_N_BYTES(N) ((N) * 2)

/* Return cost of shift in MODE.
   If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   AND_IN_OP1 specifies whether op1 is the result of an AND, and
   SHIFT_AND_TRUNCATE whether op1 is a result of a subreg.

   SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */

static int
ix86_shift_rotate_cost (const struct processor_costs *cost,
                        enum machine_mode mode, bool constant_op1,
                        HOST_WIDE_INT op1_val,
                        bool speed,
                        bool and_in_op1,
                        bool shift_and_truncate,
                        bool *skip_op0, bool *skip_op1)
{
  if (skip_op0)
    *skip_op0 = *skip_op1 = false;
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* V*QImode is emulated with 1-11 insns.  */
      if (mode == V16QImode || mode == V32QImode)
        {
          int count = 11;
          if (TARGET_XOP && mode == V16QImode)
            {
              /* For XOP we use vpshab, which requires a broadcast of the
                 value to the variable shift insn.  For constants this
                 means a V16Q const in mem; even when we can perform the
                 shift with one insn set the cost to prefer paddb.  */
              if (constant_op1)
                return ix86_vec_cost (mode,
                                      cost->sse_op * 2
                                      + COSTS_N_BYTES
                                          (GET_MODE_UNIT_SIZE (mode)));
              count = 3;
            }
          else if (TARGET_SSSE3)
            count = 7;
          return ix86_vec_cost (mode, cost->sse_op * count);
        }
      else
        return ix86_vec_cost (mode, cost->sse_op);
    }
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    {
      if (constant_op1)
        {
          if (op1_val > 32)
            return cost->shift_const + COSTS_N_INSNS (2);
          else
            return cost->shift_const * 2;
        }
      else
        {
          if (and_in_op1)
            return cost->shift_var * 2;
          else
            return cost->shift_var * 6 + COSTS_N_INSNS (2);
        }
    }
  else
    {
      if (constant_op1)
        return cost->shift_const;
      else if (shift_and_truncate)
        {
          if (skip_op0)
            *skip_op0 = *skip_op1 = true;
          /* Return the cost after shift-and truncation.  */
          return cost->shift_var;
        }
      else
        return cost->shift_var;
    }
  return cost->shift_const;
}
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
                int *total, bool speed)
{
  rtx mask;
  enum rtx_code code = GET_CODE (x);
  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
  const struct processor_costs *cost
    = speed ? ix86_tune_cost : &ix86_size_cost;
  int src_cost;

  switch (code)
    {
    case SET:
      if (register_operand (SET_DEST (x), VOIDmode)
          && register_operand (SET_SRC (x), VOIDmode))
        {
          *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
          return true;
        }

      if (register_operand (SET_SRC (x), VOIDmode))
        /* Avoid potentially incorrect high cost from rtx_costs
           for non-tieable SUBREGs.  */
        src_cost = 0;
      else
        {
          src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);

          if (CONSTANT_P (SET_SRC (x)))
            /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
               a small value, possibly zero for cheap constants.  */
            src_cost += COSTS_N_INSNS (1);
        }

      *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
      return true;

    case CONST_INT:
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (x86_64_immediate_operand (x, VOIDmode))
        *total = 0;
      else
        *total = 1;
      return true;

    case CONST_DOUBLE:
      if (IS_STACK_MODE (mode))
        switch (standard_80387_constant_p (x))
          {
          case -1:
          case 0:
            break;
          case 1: /* 0.0 */
            *total = 1;
            return true;
          default: /* Other constants */
            *total = 2;
            return true;
          }
      /* FALLTHRU */

    case CONST_VECTOR:
      switch (standard_sse_constant_p (x, mode))
        {
        case 0:
          break;
        case 1:  /* 0: xor eliminates false dependency */
          *total = 0;
          return true;
        default: /* -1: cmp contains false dependency */
          *total = 1;
          return true;
        }
      /* FALLTHRU */

    case CONST_WIDE_INT:
      /* Fall back to (MEM (SYMBOL_REF)), since that's where
         it'll probably end up.  Add a penalty for size.  */
      *total = (COSTS_N_INSNS (1)
                + (!TARGET_64BIT && flag_pic)
                + (GET_MODE_SIZE (mode) <= 4
                   ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
      return true;

    case ZERO_EXTEND:
      /* Zero extension is often completely free on x86_64, so make
         it as cheap as possible.  */
      if (TARGET_64BIT && mode == DImode
          && GET_MODE (XEXP (x, 0)) == SImode)
        *total = 1;
      else if (TARGET_ZERO_EXTEND_WITH_AND)
        *total = cost->add;
      else
        *total = cost->movzx;
      return false;

    case SIGN_EXTEND:
      *total = cost->movsx;
      return false;

    case ASHIFT:
      if (SCALAR_INT_MODE_P (mode)
          && GET_MODE_SIZE (mode) < UNITS_PER_WORD
          && CONST_INT_P (XEXP (x, 1)))
        {
          HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
          if (value == 1)
            {
              *total = cost->add;
              return false;
            }
          if ((value == 2 || value == 3)
              && cost->lea <= cost->shift_const)
            {
              *total = cost->lea;
              return false;
            }
        }
      /* FALLTHRU */

    case ROTATE:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      {
        bool skip_op0, skip_op1;
        *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
                                         CONST_INT_P (XEXP (x, 1))
                                           ? INTVAL (XEXP (x, 1)) : -1,
                                         speed,
                                         GET_CODE (XEXP (x, 1)) == AND,
                                         SUBREG_P (XEXP (x, 1))
                                         && GET_CODE (XEXP (XEXP (x, 1), 0))
                                            == AND,
                                         &skip_op0, &skip_op1);
        if (skip_op0 || skip_op1)
          {
            if (!skip_op0)
              *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
            if (!skip_op1)
              *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
            return true;
          }
      }
      return false;
    case FMA:
      {
        gcc_assert (FLOAT_MODE_P (mode));
        gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);

        *total = ix86_vec_cost (mode,
                                GET_MODE_INNER (mode) == SFmode
                                ? cost->fmass : cost->fmasd);
        *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);

        /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
        rtx sub = XEXP (x, 0);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, mode, FMA, 0, speed);

        sub = XEXP (x, 2);
        if (GET_CODE (sub) == NEG)
          sub = XEXP (sub, 0);
        *total += rtx_cost (sub, mode, FMA, 2, speed);
        return true;
      }

    case MULT:
      if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          int nbits;
          if (CONST_INT_P (XEXP (x, 1)))
            {
              unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
              for (nbits = 0; value != 0; value &= value - 1)
                nbits++;
            }
          else
            /* This is arbitrary.  */
            nbits = 7;

          /* Compute costs correctly for widening multiplication.  */
          if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
              && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
                 == GET_MODE_SIZE (mode))
            {
              int is_mulwiden = 0;
              machine_mode inner_mode = GET_MODE (op0);

              if (GET_CODE (op0) == GET_CODE (op1))
                is_mulwiden = 1, op1 = XEXP (op1, 0);
              else if (CONST_INT_P (op1))
                {
                  if (GET_CODE (op0) == SIGN_EXTEND)
                    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
                                  == INTVAL (op1);
                  else
                    is_mulwiden
                      = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
                }

              if (is_mulwiden)
                op0 = XEXP (op0, 0), mode = GET_MODE (op0);
            }

          *total = (cost->mult_init[MODE_INDEX (mode)]
                    + nbits * cost->mult_bit
                    + rtx_cost (op0, mode, outer_code, opno, speed)
                    + rtx_cost (op1, mode, outer_code, opno, speed));

          return true;
        }
      *total = ix86_multiplication_cost (cost, mode);
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = ix86_division_cost (cost, mode);
      return false;

    case PLUS:
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
        {
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
              && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
              && CONSTANT_P (XEXP (x, 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), mode,
                                      outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == MULT
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
            {
              HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
              if (val == 2 || val == 4 || val == 8)
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
                                      outer_code, opno, speed);
                  *total += rtx_cost (XEXP (x, 1), mode,
                                      outer_code, opno, speed);
                  return true;
                }
            }
          else if (GET_CODE (XEXP (x, 0)) == PLUS)
            {
              /* Add with carry, ignore the cost of adding a carry flag.  */
              if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
                *total = cost->add;
              else
                {
                  *total = cost->lea;
                  *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
                                      outer_code, opno, speed);
                }

              *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
                                  outer_code, opno, speed);
              *total += rtx_cost (XEXP (x, 1), mode,
                                  outer_code, opno, speed);
              return true;
            }
        }
      /* FALLTHRU */

    case MINUS:
      /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
          && GET_CODE (XEXP (x, 0)) == MINUS
          && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
        {
          *total = cost->add;
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
                              outer_code, opno, speed);
          *total += rtx_cost (XEXP (x, 1), mode,
                              outer_code, opno, speed);
          return true;
        }

      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          *total = cost->addss;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fadd;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          *total = ix86_vec_cost (mode, cost->addss);
          return false;
        }
      /* FALLTHRU */

    case AND:
    case IOR:
    case XOR:
      if (GET_MODE_CLASS (mode) == MODE_INT
          && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        {
          *total = (cost->add * 2
                    + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 0)) != DImode))
                    + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
                       << (GET_MODE (XEXP (x, 1)) != DImode)));
          return true;
        }
      /* FALLTHRU */

    case NEG:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        {
          *total = cost->sse_op;
          return false;
        }
      else if (X87_FLOAT_MODE_P (mode))
        {
          *total = cost->fchs;
          return false;
        }
      else if (FLOAT_MODE_P (mode))
        {
          *total = ix86_vec_cost (mode, cost->sse_op);
          return false;
        }
      /* FALLTHRU */

    case NOT:
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        *total = ix86_vec_cost (mode, cost->sse_op);
      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
        *total = cost->add * 2;
      else
        *total = cost->add;
      return false;
    case COMPARE:
      {
        rtx op0 = XEXP (x, 0);
        rtx op1 = XEXP (x, 1);

        if (GET_CODE (op0) == ZERO_EXTRACT
            && XEXP (op0, 1) == const1_rtx
            && CONST_INT_P (XEXP (op0, 2))
            && op1 == const0_rtx)
          {
            /* This kind of construct is implemented using test[bwl].
               Treat it as if we had an AND.  */
            mode = GET_MODE (XEXP (op0, 0));
            *total = (cost->add
                      + rtx_cost (XEXP (op0, 0), mode, outer_code,
                                  opno, speed)
                      + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
            return true;
          }

        if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
          {
            /* This is an overflow detection, count it as a normal compare.  */
            *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
            return true;
          }

        rtx geu;
        /* Match x
           (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
                        (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))  */
        if (mode == CCCmode
            && GET_CODE (op0) == NEG
            && GET_CODE (geu = XEXP (op0, 0)) == GEU
            && REG_P (XEXP (geu, 0))
            && (GET_MODE (XEXP (geu, 0)) == CCCmode
                || GET_MODE (XEXP (geu, 0)) == CCmode)
            && REGNO (XEXP (geu, 0)) == FLAGS_REG
            && XEXP (geu, 1) == const0_rtx
            && GET_CODE (op1) == LTU
            && REG_P (XEXP (op1, 0))
            && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
            && REGNO (XEXP (op1, 0)) == FLAGS_REG
            && XEXP (op1, 1) == const0_rtx)
          {
            /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop.  */
            *total = 0;
            return true;
          }

        /* The embedded comparison operand is completely free.  */
        if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
          *total = 0;
      }
      return false;

    case FLOAT_EXTEND:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
        *total = 0;
      else
        *total = ix86_vec_cost (mode, cost->addss);
      return false;

    case FLOAT_TRUNCATE:
      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
        *total = cost->fadd;
      else
        *total = ix86_vec_cost (mode, cost->addss);
      return false;

    case ABS:
      /* SSE requires memory load for the constant operand. It may make
         sense to account for this.  Of course the constant operand may or
         may not be reused.  */
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        *total = cost->sse_op;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fabs;
      else if (FLOAT_MODE_P (mode))
        *total = ix86_vec_cost (mode, cost->sse_op);
      return false;

    case SQRT:
      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
        *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
      else if (X87_FLOAT_MODE_P (mode))
        *total = cost->fsqrt;
      else if (FLOAT_MODE_P (mode))
        *total = ix86_vec_cost (mode,
                                mode == SFmode ? cost->sqrtss : cost->sqrtsd);
      return false;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TP)
        *total = 0;
      return false;

    case VEC_SELECT:
    case VEC_CONCAT:
    case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
         recognizable.  In which case they all pretty much have the
         same cost.  */
      *total = cost->sse_op;
      return true;

    case VEC_MERGE:
      mask = XEXP (x, 2);
      /* This is masked instruction, assume the same cost,
         as nonmasked variant.  */
      if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
        *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
      else
        *total = cost->sse_op;
      return true;

    default:
      return false;
    }
}

#if TARGET_MACHO
static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);  /* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
               label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
#endif /* TARGET_MACHO */
/* Order the registers for register allocator.  */

void
x86_order_regs_for_local_alloc (void)
{
   int pos = 0;
   int i;

   /* First allocate the local general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
       reg_alloc_order [pos++] = i;

   /* Global general purpose registers.  */
   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
       reg_alloc_order [pos++] = i;

   /* x87 registers come first in case we are doing FP math
      using them.  */
   if (!TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   /* SSE registers.  */
   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
     reg_alloc_order [pos++] = i;
   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Extended REX SSE registers.  */
   for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Mask register.  */
   for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
     reg_alloc_order [pos++] = i;

   /* x87 registers.  */
   if (TARGET_SSE_MATH)
     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
       reg_alloc_order [pos++] = i;

   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
     reg_alloc_order [pos++] = i;

   /* Initialize the rest of array as we do not allocate some registers
      at all.  */
   while (pos < FIRST_PSEUDO_REGISTER)
     reg_alloc_order [pos++] = 0;
}
static bool
ix86_ms_bitfield_layout_p (const_tree record_type)
{
  return ((TARGET_MS_BITFIELD_LAYOUT
           && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
}
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      if (ix86_function_type_abi (type) == MS_ABI)
	parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
	parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	{
	  regno = CX_REG;
	  if (aggr)
	    return gen_rtx_MEM (SImode,
				plus_constant (Pmode, stack_pointer_rtx, 4));
	}
      else
	{
	  regno = AX_REG;
	  if (aggr)
	    {
	      regno = DX_REG;
	      if (nregs == 1)
		return gen_rtx_MEM (SImode,
				    plus_constant (Pmode,
						   stack_pointer_rtx, 4));
	    }
	}
      return gen_rtx_REG (SImode, regno);
    }

  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
					     aggr ? 8 : 4));
}
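/* Example (illustrative, not from the original source): for 32-bit code
   such as

     struct C { void __attribute__ ((fastcall)) f (int); };

   the fastcall convention passes THIS in %ecx (or %edx when the callee
   returns an aggregate in memory), so the function above returns a
   (reg:SI cx) rather than a stack slot for C::f.  */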
/* Determine whether x86_output_mi_thunk can succeed.  */

static bool
x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
			 const_tree function)
{
  /* 64-bit can handle anything.  */
  if (TARGET_64BIT)
    return true;

  /* For 32-bit, everything's fine if we have one free register.  */
  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
    return true;

  /* Need a free register for vcall_offset.  */
  if (vcall_offset)
    return false;

  /* Need a free register for GOT references.  */
  if (flag_pic && !targetm.binds_local_p (function))
    return false;

  /* Otherwise ok.  */
  return true;
}
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;

  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* CET is enabled, insert ENDBR instruction.  */
  if ((flag_cf_protection & CF_BRANCH))
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_const_mem (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
	  fnaddr = gen_const_mem (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
	{
	  fnaddr = XEXP (DECL_RTL (function), 0);
	  tmp = gen_rtx_MEM (QImode, fnaddr);
	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
	  tmp = emit_call_insn (tmp);
	  SIBLING_CALL_P (tmp) = 1;
	}
      else
	emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	{
	  // CM_LARGE_PIC always uses pseudo PIC register which is
	  // uninitialized.  Since FUNCTION is local and calling it
	  // doesn't go through PLT, we use scratch register %r11 as
	  // PIC register and initialize it here.
	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
	  ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (fnaddr,
					   gen_rtx_REG (Pmode, tmp_regno));
	}

      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);
}
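/* Example (illustrative, not from the original source): a C++ class
   hierarchy using multiple inheritance, e.g.

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   needs a thunk for C::g in the B vtable that adjusts THIS by the offset
   of the B subobject before tail-calling the real C::g; the
   DELTA/VCALL_OFFSET handling above emits exactly that adjustment.  */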
static void
x86_file_start (void)
{
  default_file_start ();
  if (TARGET_16BIT)
    fputs ("\t.code16gcc\n", asm_out_file);
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
static int
x86_field_alignment (tree type, int computed)
{
  machine_mode mode;

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  if (TARGET_IAMCU)
    return iamcu_alignment (type, computed);
  type = strip_array_types (type);
  mode = TYPE_MODE (type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    {
      if (TYPE_ATOMIC (type) && computed > 32)
	{
	  static bool warned;

	  if (!warned && warn_psabi)
	    {
	      const char *url
		= CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";

	      warned = true;
	      inform (input_location, "the alignment of %<_Atomic %T%> "
		      "fields changed in %{GCC 11.1%}",
		      TYPE_MAIN_VARIANT (type), url);
	    }
	}
      else
	return MIN (32, computed);
    }
  return computed;
}
/* Print call to TARGET to FILE.  */

static void
x86_print_call_or_nop (FILE *file, const char *target)
{
  if (flag_nop_mcount || !strcmp (target, "nop"))
    /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
    fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
  else
    fprintf (file, "1:\tcall\t%s\n", target);
}
static bool
current_fentry_name (const char **name)
{
  tree attr = lookup_attribute ("fentry_name",
				DECL_ATTRIBUTES (current_function_decl));
  if (!attr)
    return false;
  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
  return true;
}

static bool
current_fentry_section (const char **name)
{
  tree attr = lookup_attribute ("fentry_section",
				DECL_ATTRIBUTES (current_function_decl));
  if (!attr)
    return false;
  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
  return true;
}
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  if (cfun->machine->insn_queued_at_entrance)
    {
      if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
	fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
      unsigned int patch_area_size
	= crtl->patch_area_size - crtl->patch_area_entry;
      if (patch_area_size)
	ix86_output_patchable_area (patch_area_size,
				    crtl->patch_area_entry == 0);
    }

  const char *mcount_name = MCOUNT_NAME;

  if (current_fentry_name (&mcount_name))
    ;
  else if (fentry_name)
    mcount_name = fentry_name;
  else if (flag_fentry)
    mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
#endif

      if (!TARGET_PECOFF && flag_pic)
	fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
      else
	x86_print_call_or_nop (file, mcount_name);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
	       LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, mcount_name);
    }

  if (flag_record_mcount
      || lookup_attribute ("fentry_section",
			   DECL_ATTRIBUTES (current_function_decl)))
    {
      const char *sname = "__mcount_loc";

      if (current_fentry_section (&sname))
	;
      else if (fentry_section)
	sname = fentry_section;

      fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
    }
}
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

static int
ix86_min_insn_size (rtx_insn *insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  align_flags alignment = label_to_alignment (insn);
	  int align = alignment.levels[0].log;
	  int max_skip = alignment.levels[0].maxskip;

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
	}
    }
}
#endif
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
/* Count the minimum number of instructions in BB.  Return 4 if the
   number of instructions >= 4.  */

static int
ix86_count_insn_bb (basic_block bb)
{
  rtx_insn *insn;
  int insn_count = 0;

  /* Count number of instructions in this block.  Return 4 if the number
     of instructions >= 4.  */
  FOR_BB_INSNS (bb, insn)
    {
      /* Only happen in exit blocks.  */
      if (JUMP_P (insn)
	  && ANY_RETURN_P (PATTERN (insn)))
	break;

      if (NONDEBUG_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) != USE
	  && GET_CODE (PATTERN (insn)) != CLOBBER)
	{
	  insn_count++;
	  if (insn_count >= 4)
	    return insn_count;
	}
    }

  return insn_count;
}
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
/* Pad short function to 4 instructions.   */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
	  insn = next;
	else
	  break;

      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
/* Return nonzero when QImode register that must be represented via REX prefix
   is used.  */
bool
x86_extended_QIreg_mentioned_p (rtx_insn *insn)
{
  int i;
  extract_insn_cached (insn);
  for (i = 0; i < recog_data.n_operands; i++)
    if (GENERAL_REG_P (recog_data.operand[i])
	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
       return true;
  return false;
}

/* Return true when INSN mentions register that must be encoded using REX
   prefix.  */
bool
x86_extended_reg_mentioned_p (rtx insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
    {
      const_rtx x = *iter;
      if (REG_P (x)
	  && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
	return true;
    }
  return false;
}
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case E_DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      mode = SImode;
      break;

    case E_SImode:
    case E_HImode:
    case E_QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
			    1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
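/* The halving trick above corresponds to this C sketch (illustrative,
   not part of the original source):

     double
     floatuns (unsigned long x)
     {
       if ((long) x >= 0)
	 return (double) (long) x;	// small values convert directly
       // Halve X, keeping the low bit ORed in for correct rounding,
       // convert as signed, then double the result.
       unsigned long h = (x >> 1) | (x & 1);
       return 2.0 * (double) (long) h;
     }  */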
/* Target hook for scalar_mode_supported_p.  */
static bool
ix86_scalar_mode_supported_p (scalar_mode mode)
{
  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();
  else if (mode == TFmode)
    return true;
  else
    return default_scalar_mode_supported_p (mode);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
ix86_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
    return true;
  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
    return true;
  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
    return true;
  if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
    return true;
  if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
      && VALID_MMX_REG_MODE (mode))
    return true;
  if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
      && VALID_MMX_REG_MODE_3DNOW (mode))
    return true;
  return false;
}
/* Target hook for c_mode_for_suffix.  */
static machine_mode
ix86_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  if (suffix == 'w')
    return XFmode;

  return VOIDmode;
}
/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.  */

static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
		    vec<const char *> &constraints,
		    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
{
  bool saw_asm_flag = false;

  start_sequence ();
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      if (strncmp (con, "=@cc", 4) != 0)
	continue;
      con += 4;
      if (strchr (con, ',') != NULL)
	{
	  error ("alternatives not allowed in %<asm%> flag output");
	  continue;
	}

      bool invert = false;
      if (con[0] == 'n')
	invert = true, con++;

      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      switch (con[0])
	{
	case 'a':
	  if (con[1] == 0)
	    mode = CCAmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCCmode, code = NE;
	  break;
	case 'b':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCAmode, code = NE;
	  break;
	case 'c':
	  if (con[1] == 0)
	    mode = CCCmode, code = EQ;
	  break;
	case 'e':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	case 'g':
	  if (con[1] == 0)
	    mode = CCGCmode, code = GT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = GE;
	  break;
	case 'l':
	  if (con[1] == 0)
	    mode = CCGCmode, code = LT;
	  else if (con[1] == 'e' && con[2] == 0)
	    mode = CCGCmode, code = LE;
	  break;
	case 'o':
	  if (con[1] == 0)
	    mode = CCOmode, code = EQ;
	  break;
	case 'p':
	  if (con[1] == 0)
	    mode = CCPmode, code = EQ;
	  break;
	case 's':
	  if (con[1] == 0)
	    mode = CCSmode, code = EQ;
	  break;
	case 'z':
	  if (con[1] == 0)
	    mode = CCZmode, code = EQ;
	  break;
	}
      if (code == UNKNOWN)
	{
	  error ("unknown %<asm%> flag output %qs", constraints[i]);
	  continue;
	}
      if (invert)
	code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
	{
	  /* This is the first asm flag output.  Here we put the flags
	     register in as the real output and adjust the condition to
	     allow it.  */
	  constraints[i] = "=Bf";
	  outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
	  saw_asm_flag = true;
	}
      else
	{
	  /* We don't need the flags register as output twice.  */
	  constraints[i] = "=X";
	  outputs[i] = gen_rtx_SCRATCH (SImode);
	}

      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  error ("invalid type for %<asm%> flag output");
	  continue;
	}

      if (dest_mode == QImode)
	emit_insn (gen_rtx_SET (dest, x));
      else
	{
	  rtx reg = gen_reg_rtx (QImode);
	  emit_insn (gen_rtx_SET (reg, x));

	  reg = convert_to_mode (dest_mode, reg, 1);
	  emit_move_insn (dest, reg);
	}
    }

  rtx_insn *seq = get_insns ();
  end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
      return NULL;
    }
}
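/* Usage example (illustrative, not part of the original source): the
   "=@cc<cond>" constraints handled above let C code read CPU flags
   directly, e.g.

     unsigned sum = b;
     int carry;
     asm ("addl %2, %1" : "=@ccc" (carry), "+r" (sum) : "r" (a));

   sets CARRY to 1 iff the addition set the carry flag, with no separate
   setcc or branch needed inside the asm template.  */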
/* Implements target vector targetm.asm.encode_section_info.  */

static void ATTRIBUTE_UNUSED
ix86_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (ix86_in_large_data_p (decl))
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
}
/* Worker function for REVERSE_CONDITION.  */

enum rtx_code
ix86_reverse_condition (enum rtx_code code, machine_mode mode)
{
  return (mode == CCFPmode
	  ? reverse_condition_maybe_unordered (code)
	  : reverse_condition (code));
}
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */

const char *
output_387_reg_move (rtx_insn *insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	{
	  if (REGNO (operands[0]) == FIRST_STACK_REG)
	    return output_387_ffreep (operands, 0);
	  return "fstp\t%y0";
	}
      if (STACK_TOP_P (operands[0]))
	return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
	return "fstp%Z0\t%y0";
      else
	{
	  /* There is no non-popping store to memory for XFmode.
	     So if we need one, follow the store with a load.  */
	  if (GET_MODE (operands[0]) == XFmode)
	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
	  else
	    return "fst%Z0\t%y0";
	}
    }
  else
    gcc_unreachable ();
}
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
				tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
	       flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#ifndef USE_GAS
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case E_TFmode:
      /* __float128 is "g".  */
      return "g";
    case E_XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
	{
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	  TREE_THIS_VOLATILE (t) = 1;
	}

      return t;
    }

  return default_stack_protect_guard ();
}
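/* Example (illustrative, not part of the original source): the TLS guard
   above is what -fstack-protector uses by default (%gs:0x14 on 32-bit
   Linux, %fs:0x28 on 64-bit); the explicit-symbol path corresponds to
   options such as

     -mstack-protector-guard=tls
     -mstack-protector-guard-reg=fs
     -mstack-protector-guard-offset=40
     -mstack-protector-guard-symbol=__guard_symbol

   as used e.g. by the Linux kernel to point the canary at a per-cpu
   variable.  */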
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree ATTRIBUTE_UNUSED
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
	 ? default_external_stack_protect_fail ()
	 : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
	  || ix86_cmodel == CM_SMALL_PIC
	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
	type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }

  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;

  return DW_EH_PE_absptr;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype, int)
{
  bool fp = false;
  machine_mode mode = TImode;
  int index;

  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
    }

  switch (type_of_cost)
    {
      case scalar_stmt:
	return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

      case scalar_load:
	/* load/store costs are relative to register move which is 2. Recompute
	   it to COSTS_N_INSNS so everything have same base.  */
	return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
			      : ix86_cost->int_load[2]) / 2;

      case scalar_store:
	return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
			      : ix86_cost->int_store[2]) / 2;

      case vector_stmt:
	return ix86_vec_cost (mode,
			      fp ? ix86_cost->addss : ix86_cost->sse_op);

      case vector_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

      case vector_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

      case vec_to_scalar:
      case scalar_to_vec:
	return ix86_vec_cost (mode, ix86_cost->sse_op);

      /* We should have separate costs for unaligned loads and gather/scatter.
	 Do that incrementally.  */
      case unaligned_load:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

      case unaligned_store:
	index = sse_store_index (mode);
	/* See PR82713 - we may end up being called on non-vector type.  */
	if (index < 0)
	  index = 2;
	return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

      case vector_gather_load:
	return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->gather_static
				  + ix86_cost->gather_per_elt
				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

      case vector_scatter_store:
	return ix86_vec_cost (mode,
			      COSTS_N_INSNS
				 (ix86_cost->scatter_static
				  + ix86_cost->scatter_per_elt
				    * TYPE_VECTOR_SUBPARTS (vectype)) / 2);

      case cond_branch_taken:
	return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
	return ix86_vec_cost (mode, ix86_cost->sse_op);

      case vec_construct:
	{
	  /* N element inserts into SSE vectors.  */
	  int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
	  /* One vinserti128 for combining two SSE vectors for AVX256.  */
	  if (GET_MODE_BITSIZE (mode) == 256)
	    cost += ix86_vec_cost (mode, ix86_cost->addss);
	  /* One vinserti64x4 and two vinserti128 for combining SSE
	     and AVX256 vectors to AVX512.  */
	  else if (GET_MODE_BITSIZE (mode) == 512)
	    cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
	  return cost;
	}

      default:
	gcc_unreachable ();
    }
}
/* This function returns the calling abi specific va_list type node.
   It returns  the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it return NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  if (TARGET_64BIT)
    {
      if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
	return ms_va_list_type_node;

      if ((TREE_CODE (type) == ARRAY_TYPE
	   && integer_zerop (array_type_nelts (type)))
	  || POINTER_TYPE_P (type))
	{
	  tree elem_type = TREE_TYPE (type);
	  if (TREE_CODE (elem_type) == RECORD_TYPE
	      && lookup_attribute ("sysv_abi va_list",
				   TYPE_ATTRIBUTES (elem_type)))
	    return sysv_va_list_type_node;
	}

      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
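/* Usage sketch (illustrative, not part of the original source): on
   x86-64 the two ABI-specific va_list types handled above can be used
   directly from C:

     void __attribute__ ((ms_abi))
     f (int n, ...)
     {
       __builtin_ms_va_list ap;		// sysv callees use plain va_list
       __builtin_ms_va_start (ap, n);
       int i = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
     }  */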
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
	{
	default:
	  break;

	case 0:
	  *ptree = ms_va_list_type_node;
	  *pname = "__builtin_ms_va_list";
	  return 1;

	case 1:
	  *ptree = sysv_va_list_type_node;
	  *pname = "__builtin_sysv_va_list";
	  return 1;
	}
    }

  return 0;
}
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OPC.  Arguments type
   is passed in MODE.  */

static int
ix86_reassociation_width (unsigned int op, machine_mode mode)
{
  int width = 1;
  /* Vector part.  */
  if (VECTOR_MODE_P (mode))
    {
      int div = 1;
      if (INTEGRAL_MODE_P (mode))
	width = ix86_cost->reassoc_vec_int;
      else if (FLOAT_MODE_P (mode))
	width = ix86_cost->reassoc_vec_fp;

      if (width == 1)
	return 1;

      /* Integer vector instructions execute in FP unit
	 and can execute 3 additions and one multiplication per cycle.  */
      if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
	   || ix86_tune == PROCESSOR_ZNVER3)
	  && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
	return 1;

      /* Account for targets that splits wide vectors into multiple parts.  */
      if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
	div = GET_MODE_BITSIZE (mode) / 128;
      else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
	div = GET_MODE_BITSIZE (mode) / 64;
      width = (width + div - 1) / div;
    }
  /* Scalar part.  */
  else if (INTEGRAL_MODE_P (mode))
    width = ix86_cost->reassoc_int;
  else if (FLOAT_MODE_P (mode))
    width = ix86_cost->reassoc_fp;

  /* Avoid using too many registers in 32bit mode.  */
  if (!TARGET_64BIT && width > 2)
    width = 2;

  return width;
}
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V32QImode;
      else
	return V16QImode;

    case E_HImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
	return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V16HImode;
      else
	return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SImode;
      else
	return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DImode;
      else
	return V2DImode;

    case E_SFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V8SFmode;
      else
	return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
	return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
	return V4DFmode;
      else if (TARGET_SSE2)
	return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.  */

static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    {
      modes->safe_push (V64QImode);
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX512F && all)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
      modes->safe_push (V64QImode);
    }
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX && all)
    {
      modes->safe_push (V16QImode);
      modes->safe_push (V32QImode);
    }
  else if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V16QImode);

  if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V8QImode);

  return 0;
}
/* Implementation of targetm.vectorize.get_mask_mode.  */

static opt_machine_mode
ix86_get_mask_mode (machine_mode data_mode)
{
  unsigned vector_size = GET_MODE_SIZE (data_mode);
  unsigned nunits = GET_MODE_NUNITS (data_mode);
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
    {
      if (elem_size == 4
	  || elem_size == 8
	  || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
	return smallest_int_mode_for_size (nunits);
    }

  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, machine_mode mode)
{
  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
      && TARGET_SSE2
      && TARGET_INTER_UNIT_MOVES_TO_VEC
      && TARGET_INTER_UNIT_MOVES_FROM_VEC
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
/* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
   but returns a lower bound.  */

static unsigned int
ix86_max_noce_ifcvt_seq_cost (edge e)
{
  bool predictable_p = predictable_edge_p (e);

  if (predictable_p)
    {
      if (global_options_set.x_param_max_rtl_if_conversion_predictable_cost)
	return param_max_rtl_if_conversion_predictable_cost;
    }
  else
    {
      if (global_options_set.x_param_max_rtl_if_conversion_unpredictable_cost)
	return param_max_rtl_if_conversion_unpredictable_cost;
    }

  return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
}
/* Return true if SEQ is a good candidate as a replacement for the
   if-convertible sequence described in IF_INFO.  */

static bool
ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
  if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
    {
      int cmov_cnt = 0;
      /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
	 Maybe we should allow even more conditional moves as long as they
	 are used far enough not to stall the CPU, or also consider
	 IF_INFO->TEST_BB succ edge probabilities.  */
      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
	{
	  rtx set = single_set (insn);
	  if (!set)
	    continue;
	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
	    continue;
	  rtx src = SET_SRC (set);
	  machine_mode mode = GET_MODE (src);
	  if (GET_MODE_CLASS (mode) != MODE_INT
	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
	    continue;
	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
	    continue;
	  /* insn is CMOV or FCMOV.  */
	  if (++cmov_cnt > 1)
	    return false;
	}
    }
  return default_noce_conversion_profitable_p (seq, if_info);
}
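/* Example (illustrative, not part of the original source): the CMOV
   counting above limits if-conversion of sources like

     x = (a > b) ? a : b;
     y = (a > b) ? c : d;

   where RTL if-conversion would emit two cmov instructions; on
   TARGET_ONE_IF_CONV_INSN tunings only a single-cmov replacement is
   considered profitable.  */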
/* Implement targetm.vectorize.init_cost.  */

static void *
ix86_init_cost (class loop *)
{
  unsigned *cost = XNEWVEC (unsigned, 3);
  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
  return cost;
}
/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
		    enum vect_cost_for_stmt kind,
		    class _stmt_vec_info *stmt_info, tree vectype,
		    int misalign,
		    enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;
  bool scalar_p
    = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
  int stmt_cost = - 1;

  bool fp = false;
  machine_mode mode = scalar_p ? SImode : TImode;

  if (vectype != NULL)
    {
      fp = FLOAT_TYPE_P (vectype);
      mode = TYPE_MODE (vectype);
      if (scalar_p)
	mode = TYPE_MODE (TREE_TYPE (vectype));
    }

  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
    {
      tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
      /*machine_mode inner_mode = mode;
	if (VECTOR_MODE_P (mode))
	  inner_mode = GET_MODE_INNER (mode);*/

      switch (subcode)
	{
	case PLUS_EXPR:
	case POINTER_PLUS_EXPR:
	case MINUS_EXPR:
	  if (kind == scalar_stmt)
	    {
	      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
		stmt_cost = ix86_cost->addss;
	      else if (X87_FLOAT_MODE_P (mode))
		stmt_cost = ix86_cost->fadd;
	      else
		stmt_cost = ix86_cost->add;
	    }
	  else
	    stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
				       : ix86_cost->sse_op);
	  break;

	case MULT_EXPR:
	case WIDEN_MULT_EXPR:
	case MULT_HIGHPART_EXPR:
	  stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
	  break;
	case NEGATE_EXPR:
	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	    stmt_cost = ix86_cost->sse_op;
	  else if (X87_FLOAT_MODE_P (mode))
	    stmt_cost = ix86_cost->fchs;
	  else if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;
	case TRUNC_DIV_EXPR:
	case CEIL_DIV_EXPR:
	case FLOOR_DIV_EXPR:
	case ROUND_DIV_EXPR:
	case TRUNC_MOD_EXPR:
	case CEIL_MOD_EXPR:
	case FLOOR_MOD_EXPR:
	case RDIV_EXPR:
	case ROUND_MOD_EXPR:
	case EXACT_DIV_EXPR:
	  stmt_cost = ix86_division_cost (ix86_cost, mode);
	  break;

	case RSHIFT_EXPR:
	case LSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  {
	    tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
	    stmt_cost = ix86_shift_rotate_cost
			   (ix86_cost, mode,
			    TREE_CODE (op2) == INTEGER_CST,
			    cst_and_fits_in_hwi (op2)
			    ? int_cst_value (op2) : -1,
			    true, false, false, NULL, NULL);
	  }
	  break;
	CASE_CONVERT:
	  /* Only sign-conversions are free.  */
	  if (tree_nop_conversion_p
	        (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
		 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
	    stmt_cost = 0;
	  break;

	case BIT_IOR_EXPR:
	case ABS_EXPR:
	case ABSU_EXPR:
	case MIN_EXPR:
	case MAX_EXPR:
	case BIT_XOR_EXPR:
	case BIT_AND_EXPR:
	case BIT_NOT_EXPR:
	  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
	    stmt_cost = ix86_cost->sse_op;
	  else if (VECTOR_MODE_P (mode))
	    stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
	  else
	    stmt_cost = ix86_cost->add;
	  break;
	default:
	  break;
	}
    }

  combined_fn cfn;
  if ((kind == vector_stmt || kind == scalar_stmt)
      && stmt_info
      && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_CALL
      && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
    switch (cfn)
      {
      case CFN_FMA:
	stmt_cost = ix86_vec_cost (mode,
				   mode == SFmode ? ix86_cost->fmass
				   : ix86_cost->fmasd);
	break;
      default:
	break;
      }

  /* If we do elementwise loads into a vector then we are bound by
     latency and execution resources for the many scalar loads
     (AGU and load ports).  Try to account for this by scaling the
     construction cost by the number of elements involved.  */
  if ((kind == vec_construct || kind == vec_to_scalar)
      && stmt_info
      && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
	  || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
      && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
      && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
    {
      stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
      stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
    }
  if (stmt_cost == -1)
    stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);

  /* Penalize DFmode vector operations for Bonnell.  */
  if (TARGET_BONNELL && kind == vector_stmt
      && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
    stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */

  /* Statements in an inner loop relative to the loop being
     vectorized are weighted more heavily.  The value here is
     arbitrary and could potentially be improved with analysis.  */
  if (where == vect_body && stmt_info
      && stmt_in_inner_loop_p (vinfo, stmt_info))
    count *= 50;  /* FIXME.  */

  retval = (unsigned) (count * stmt_cost);

  /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
     for Silvermont as it has out of order integer pipeline and can execute
     2 scalar instruction per tick, but has in order SIMD pipeline.  */
  if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
       || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
    {
      tree lhs_op = gimple_get_lhs (stmt_info->stmt);
      if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
	retval = (retval * 17) / 10;
    }

  cost[where] += retval;

  return retval;
}
22410 ix86_finish_cost (void *data
, unsigned *prologue_cost
,
22411 unsigned *body_cost
, unsigned *epilogue_cost
)
22413 unsigned *cost
= (unsigned *) data
;
22414 *prologue_cost
= cost
[vect_prologue
];
22415 *body_cost
= cost
[vect_body
];
22416 *epilogue_cost
= cost
[vect_epilogue
];
22419 /* Implement targetm.vectorize.destroy_cost_data. */
22422 ix86_destroy_cost_data (void *data
)
/* Validate target specific memory model bits in VAL.  */

static unsigned HOST_WIDE_INT
ix86_memmodel_check (unsigned HOST_WIDE_INT val)
{
  enum memmodel model = memmodel_from_int (val);
  bool strong;

  if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
				      |MEMMODEL_MASK)
      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
    {
      warning (OPT_Winvalid_memory_model,
	       "unknown architecture specific memory model");
      return MEMMODEL_SEQ_CST;
    }
  strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
  if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
	       "memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
    }
  if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
    {
      warning (OPT_Winvalid_memory_model,
	       "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
	       "memory model");
      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
    }
  return val;
}
/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
   CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
   or number of vecsize_mangle variants that should be emitted.  */

static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
					     struct cgraph_simd_clone *clonei,
					     tree base_type, int num)
{
  int ret = 1;

  if (clonei->simdlen
      && (clonei->simdlen < 2
	  || clonei->simdlen > 1024
	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		  "unsupported simdlen %wd", clonei->simdlen.to_constant ());
      return 0;
    }

  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case E_QImode:
      case E_HImode:
      case E_SImode:
      case E_DImode:
      case E_SFmode:
      case E_DFmode:
      /* case E_SCmode: */
      /* case E_DCmode: */
	break;
      default:
	if (!AGGREGATE_TYPE_P (ret_type))
	  break;
	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		    "unsupported return type %qT for simd", ret_type);
	return 0;
      }

  tree t;
  int i;
  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);

  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
       t && t != void_list_node; t = TREE_CHAIN (t), i++)
    {
      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
      switch (TYPE_MODE (arg_type))
	{
	case E_QImode:
	case E_HImode:
	case E_SImode:
	case E_DImode:
	case E_SFmode:
	case E_DFmode:
	/* case E_SCmode: */
	/* case E_DCmode: */
	  break;
	default:
	  if (!AGGREGATE_TYPE_P (arg_type))
	    break;
	  if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
	    break;
	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		      "unsupported argument type %qT for simd", arg_type);
	  return 0;
	}
    }

  if (!TREE_PUBLIC (node->decl))
    {
      /* If the function isn't exported, we can pick up just one ISA
	 for the clones.  */
      if (TARGET_AVX512F)
	clonei->vecsize_mangle = 'e';
      else if (TARGET_AVX2)
	clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
	clonei->vecsize_mangle = 'c';
      else
	clonei->vecsize_mangle = 'b';
      ret = 1;
    }
  else
    {
      clonei->vecsize_mangle = "bcde"[num];
      ret = 4;
    }
  clonei->mask_mode = VOIDmode;
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    case 'e':
      clonei->vecsize_int = 512;
      clonei->vecsize_float = 512;
      if (TYPE_MODE (base_type) == QImode)
	clonei->mask_mode = DImode;
      else
	clonei->mask_mode = SImode;
      break;
    }
  if (clonei->simdlen == 0)
    {
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
	clonei->simdlen = clonei->vecsize_int;
      else
	clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen = clonei->simdlen
			/ GET_MODE_BITSIZE (TYPE_MODE (base_type));
    }
  else if (clonei->simdlen > 16)
    {
      /* For compatibility with ICC, use the same upper bounds
	 for simdlen.  In particular, for CTYPE below, use the return type,
	 unless the function returns void, in that case use the characteristic
	 type.  If it is possible for given SIMDLEN to pass CTYPE value
	 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
	 for 64-bit code), accept that SIMDLEN, otherwise warn and don't
	 emit corresponding clone.  */
      tree ctype = ret_type;
      if (TREE_CODE (ret_type) == VOID_TYPE)
	ctype = base_type;
      int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
      if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
	cnt /= clonei->vecsize_int;
      else
	cnt /= clonei->vecsize_float;
      if (cnt > (TARGET_64BIT ? 16 : 8))
	{
	  warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
		      "unsupported simdlen %wd",
		      clonei->simdlen.to_constant ());
	  return 0;
	}
    }
  return ret;
}
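/* Example (illustrative, not part of the original source): the clone
   machinery above serves declarations like

     #pragma omp declare simd
     float add (float a, float b);

   for which exported functions get the four ISA variants 'b' (SSE2),
   'c' (AVX), 'd' (AVX2) and 'e' (AVX512F), mangled along the lines of
   _ZGVdN8vv_add for the AVX2 clone.  */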
/* If SIMD clone NODE can't be used in a vectorized loop
   in current function, return -1, otherwise return a badness of using it
   (0 if it is most desirable from vecsize_mangle point of view, 1
   slightly less desirable, etc.).  */

static int
ix86_simd_clone_usable (struct cgraph_node *node)
{
  switch (node->simdclone->vecsize_mangle)
    {
    case 'b':
      if (!TARGET_SSE2)
	return -1;
      if (!TARGET_AVX)
	return 0;
      return TARGET_AVX2 ? 2 : 1;
    case 'c':
      if (!TARGET_AVX)
	return -1;
      return TARGET_AVX2 ? 1 : 0;
    case 'd':
      if (!TARGET_AVX2)
	return -1;
      return 0;
    case 'e':
      if (!TARGET_AVX512F)
	return -1;
      return 0;
    default:
      gcc_unreachable ();
    }
}
/* This function adjusts the unroll factor based on
   the hardware capabilities. For ex, bdver3 has
   a loop buffer which makes unrolling of smaller
   loops less important. This function decides the
   unroll factor using number of memory references
   (value 32 is used) as a heuristic. */

static unsigned
ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (!TARGET_ADJUST_UNROLL)
     return nunroll;

  /* Count the number of memory references within the loop body.
     This value determines the unrolling factor for bdver3 and bdver4
     architectures. */
  subrtx_iterator::array_type array;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (NONDEBUG_INSN_P (insn))
	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
	  if (const_rtx x = *iter)
	    if (MEM_P (x))
	      {
		machine_mode mode = GET_MODE (x);
		unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
		if (n_words > 4)
		  mem_count += 2;
		else
		  mem_count += 1;
	      }
  free (bbs);

  if (mem_count && mem_count <=32)
    return MIN (nunroll, 32 / mem_count);

  return nunroll;
}
/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */

static bool
ix86_float_exceptions_rounding_supported_p (void)
{
  /* For x87 floating point with standard excess precision handling,
     there is no adddf3 pattern (since x87 floating point only has
     XFmode operations) so the default hook implementation gets this
     wrong.  */
  return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
}
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */

static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
    return;
  tree exceptions_var = create_tmp_var_raw (integer_type_node);
  if (TARGET_80387)
    {
      tree fenv_index_type = build_index_type (size_int (6));
      tree fenv_type = build_array_type (unsigned_type_node,
                                         fenv_index_type);
      tree fenv_var = create_tmp_var_raw (fenv_type);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
      tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
      tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
      tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
                         NULL_TREE, NULL_TREE);
      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
                      hold_fnclex);
      *clear = build_call_expr (fnclex, 0);
      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
                            fnstsw_call, NULL_TREE, NULL_TREE);
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build4 (TARGET_EXPR, integer_type_node,
                                exceptions_var, exceptions_x87,
                                NULL_TREE, NULL_TREE);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
                        sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                        update_fldenv);
    }
  if (TARGET_SSE && TARGET_SSE_MATH)
    {
      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
      tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
      tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
                                      mxcsr_orig_var, stmxcsr_hold_call,
                                      NULL_TREE, NULL_TREE);
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
                                  mxcsr_orig_var,
                                  build_int_cst (unsigned_type_node, 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
                             build_int_cst (unsigned_type_node, 0xffffffc0));
      tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
                                     mxcsr_mod_var, hold_mod_val,
                                     NULL_TREE, NULL_TREE);
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
                              hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
                         ldmxcsr_hold_call);
      if (*hold)
        *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
        *hold = hold_all;
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
        *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
                         ldmxcsr_clear_call);
      else
        *clear = ldmxcsr_clear_call;
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
                                          stxmcsr_update_call);
      if (*update)
        {
          tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
                                        exceptions_var, exceptions_sse);
          tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
                                           exceptions_var, exceptions_mod);
          *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
                            exceptions_assign);
        }
      else
        *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
                          exceptions_sse, NULL_TREE, NULL_TREE);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1,
                                                  mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                        ldmxcsr_update_call);
    }
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
                                                    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                    atomic_feraiseexcept_call);
}
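/* The trees built above correspond roughly to the following pseudo-C
   for an SSE-only target (a sketch for illustration; the real
   expansion also covers the x87 and mixed cases):

     hold:   mxcsr_orig = __builtin_ia32_stmxcsr ();
             mxcsr_mod = (mxcsr_orig | 0x1f80)    // mask all exceptions
                         & 0xffffffc0;            // clear the six flag bits
             __builtin_ia32_ldmxcsr (mxcsr_mod);
     clear:  __builtin_ia32_ldmxcsr (mxcsr_mod);
     update: exceptions = __builtin_ia32_stmxcsr ();
             __builtin_ia32_ldmxcsr (mxcsr_orig);
             __atomic_feraiseexcept (exceptions);  */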
#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* For i386, a common symbol is local only for non-PIE binaries.  For
   x86-64, a common symbol is local only for non-PIE binaries or if
   the linker supports copy relocations in PIE binaries.  */

static bool
ix86_binds_local_p (const_tree exp)
{
  return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
                                  (!flag_pic
                                   || (TARGET_64BIT
                                       && HAVE_LD_PIE_COPYRELOC != 0)));
}
#endif
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && (REG_P (XEXP (addr, 0))
          || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
      && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  return false;
}
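/* E.g. (mem (plus (reg) (const_int 8))) yields *base = (reg) and
   *offset = (const_int 8); a bare (mem (reg)) yields the register and
   const0_rtx.  Addresses with an index or scale are rejected.  */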
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into move multiple.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
                                    machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  gcc_assert (REG_P (reg_1) && REG_P (reg_2));

  if (REGNO (reg_1) != REGNO (reg_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
    return false;
  if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
  if (offval_1 + msize != offval_2)
    return false;

  return true;
}
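/* For instance, two DFmode accesses at 8(%rax) and 16(%rax) using the
   same register pass the adjacency check (8 + 8 == 16), whereas
   accesses at 8(%rax) and 24(%rax), or ones using different base
   registers, do not.  */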
/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
                        optimization_type opt_type)
{
  switch (op)
    {
    case asin_optab:
    case acos_optab:
    case log1p_optab:
    case exp_optab:
    case exp10_optab:
    case exp2_optab:
    case expm1_optab:
    case ldexp_optab:
    case scalb_optab:
    case round_optab:
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rint_optab:
      if (SSE_FLOAT_MODE_P (mode1)
          && TARGET_SSE_MATH
          && !flag_trapping_math
          && !TARGET_SSE4_1)
        return opt_type == OPTIMIZE_FOR_SPEED;
      return true;

    case floor_optab:
    case ceil_optab:
    case btrunc_optab:
      if (SSE_FLOAT_MODE_P (mode1)
          && TARGET_SSE_MATH
          && !flag_trapping_math
          && TARGET_SSE4_1)
        return true;
      return opt_type == OPTIMIZE_FOR_SPEED;

    case rsqrt_optab:
      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();

    default:
      return true;
    }
}
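/* E.g. rint on DFmode without SSE4.1 expands to a multi-instruction
   sequence, so it is only worth open-coding when optimizing for
   speed; with SSE4.1 a single roundsd suffices and is always
   acceptable.  */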
/* Address space support.

   This is not "far pointers" in the 16-bit sense, but an easy way
   to use %fs and %gs segment prefixes.  Therefore:

    (a) All address spaces have the same modes,
    (b) All address spaces have the same address forms,
    (c) While %fs and %gs are technically subsets of the generic
        address space, they are probably not subsets of each other.
    (d) Since we have no access to the segment base register values
        without resorting to a system call, we cannot convert a
        non-default address space to a default address space.
        Therefore we do not claim %fs or %gs are subsets of generic.

   Therefore we can (mostly) use the default hooks.  */

/* All use of segmentation is assumed to make address 0 valid.  */

static bool
ix86_addr_space_zero_address_valid (addr_space_t as)
{
  return as != ADDR_SPACE_GENERIC;
}
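/* E.g. on x86-64 GNU/Linux the thread control block lives at %fs:0,
   so dereferencing a null pointer in the __seg_fs address space is
   perfectly valid and must not be folded into a trap.  */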
static void
ix86_init_libfuncs (void)
{
  if (TARGET_64BIT)
    {
      set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
      set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
    }
  else
    {
      set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
      set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
    }

#if TARGET_MACHO
  darwin_rename_builtins ();
#endif
}
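/* The libgcc entry points registered above have the usual divmod
   shape; for illustration, the signed 64-bit variant is roughly:

     long long __divmoddi4 (long long x, long long y, long long *rem);

   returning the quotient and storing the remainder through REM.  */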
/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random.  */

static enum flt_eval_method
ix86_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
         whether that occurs with implicit excess precision or
         otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
    case EXCESS_PRECISION_TYPE_IMPLICIT:
      /* Otherwise, the excess precision we want when we are
         in a standards compliant mode, and the implicit precision we
         provide would be identical were it not for the unpredictable
         cases.  */
      if (!TARGET_80387)
        return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      else if (!TARGET_MIX_SSE_I387)
        {
          if (!(TARGET_SSE && TARGET_SSE_MATH))
            return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
          else if (TARGET_SSE2)
            return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
        }

      /* If we are in standards compliant mode, but we know we will
         calculate in unpredictable precision, return
         FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
         excess precision if the target can't guarantee it will honor
         it.  */
      return (type == EXCESS_PRECISION_TYPE_STANDARD
              ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
              : FLT_EVAL_METHOD_UNPREDICTABLE);
    default:
      gcc_unreachable ();
    }

  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
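/* In practice this maps the common configurations roughly as follows:
   SSE2 math (-mfpmath=sse) evaluates in the declared type
   (FLT_EVAL_METHOD == 0); x87-only math promotes everything to long
   double (FLT_EVAL_METHOD == 2); and -mfpmath=both is unpredictable
   except when a standards-compliant mode forces a defined answer.  */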
/* Implement PUSH_ROUNDING.  On the 386 we have a pushw instruction
   that decrements by exactly 2 no matter what the position was; there
   is no pushb.

   But as the CIE data alignment factor on this arch is -4 for 32-bit
   targets and -8 for 64-bit targets, we need to make sure all stack
   pointer adjustments are multiples of 4 for 32-bit targets and of 8
   for 64-bit targets.  */

poly_int64
ix86_push_rounding (poly_int64 bytes)
{
  return ROUND_UP (bytes, UNITS_PER_WORD);
}
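/* E.g. pushing a 2-byte operand adjusts the stack by
   ROUND_UP (2, 4) == 4 bytes with -m32 and ROUND_UP (2, 8) == 8 bytes
   with -m64, keeping every adjustment a multiple of the CIE data
   alignment factor.  */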
/* Target-specific selftests.  */

#if CHECKING_P

namespace selftest {

/* Verify that hard regs are dumped as expected (in compact mode).  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}

/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH show use
     reuse IDS.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}

/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   directives.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
                   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
        rtx addr = XEXP (dest, 0);
        ASSERT_EQ (PLUS, GET_CODE (addr));
        ASSERT_EQ (DImode, GET_MODE (addr));
        {
          rtx lhs = XEXP (addr, 0);
          /* Verify that the "frame" REG was consolidated.  */
          ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
        }
        {
          rtx rhs = XEXP (addr, 1);
          ASSERT_EQ (CONST_INT, GET_CODE (rhs));
          ASSERT_EQ (-4, INTVAL (rhs));
        }
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
         variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}

/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   call insn.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", which requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j".  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".  */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}

/* Verify that the RTL loader copes with a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   directives.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}

/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   In particular, verify that it correctly loads the 2nd operand.
   This test is target-specific since these are machine-specific
   operands (and enums).  */

static void
ix86_test_loading_unspec ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  */
  rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify that the insn is recognized.  */
  ASSERT_NE (-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}

/* Run all target-specific selftests.  */

static void
ix86_run_selftests (void)
{
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}

} // namespace selftest

#endif /* CHECKING_P */
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_VECTORIZE_BUILTIN_SCATTER
#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  ix86_print_patchable_function_entry

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#undef TARGET_MEMMODEL_CHECK
#define TARGET_MEMMODEL_CHECK ix86_memmodel_check

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#else
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ix86_option_override

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM FLAGS_REG
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ix86_fold_builtin

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin

#undef TARGET_COMPARE_VERSION_PRIORITY
#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority

#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
  ix86_generate_version_dispatcher_body

#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
  ix86_get_function_versions_dispatcher

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION ix86_excess_precision
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG ix86_init_pic_reg
#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN ix86_warn_func_return

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
#endif
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  ix86_preferred_simd_mode
#undef TARGET_VECTORIZE_SPLIT_REDUCTION
#define TARGET_VECTORIZE_SPLIT_REDUCTION \
  ix86_split_reduction
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  ix86_autovectorize_vector_modes
#undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_POST_STREAM_IN
#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_OPTION_FUNCTION_VERSIONS
#define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority

#undef TARGET_REGISTER_USAGE_LEVELING_P
#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS ix86_spill_class
#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
  ix86_simd_clone_compute_vecsize_and_simdlen

#undef TARGET_SIMD_CLONE_ADJUST
#define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust

#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable

#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
#define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa

#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
  ix86_float_exceptions_rounding_supported_p

#undef TARGET_MODE_EMIT
#define TARGET_MODE_EMIT ix86_emit_mode_set

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED ix86_mode_needed

#undef TARGET_MODE_AFTER
#define TARGET_MODE_AFTER ix86_mode_after

#undef TARGET_MODE_ENTRY
#define TARGET_MODE_ENTRY ix86_mode_entry

#undef TARGET_MODE_EXIT
#define TARGET_MODE_EXIT ix86_mode_exit

#undef TARGET_MODE_PRIORITY
#define TARGET_MODE_PRIORITY ix86_mode_priority

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS \
  ix86_offload_options

#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512

#undef TARGET_OPTAB_SUPPORTED_P
#define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
#define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS ix86_init_libfuncs

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc

#undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
#define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost

#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
#define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  ix86_hard_regno_call_part_clobbered

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class

#undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
#define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment

#undef TARGET_STATIC_RTX_ALIGNMENT
#define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment

#undef TARGET_EMPTY_RECORD_P
#define TARGET_EMPTY_RECORD_P ix86_is_empty_record

#undef TARGET_WARN_PARAMETER_PASSING_ABI
#define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi

#undef TARGET_GET_MULTILIB_ABI_NAME
#define TARGET_GET_MULTILIB_ABI_NAME \
  ix86_get_multilib_abi_name
static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
{
#ifdef OPTION_GLIBC
  if (OPTION_GLIBC)
    return (built_in_function) fcode == BUILT_IN_MEMPCPY;
  else
    return false;
#else
  return false;
#endif
}

#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function

#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */
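/* Every TARGET_* hook macro #define'd above overrides the
   corresponding default field supplied by TARGET_INITIALIZER (from
   target-def.h); hooks left undefined keep their documented default
   values.  The whole table is then materialized as the targetm
   structure below.  */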
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"