1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "memmodel.h"
29 #include "gimple.h"
30 #include "cfghooks.h"
31 #include "cfgloop.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic.h"
42 #include "cfgbuild.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "attribs.h"
46 #include "calls.h"
47 #include "stor-layout.h"
48 #include "varasm.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "except.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "cfgrtl.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "gimplify.h"
60 #include "dwarf2.h"
61 #include "tm-constrs.h"
62 #include "cselib.h"
63 #include "sched-int.h"
64 #include "opts.h"
65 #include "tree-pass.h"
66 #include "context.h"
67 #include "pass_manager.h"
68 #include "target-globals.h"
69 #include "gimple-iterator.h"
70 #include "tree-vectorizer.h"
71 #include "shrink-wrap.h"
72 #include "builtins.h"
73 #include "rtl-iter.h"
74 #include "tree-iterator.h"
75 #include "dbgcnt.h"
76 #include "case-cfn-macros.h"
77 #include "dojump.h"
78 #include "fold-const-call.h"
79 #include "tree-vrp.h"
80 #include "tree-ssanames.h"
81 #include "selftest.h"
82 #include "selftest-rtl.h"
83 #include "print-rtl.h"
84 #include "intl.h"
85 #include "ifcvt.h"
86 #include "symbol-summary.h"
87 #include "ipa-prop.h"
88 #include "ipa-fnsummary.h"
89 #include "wide-int-bitmask.h"
90 #include "tree-vector-builder.h"
91 #include "debug.h"
92 #include "dwarf2out.h"
93 #include "i386-options.h"
94 #include "i386-builtins.h"
95 #include "i386-expand.h"
96 #include "i386-features.h"
97 #include "function-abi.h"
98
99 /* This file should be included last. */
100 #include "target-def.h"
101
102 static rtx legitimize_dllimport_symbol (rtx, bool);
103 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
104 static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
105 static void ix86_emit_restore_reg_using_pop (rtx);
106
107
108 #ifndef CHECK_STACK_LIMIT
109 #define CHECK_STACK_LIMIT (-1)
110 #endif
111
112 /* Return index of given mode in mult and division cost tables. */
113 #define MODE_INDEX(mode) \
114 ((mode) == QImode ? 0 \
115 : (mode) == HImode ? 1 \
116 : (mode) == SImode ? 2 \
117 : (mode) == DImode ? 3 \
118 : 4)
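
/* Illustrative note (editorial addition, not part of GCC): MODE_INDEX maps
   the scalar integer modes to slots of the per-mode cost arrays, e.g.
   MODE_INDEX (QImode) == 0, MODE_INDEX (SImode) == 2, and any other mode,
   such as TImode, falls through to index 4.  A minimal sketch of how such a
   lookup might be written, assuming a hypothetical five-entry COSTS array:  */
#if 0
static int
example_mult_cost (machine_mode mode, const int costs[5])
{
  /* Pick the per-mode entry; modes outside QI/HI/SI/DI share the last slot.  */
  return costs[MODE_INDEX (mode)];
}
#endif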
119
120
121 /* Set by -mtune. */
122 const struct processor_costs *ix86_tune_cost = NULL;
123
124 /* Set by -mtune or -Os. */
125 const struct processor_costs *ix86_cost = NULL;
126
127 /* In case the average insn count for single function invocation is
128 lower than this constant, emit fast (but longer) prologue and
129 epilogue code. */
130 #define FAST_PROLOGUE_INSN_COUNT 20
131
132 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
133 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
134 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
135 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
136
137 /* Array of the smallest class containing reg number REGNO, indexed by
138 REGNO. Used by REGNO_REG_CLASS in i386.h. */
139
140 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
141 {
142 /* ax, dx, cx, bx */
143 AREG, DREG, CREG, BREG,
144 /* si, di, bp, sp */
145 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
146 /* FP registers */
147 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
148 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
149 /* arg pointer, flags, fpsr, frame */
150 NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
151 /* SSE registers */
152 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
153 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
154 /* MMX registers */
155 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
156 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
157 /* REX registers */
158 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
159 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
160 /* SSE REX registers */
161 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
162 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
163 /* AVX-512 SSE registers */
164 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
165 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
166 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
167 ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
168 /* Mask registers. */
169 ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
170 MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
171 };
172
173 /* The "default" register map used in 32bit mode. */
174
175 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
176 {
177 /* general regs */
178 0, 2, 1, 3, 6, 7, 4, 5,
179 /* fp regs */
180 12, 13, 14, 15, 16, 17, 18, 19,
181 /* arg, flags, fpsr, frame */
182 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
183 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
184 /* SSE */
185 21, 22, 23, 24, 25, 26, 27, 28,
186 /* MMX */
187 29, 30, 31, 32, 33, 34, 35, 36,
188 /* extended integer registers */
189 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
190 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
191 /* extended sse registers */
192 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
193 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
194 /* AVX-512 registers 16-23 */
195 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
196 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
197 /* AVX-512 registers 24-31 */
198 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
199 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
200 /* Mask registers */
201 93, 94, 95, 96, 97, 98, 99, 100
202 };
203
204 /* The "default" register map used in 64bit mode. */
205
206 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
207 {
208 /* general regs */
209 0, 1, 2, 3, 4, 5, 6, 7,
210 /* fp regs */
211 33, 34, 35, 36, 37, 38, 39, 40,
212 /* arg, flags, fpsr, frame */
213 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
214 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
215 /* SSE */
216 17, 18, 19, 20, 21, 22, 23, 24,
217 /* MMX */
218 41, 42, 43, 44, 45, 46, 47, 48,
219 /* extended integer registers */
220 8, 9, 10, 11, 12, 13, 14, 15,
221 /* extended SSE registers */
222 25, 26, 27, 28, 29, 30, 31, 32,
223 /* AVX-512 registers 16-23 */
224 67, 68, 69, 70, 71, 72, 73, 74,
225 /* AVX-512 registers 24-31 */
226 75, 76, 77, 78, 79, 80, 81, 82,
227 /* Mask registers */
228 118, 119, 120, 121, 122, 123, 124, 125
229 };
230
231 /* Define the register numbers to be used in Dwarf debugging information.
232 The SVR4 reference port C compiler uses the following register numbers
233 in its Dwarf output code:
234 0 for %eax (gcc regno = 0)
235 1 for %ecx (gcc regno = 2)
236 2 for %edx (gcc regno = 1)
237 3 for %ebx (gcc regno = 3)
238 4 for %esp (gcc regno = 7)
239 5 for %ebp (gcc regno = 6)
240 6 for %esi (gcc regno = 4)
241 7 for %edi (gcc regno = 5)
242 The following three DWARF register numbers are never generated by
243 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
244 believed these numbers have these meanings.
245 8 for %eip (no gcc equivalent)
246 9 for %eflags (gcc regno = 17)
247 10 for %trapno (no gcc equivalent)
248 It is not at all clear how we should number the FP stack registers
249 for the x86 architecture. If the version of SDB on x86/svr4 were
250 a bit less brain dead with respect to floating-point then we would
251 have a precedent to follow with respect to DWARF register numbers
252 for x86 FP registers, but the SDB on x86/svr4 was so completely
253 broken with respect to FP registers that it is hardly worth thinking
254 of it as something to strive for compatibility with.
255 The version of x86/svr4 SDB I had does (partially)
256 seem to believe that DWARF register number 11 is associated with
257 the x86 register %st(0), but that's about all. Higher DWARF
258 register numbers don't seem to be associated with anything in
259 particular, and even for DWARF regno 11, SDB only seemed to under-
260 stand that it should say that a variable lives in %st(0) (when
261 asked via an `=' command) if we said it was in DWARF regno 11,
262 but SDB still printed garbage when asked for the value of the
263 variable in question (via a `/' command).
264 (Also note that the labels SDB printed for various FP stack regs
265 when doing an `x' command were all wrong.)
266 Note that these problems generally don't affect the native SVR4
267 C compiler because it doesn't allow the use of -O with -g and
268 because when it is *not* optimizing, it allocates a memory
269 location for each floating-point variable, and the memory
270 location is what gets described in the DWARF AT_location
271 attribute for the variable in question.
272 Regardless of the severe mental illness of the x86/svr4 SDB, we
273 do something sensible here and we use the following DWARF
274 register numbers. Note that these are all stack-top-relative
275 numbers.
276 11 for %st(0) (gcc regno = 8)
277 12 for %st(1) (gcc regno = 9)
278 13 for %st(2) (gcc regno = 10)
279 14 for %st(3) (gcc regno = 11)
280 15 for %st(4) (gcc regno = 12)
281 16 for %st(5) (gcc regno = 13)
282 17 for %st(6) (gcc regno = 14)
283 18 for %st(7) (gcc regno = 15)
284 */
285 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
286 {
287 /* general regs */
288 0, 2, 1, 3, 6, 7, 5, 4,
289 /* fp regs */
290 11, 12, 13, 14, 15, 16, 17, 18,
291 /* arg, flags, fpsr, frame */
292 IGNORED_DWARF_REGNUM, 9,
293 IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
294 /* SSE registers */
295 21, 22, 23, 24, 25, 26, 27, 28,
296 /* MMX registers */
297 29, 30, 31, 32, 33, 34, 35, 36,
298 /* extended integer registers */
299 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
300 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
301 /* extended sse registers */
302 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
303 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
304 /* AVX-512 registers 16-23 */
305 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
306 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
307 /* AVX-512 registers 24-31 */
308 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
309 INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
310 /* Mask registers */
311 93, 94, 95, 96, 97, 98, 99, 100
312 };
313
314 /* Define parameter passing and return registers. */
315
316 static int const x86_64_int_parameter_registers[6] =
317 {
318 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
319 };
320
321 static int const x86_64_ms_abi_int_parameter_registers[4] =
322 {
323 CX_REG, DX_REG, R8_REG, R9_REG
324 };
325
326 static int const x86_64_int_return_registers[4] =
327 {
328 AX_REG, DX_REG, DI_REG, SI_REG
329 };
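
/* Illustrative example (editorial addition, not part of GCC): the tables above
   mean that under the SysV AMD64 ABI the first integer arguments arrive in
   %rdi, %rsi, %rdx, %rcx, %r8, %r9, while the Microsoft x64 ABI uses only
   %rcx, %rdx, %r8, %r9; further arguments go on the stack.  A sketch:  */
#if 0
/* long f (long a, long b, long c, long d, long e, long g);
   SysV:  a=%rdi b=%rsi c=%rdx d=%rcx e=%r8 g=%r9, result in %rax.
   MS:    a=%rcx b=%rdx c=%r8  d=%r9, e and g on the stack.  */
long
f (long a, long b, long c, long d, long e, long g)
{
  return a + b + c + d + e + g;
}
#endif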
330
331 /* Define the structure for the machine field in struct function. */
332
333 struct GTY(()) stack_local_entry {
334 unsigned short mode;
335 unsigned short n;
336 rtx rtl;
337 struct stack_local_entry *next;
338 };
339
340 /* Which cpu are we scheduling for. */
341 enum attr_cpu ix86_schedule;
342
343 /* Which cpu are we optimizing for. */
344 enum processor_type ix86_tune;
345
346 /* Which instruction set architecture to use. */
347 enum processor_type ix86_arch;
348
349 /* True if processor has SSE prefetch instruction. */
350 unsigned char x86_prefetch_sse;
351
352 /* Preferred alignment for stack boundary in bits. */
353 unsigned int ix86_preferred_stack_boundary;
354
355 /* Alignment for incoming stack boundary in bits specified at
356 command line. */
357 unsigned int ix86_user_incoming_stack_boundary;
358
359 /* Default alignment for incoming stack boundary in bits. */
360 unsigned int ix86_default_incoming_stack_boundary;
361
362 /* Alignment for incoming stack boundary in bits. */
363 unsigned int ix86_incoming_stack_boundary;
364
365 /* Calling abi specific va_list type nodes. */
366 tree sysv_va_list_type_node;
367 tree ms_va_list_type_node;
368
369 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
370 char internal_label_prefix[16];
371 int internal_label_prefix_len;
372
373 /* Fence to use after loop using movnt. */
374 tree x86_mfence;
375
376 /* Register class used for passing the given 64-bit part of the argument.
377 These represent classes as documented by the psABI, with the exception
378 of the SSESF and SSEDF classes, which are basically the SSE class; GCC just
379 uses an SFmode or DFmode move instead of a DImode move to avoid reformatting penalties.
380
381 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
382 whenever possible (the upper half does contain padding). */
383 enum x86_64_reg_class
384 {
385 X86_64_NO_CLASS,
386 X86_64_INTEGER_CLASS,
387 X86_64_INTEGERSI_CLASS,
388 X86_64_SSE_CLASS,
389 X86_64_SSESF_CLASS,
390 X86_64_SSEDF_CLASS,
391 X86_64_SSEUP_CLASS,
392 X86_64_X87_CLASS,
393 X86_64_X87UP_CLASS,
394 X86_64_COMPLEX_X87_CLASS,
395 X86_64_MEMORY_CLASS
396 };
397
398 #define MAX_CLASSES 8
399
400 /* Table of constants used by fldpi, fldln2, etc.... */
401 static REAL_VALUE_TYPE ext_80387_constants_table [5];
402 static bool ext_80387_constants_init;
403
404 \f
405 static rtx ix86_function_value (const_tree, const_tree, bool);
406 static bool ix86_function_value_regno_p (const unsigned int);
407 static unsigned int ix86_function_arg_boundary (machine_mode,
408 const_tree);
409 static rtx ix86_static_chain (const_tree, bool);
410 static int ix86_function_regparm (const_tree, const_tree);
411 static void ix86_compute_frame_layout (void);
412 static tree ix86_canonical_va_list_type (tree);
413 static unsigned int split_stack_prologue_scratch_regno (void);
414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
415
416 static bool ix86_can_inline_p (tree, tree);
417 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
418
419 \f
420 /* Whether -mtune= or -march= were specified */
421 int ix86_tune_defaulted;
422 int ix86_arch_specified;
423 \f
424 /* Return true if a red zone is in use. We can't use the red zone when
425 there are local indirect jumps, like "indirect_jump" or "tablejump",
426 which jump to another place in the function, since "call" in the
427 indirect thunk pushes the return address onto the stack, destroying
428 the red zone.
429
430 TODO: If we can reserve the first 2 WORDs of the red zone, one for PUSH
431 and another for CALL, we can allow local indirect jumps with an
432 indirect thunk. */
433
434 bool
435 ix86_using_red_zone (void)
436 {
437 return (TARGET_RED_ZONE
438 && !TARGET_64BIT_MS_ABI
439 && (!cfun->machine->has_local_indirect_jump
440 || cfun->machine->indirect_branch_type == indirect_branch_keep));
441 }
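
/* Illustrative example (editorial addition, not part of GCC): the SysV AMD64
   red zone is the 128 bytes below %rsp that a leaf function may use without
   adjusting the stack pointer.  The test above disables it for the MS ABI and
   when a local indirect jump would go through an indirect thunk.  A sketch of
   a leaf function that can benefit from the red zone:  */
#if 0
long
leaf_sum (long a, long b)
{
  /* Small locals like this can live in the red zone, so a leaf function
     needs no sub/add of %rsp in its prologue/epilogue.  */
  long tmp[4] = { a, b, a + b, a - b };
  return tmp[0] + tmp[1] + tmp[2] + tmp[3];
}
#endif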
442 \f
443 /* Return true if profiling code should be emitted before the
444 prologue, otherwise false.
445 Note: for x86, the "hotfix" (ms_hook_prologue) case is rejected with a sorry () diagnostic. */
446 static bool
447 ix86_profile_before_prologue (void)
448 {
449 return flag_fentry != 0;
450 }
451
452 /* Update register usage after having seen the compiler flags. */
453
454 static void
455 ix86_conditional_register_usage (void)
456 {
457 int i, c_mask;
458
459 /* If there are no caller-saved registers, preserve all registers
460 except fixed_regs and registers used for the function return value,
461 since aggregate_value_p checks call_used_regs[regno] on the return
462 value. */
463 if (cfun && cfun->machine->no_caller_saved_registers)
464 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
465 if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
466 call_used_regs[i] = 0;
467
468 /* For 32-bit targets, disable the REX registers. */
469 if (! TARGET_64BIT)
470 {
471 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
472 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
473 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
474 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
475 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
476 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
477 }
478
479 /* See the definition of CALL_USED_REGISTERS in i386.h. */
480 c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
481
482 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
483
484 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
485 {
486 /* Set/reset conditionally defined registers from
487 CALL_USED_REGISTERS initializer. */
488 if (call_used_regs[i] > 1)
489 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
490
491 /* Calculate registers of CLOBBERED_REGS register set
492 as call used registers from GENERAL_REGS register set. */
493 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
494 && call_used_regs[i])
495 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
496 }
497
498 /* If MMX is disabled, disable the registers. */
499 if (! TARGET_MMX)
500 accessible_reg_set &= ~reg_class_contents[MMX_REGS];
501
502 /* If SSE is disabled, disable the registers. */
503 if (! TARGET_SSE)
504 accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
505
506 /* If the FPU is disabled, disable the registers. */
507 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
508 accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
509
510 /* If AVX512F is disabled, disable the registers. */
511 if (! TARGET_AVX512F)
512 {
513 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
514 CLEAR_HARD_REG_BIT (accessible_reg_set, i);
515
516 accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
517 }
518 }
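
/* Illustrative example (editorial addition, not part of GCC): the
   no_caller_saved_registers handling above corresponds to functions declared
   like the sketch below, for which every register the function touches, other
   than those used for the return value, must be preserved:  */
#if 0
extern volatile int counter;

void __attribute__ ((no_caller_saved_registers))
bump_counter (void)
{
  /* Callers may assume no registers are clobbered by this call.  */
  counter++;
}
#endif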
519
520 /* Canonicalize a comparison from one we don't have to one we do have. */
521
522 static void
523 ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
524 bool op0_preserve_value)
525 {
526 /* The order of operands in x87 ficom compare is forced by combine in
527 simplify_comparison () function. Float operator is treated as RTX_OBJ
528 with a precedence over other operators and is always put in the first
529 place. Swap condition and operands to match ficom instruction. */
530 if (!op0_preserve_value
531 && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
532 {
533 enum rtx_code scode = swap_condition ((enum rtx_code) *code);
534
535 /* We are called only for compares that are split to SAHF instruction.
536 Ensure that we have setcc/jcc insn for the swapped condition. */
537 if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
538 {
539 std::swap (*op0, *op1);
540 *code = (int) scode;
541 }
542 }
543 }
544 \f
545 \f
546 /* Hook to determine if one function can safely inline another. */
547
548 static bool
549 ix86_can_inline_p (tree caller, tree callee)
550 {
551 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
552 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
553
554 /* Changes to these flags can be tolerated for always_inline functions.
555 Let's hope the user knows what they are doing. */
556 const unsigned HOST_WIDE_INT always_inline_safe_mask
557 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
558 | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
559 | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
560 | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
561 | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
562 | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
563 | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
564
565
566 if (!callee_tree)
567 callee_tree = target_option_default_node;
568 if (!caller_tree)
569 caller_tree = target_option_default_node;
570 if (callee_tree == caller_tree)
571 return true;
572
573 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
574 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
575 bool ret = false;
576 bool always_inline
577 = (DECL_DISREGARD_INLINE_LIMITS (callee)
578 && lookup_attribute ("always_inline",
579 DECL_ATTRIBUTES (callee)));
580
581 cgraph_node *callee_node = cgraph_node::get (callee);
582 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4
583 function can inline an SSE2 function, but an SSE2 function can't inline
584 an SSE4 function. */
585 if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
586 != callee_opts->x_ix86_isa_flags)
587 || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
588 != callee_opts->x_ix86_isa_flags2))
589 ret = false;
590
591 /* See if we have the same non-isa options. */
592 else if ((!always_inline
593 && caller_opts->x_target_flags != callee_opts->x_target_flags)
594 || (caller_opts->x_target_flags & ~always_inline_safe_mask)
595 != (callee_opts->x_target_flags & ~always_inline_safe_mask))
596 ret = false;
597
598 /* See if arch, tune, etc. are the same. */
599 else if (caller_opts->arch != callee_opts->arch)
600 ret = false;
601
602 else if (!always_inline && caller_opts->tune != callee_opts->tune)
603 ret = false;
604
605 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
606 /* If the callee doesn't use FP expressions, differences in
607 ix86_fpmath can be ignored. We are called from FEs
608 for multi-versioning call optimization, so beware of
609 ipa_fn_summaries not being available. */
610 && (! ipa_fn_summaries
611 || ipa_fn_summaries->get (callee_node) == NULL
612 || ipa_fn_summaries->get (callee_node)->fp_expressions))
613 ret = false;
614
615 else if (!always_inline
616 && caller_opts->branch_cost != callee_opts->branch_cost)
617 ret = false;
618
619 else
620 ret = true;
621
622 return ret;
623 }
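
/* Illustrative example (editorial addition, not part of GCC): the ISA-subset
   rule above means a caller built for a superset ISA may inline a callee
   built for a subset, but not the other way around.  A sketch:  */
#if 0
static inline int __attribute__ ((target ("sse2")))
callee_sse2 (int x)
{
  return x * 2;
}

int __attribute__ ((target ("sse4.2")))
caller_sse42 (int x)
{
  /* OK: the caller's ISA flags are a superset of the callee's, so
     callee_sse2 can be inlined here.  The reverse direction (an SSE2
     caller inlining an SSE4.2 callee) would be rejected.  */
  return callee_sse2 (x) + 1;
}
#endif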
624 \f
625 /* Return true if this goes in large data/bss. */
626
627 static bool
628 ix86_in_large_data_p (tree exp)
629 {
630 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
631 return false;
632
633 if (exp == NULL_TREE)
634 return false;
635
636 /* Functions are never large data. */
637 if (TREE_CODE (exp) == FUNCTION_DECL)
638 return false;
639
640 /* Automatic variables are never large data. */
641 if (VAR_P (exp) && !is_global_var (exp))
642 return false;
643
644 if (VAR_P (exp) && DECL_SECTION_NAME (exp))
645 {
646 const char *section = DECL_SECTION_NAME (exp);
647 if (strcmp (section, ".ldata") == 0
648 || strcmp (section, ".lbss") == 0)
649 return true;
650 return false;
651 }
652 else
653 {
654 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
655
656 /* If this is an incomplete type with size 0, then we can't put it
657 in data because it might be too big when completed. Also,
658 int_size_in_bytes returns -1 if the size can vary or is larger than
659 an integer, in which case it is also safer to assume that it goes in
660 large data. */
661 if (size <= 0 || size > ix86_section_threshold)
662 return true;
663 }
664
665 return false;
666 }
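
/* Illustrative example (editorial addition, not part of GCC): with
   -mcmodel=medium, objects larger than the -mlarge-data-threshold= value
   (the ix86_section_threshold checked above) are placed in the large
   data/bss sections (.ldata/.lbss), as are objects explicitly placed there.
   A sketch:  */
#if 0
/* Compiled with: gcc -mcmodel=medium -mlarge-data-threshold=65536 ...  */
static char big_buffer[1 << 20];          /* goes to .lbss (large BSS)  */
static char small_buffer[128];            /* stays in the ordinary .bss  */
char tagged[256] __attribute__ ((section (".ldata")));  /* explicit .ldata  */
#endif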
667
668 /* i386-specific section flag to mark large sections. */
669 #define SECTION_LARGE SECTION_MACH_DEP
670
671 /* Switch to the appropriate section for output of DECL.
672 DECL is either a `VAR_DECL' node or a constant of some sort.
673 RELOC indicates whether forming the initial value of DECL requires
674 link-time relocations. */
675
676 ATTRIBUTE_UNUSED static section *
677 x86_64_elf_select_section (tree decl, int reloc,
678 unsigned HOST_WIDE_INT align)
679 {
680 if (ix86_in_large_data_p (decl))
681 {
682 const char *sname = NULL;
683 unsigned int flags = SECTION_WRITE | SECTION_LARGE;
684 switch (categorize_decl_for_section (decl, reloc))
685 {
686 case SECCAT_DATA:
687 sname = ".ldata";
688 break;
689 case SECCAT_DATA_REL:
690 sname = ".ldata.rel";
691 break;
692 case SECCAT_DATA_REL_LOCAL:
693 sname = ".ldata.rel.local";
694 break;
695 case SECCAT_DATA_REL_RO:
696 sname = ".ldata.rel.ro";
697 break;
698 case SECCAT_DATA_REL_RO_LOCAL:
699 sname = ".ldata.rel.ro.local";
700 break;
701 case SECCAT_BSS:
702 sname = ".lbss";
703 flags |= SECTION_BSS;
704 break;
705 case SECCAT_RODATA:
706 case SECCAT_RODATA_MERGE_STR:
707 case SECCAT_RODATA_MERGE_STR_INIT:
708 case SECCAT_RODATA_MERGE_CONST:
709 sname = ".lrodata";
710 flags &= ~SECTION_WRITE;
711 break;
712 case SECCAT_SRODATA:
713 case SECCAT_SDATA:
714 case SECCAT_SBSS:
715 gcc_unreachable ();
716 case SECCAT_TEXT:
717 case SECCAT_TDATA:
718 case SECCAT_TBSS:
719 /* We don't split these for the medium model. Place them into
720 default sections and hope for the best. */
721 break;
722 }
723 if (sname)
724 {
725 /* We might get called with string constants, but get_named_section
726 doesn't like them as they are not DECLs. Also, we need to set
727 flags in that case. */
728 if (!DECL_P (decl))
729 return get_section (sname, flags, NULL);
730 return get_named_section (decl, sname, reloc);
731 }
732 }
733 return default_elf_select_section (decl, reloc, align);
734 }
735
736 /* Select a set of attributes for section NAME based on the properties
737 of DECL and whether or not RELOC indicates that DECL's initializer
738 might contain runtime relocations. */
739
740 static unsigned int ATTRIBUTE_UNUSED
741 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
742 {
743 unsigned int flags = default_section_type_flags (decl, name, reloc);
744
745 if (ix86_in_large_data_p (decl))
746 flags |= SECTION_LARGE;
747
748 if (decl == NULL_TREE
749 && (strcmp (name, ".ldata.rel.ro") == 0
750 || strcmp (name, ".ldata.rel.ro.local") == 0))
751 flags |= SECTION_RELRO;
752
753 if (strcmp (name, ".lbss") == 0
754 || strncmp (name, ".lbss.", sizeof (".lbss.") - 1) == 0
755 || strncmp (name, ".gnu.linkonce.lb.",
756 sizeof (".gnu.linkonce.lb.") - 1) == 0)
757 flags |= SECTION_BSS;
758
759 return flags;
760 }
761
762 /* Build up a unique section name, expressed as a
763 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
764 RELOC indicates whether the initial value of EXP requires
765 link-time relocations. */
766
767 static void ATTRIBUTE_UNUSED
768 x86_64_elf_unique_section (tree decl, int reloc)
769 {
770 if (ix86_in_large_data_p (decl))
771 {
772 const char *prefix = NULL;
773 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
774 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
775
776 switch (categorize_decl_for_section (decl, reloc))
777 {
778 case SECCAT_DATA:
779 case SECCAT_DATA_REL:
780 case SECCAT_DATA_REL_LOCAL:
781 case SECCAT_DATA_REL_RO:
782 case SECCAT_DATA_REL_RO_LOCAL:
783 prefix = one_only ? ".ld" : ".ldata";
784 break;
785 case SECCAT_BSS:
786 prefix = one_only ? ".lb" : ".lbss";
787 break;
788 case SECCAT_RODATA:
789 case SECCAT_RODATA_MERGE_STR:
790 case SECCAT_RODATA_MERGE_STR_INIT:
791 case SECCAT_RODATA_MERGE_CONST:
792 prefix = one_only ? ".lr" : ".lrodata";
793 break;
794 case SECCAT_SRODATA:
795 case SECCAT_SDATA:
796 case SECCAT_SBSS:
797 gcc_unreachable ();
798 case SECCAT_TEXT:
799 case SECCAT_TDATA:
800 case SECCAT_TBSS:
801 /* We don't split these for the medium model. Place them into
802 default sections and hope for the best. */
803 break;
804 }
805 if (prefix)
806 {
807 const char *name, *linkonce;
808 char *string;
809
810 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
811 name = targetm.strip_name_encoding (name);
812
813 /* If we're using one_only, then there needs to be a .gnu.linkonce
814 prefix to the section name. */
815 linkonce = one_only ? ".gnu.linkonce" : "";
816
817 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
818
819 set_decl_section_name (decl, string);
820 return;
821 }
822 }
823 default_unique_section (decl, reloc);
824 }
825
826 #ifdef COMMON_ASM_OP
827
828 #ifndef LARGECOMM_SECTION_ASM_OP
829 #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
830 #endif
831
832 /* This says how to output assembler code to declare an
833 uninitialized external linkage data object.
834
835 For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP directive
836 for large objects. */
837 void
838 x86_elf_aligned_decl_common (FILE *file, tree decl,
839 const char *name, unsigned HOST_WIDE_INT size,
840 int align)
841 {
842 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
843 && size > (unsigned int)ix86_section_threshold)
844 {
845 switch_to_section (get_named_section (decl, ".lbss", 0));
846 fputs (LARGECOMM_SECTION_ASM_OP, file);
847 }
848 else
849 fputs (COMMON_ASM_OP, file);
850 assemble_name (file, name);
851 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
852 size, align / BITS_PER_UNIT);
853 }
854 #endif
855
856 /* Utility function for targets to use in implementing
857 ASM_OUTPUT_ALIGNED_BSS. */
858
859 void
860 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
861 unsigned HOST_WIDE_INT size, int align)
862 {
863 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
864 && size > (unsigned int)ix86_section_threshold)
865 switch_to_section (get_named_section (decl, ".lbss", 0));
866 else
867 switch_to_section (bss_section);
868 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
869 #ifdef ASM_DECLARE_OBJECT_NAME
870 last_assemble_variable_decl = decl;
871 ASM_DECLARE_OBJECT_NAME (file, name, decl);
872 #else
873 /* The standard thing is just to output a label for the object. */
874 ASM_OUTPUT_LABEL (file, name);
875 #endif /* ASM_DECLARE_OBJECT_NAME */
876 ASM_OUTPUT_SKIP (file, size ? size : 1);
877 }
878 \f
879 /* Decide whether we must probe the stack before any space allocation
880 on this target. It's essentially TARGET_STACK_PROBE except when
881 -fstack-check causes the stack to be already probed differently. */
882
883 bool
884 ix86_target_stack_probe (void)
885 {
886 /* Do not probe the stack twice if static stack checking is enabled. */
887 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
888 return false;
889
890 return TARGET_STACK_PROBE;
891 }
892 \f
893 /* Decide whether we can make a sibling call to a function. DECL is the
894 declaration of the function being targeted by the call and EXP is the
895 CALL_EXPR representing the call. */
896
897 static bool
898 ix86_function_ok_for_sibcall (tree decl, tree exp)
899 {
900 tree type, decl_or_type;
901 rtx a, b;
902 bool bind_global = decl && !targetm.binds_local_p (decl);
903
904 if (ix86_function_naked (current_function_decl))
905 return false;
906
907 /* Sibling call isn't OK if there are no caller-saved registers
908 since all registers must be preserved before return. */
909 if (cfun->machine->no_caller_saved_registers)
910 return false;
911
912 /* If we are generating position-independent code, we cannot sibcall
913 optimize direct calls to global functions, as the PLT requires
914 %ebx be live. (Darwin does not have a PLT.) */
915 if (!TARGET_MACHO
916 && !TARGET_64BIT
917 && flag_pic
918 && flag_plt
919 && bind_global)
920 return false;
921
922 /* If we need to align the outgoing stack, then sibcalling would
923 unalign the stack, which may break the called function. */
924 if (ix86_minimum_incoming_stack_boundary (true)
925 < PREFERRED_STACK_BOUNDARY)
926 return false;
927
928 if (decl)
929 {
930 decl_or_type = decl;
931 type = TREE_TYPE (decl);
932 }
933 else
934 {
935 /* We're looking at the CALL_EXPR; we need the type of the function. */
936 type = CALL_EXPR_FN (exp); /* pointer expression */
937 type = TREE_TYPE (type); /* pointer type */
938 type = TREE_TYPE (type); /* function type */
939 decl_or_type = type;
940 }
941
942 /* If outgoing reg parm stack space changes, we cannot do sibcall. */
943 if ((OUTGOING_REG_PARM_STACK_SPACE (type)
944 != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
945 || (REG_PARM_STACK_SPACE (decl_or_type)
946 != REG_PARM_STACK_SPACE (current_function_decl)))
947 {
948 maybe_complain_about_tail_call (exp,
949 "inconsistent size of stack space"
950 " allocated for arguments which are"
951 " passed in registers");
952 return false;
953 }
954
955 /* Check that the return value locations are the same. Like
956 if we are returning floats on the 80387 register stack, we cannot
957 make a sibcall from a function that doesn't return a float to a
958 function that does or, conversely, from a function that does return
959 a float to a function that doesn't; the necessary stack adjustment
960 would not be executed. This is also the place we notice
961 differences in the return value ABI. Note that it is ok for one
962 of the functions to have void return type as long as the return
963 value of the other is passed in a register. */
964 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
965 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
966 cfun->decl, false);
967 if (STACK_REG_P (a) || STACK_REG_P (b))
968 {
969 if (!rtx_equal_p (a, b))
970 return false;
971 }
972 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
973 ;
974 else if (!rtx_equal_p (a, b))
975 return false;
976
977 if (TARGET_64BIT)
978 {
979 /* The SYSV ABI has more call-clobbered registers;
980 disallow sibcalls from MS to SYSV. */
981 if (cfun->machine->call_abi == MS_ABI
982 && ix86_function_type_abi (type) == SYSV_ABI)
983 return false;
984 }
985 else
986 {
987 /* If this call is indirect, we'll need to be able to use a
988 call-clobbered register for the address of the target function.
989 Make sure that all such registers are not used for passing
990 parameters. Note that DLLIMPORT functions and call to global
991 function via GOT slot are indirect. */
992 if (!decl
993 || (bind_global && flag_pic && !flag_plt)
994 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
995 || flag_force_indirect_call)
996 {
997 /* Check if regparm >= 3 since arg_reg_available is set to
998 false if regparm == 0. If regparm is 1 or 2, there is
999 always a call-clobbered register available.
1000
1001 ??? The symbol indirect call doesn't need a call-clobbered
1002 register. But we don't know if this is a symbol indirect
1003 call or not here. */
1004 if (ix86_function_regparm (type, decl) >= 3
1005 && !cfun->machine->arg_reg_available)
1006 return false;
1007 }
1008 }
1009
1010 /* Otherwise okay. That also includes certain types of indirect calls. */
1011 return true;
1012 }
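
/* Illustrative example (editorial addition, not part of GCC): a sibling
   (tail) call replaces "call + ret" with a jump when the checks above pass.
   A sketch of a call that is typically eligible and one that is not:  */
#if 0
extern long worker (long);
extern float fworker (long);

long
tail_ok (long x)
{
  return worker (x + 1);   /* Same return-value location: sibcall candidate.  */
}

void
discard_result (long x)
{
  /* On 32-bit x86 with 387 float returns, the callee leaves its result on
     the FP stack while this function returns nothing; the needed stack
     adjustment typically blocks the sibcall (see the STACK_REG_P check).  */
  fworker (x);
}
#endif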
1013
1014 /* This function determines from TYPE the calling-convention. */
1015
1016 unsigned int
1017 ix86_get_callcvt (const_tree type)
1018 {
1019 unsigned int ret = 0;
1020 bool is_stdarg;
1021 tree attrs;
1022
1023 if (TARGET_64BIT)
1024 return IX86_CALLCVT_CDECL;
1025
1026 attrs = TYPE_ATTRIBUTES (type);
1027 if (attrs != NULL_TREE)
1028 {
1029 if (lookup_attribute ("cdecl", attrs))
1030 ret |= IX86_CALLCVT_CDECL;
1031 else if (lookup_attribute ("stdcall", attrs))
1032 ret |= IX86_CALLCVT_STDCALL;
1033 else if (lookup_attribute ("fastcall", attrs))
1034 ret |= IX86_CALLCVT_FASTCALL;
1035 else if (lookup_attribute ("thiscall", attrs))
1036 ret |= IX86_CALLCVT_THISCALL;
1037
1038 /* Regparm isn't allowed for thiscall and fastcall. */
1039 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
1040 {
1041 if (lookup_attribute ("regparm", attrs))
1042 ret |= IX86_CALLCVT_REGPARM;
1043 if (lookup_attribute ("sseregparm", attrs))
1044 ret |= IX86_CALLCVT_SSEREGPARM;
1045 }
1046
1047 if (IX86_BASE_CALLCVT(ret) != 0)
1048 return ret;
1049 }
1050
1051 is_stdarg = stdarg_p (type);
1052 if (TARGET_RTD && !is_stdarg)
1053 return IX86_CALLCVT_STDCALL | ret;
1054
1055 if (ret != 0
1056 || is_stdarg
1057 || TREE_CODE (type) != METHOD_TYPE
1058 || ix86_function_type_abi (type) != MS_ABI)
1059 return IX86_CALLCVT_CDECL | ret;
1060
1061 return IX86_CALLCVT_THISCALL;
1062 }
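
/* Illustrative example (editorial addition, not part of GCC): the 32-bit
   calling-convention attributes recognized above can be used as sketched
   below (fastcall passes the first two integer arguments in %ecx/%edx,
   stdcall makes the callee pop its arguments, thiscall passes 'this' in
   %ecx, regparm uses %eax, %edx, %ecx):  */
#if 0
int __attribute__ ((fastcall)) ffast (int a, int b);    /* a=%ecx, b=%edx  */
int __attribute__ ((stdcall))  fstd  (int a, int b);    /* callee pops 8 bytes  */
int __attribute__ ((cdecl))    fcde  (int a, int b);    /* caller pops the args  */
int __attribute__ ((regparm (2))) freg (int a, int b);  /* a=%eax, b=%edx  */
#endif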
1063
1064 /* Return 0 if the attributes for two types are incompatible, 1 if they
1065 are compatible, and 2 if they are nearly compatible (which causes a
1066 warning to be generated). */
1067
1068 static int
1069 ix86_comp_type_attributes (const_tree type1, const_tree type2)
1070 {
1071 unsigned int ccvt1, ccvt2;
1072
1073 if (TREE_CODE (type1) != FUNCTION_TYPE
1074 && TREE_CODE (type1) != METHOD_TYPE)
1075 return 1;
1076
1077 ccvt1 = ix86_get_callcvt (type1);
1078 ccvt2 = ix86_get_callcvt (type2);
1079 if (ccvt1 != ccvt2)
1080 return 0;
1081 if (ix86_function_regparm (type1, NULL)
1082 != ix86_function_regparm (type2, NULL))
1083 return 0;
1084
1085 return 1;
1086 }
1087 \f
1088 /* Return the regparm value for a function with the indicated TYPE and DECL.
1089 DECL may be NULL when calling function indirectly
1090 or considering a libcall. */
1091
1092 static int
1093 ix86_function_regparm (const_tree type, const_tree decl)
1094 {
1095 tree attr;
1096 int regparm;
1097 unsigned int ccvt;
1098
1099 if (TARGET_64BIT)
1100 return (ix86_function_type_abi (type) == SYSV_ABI
1101 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
1102 ccvt = ix86_get_callcvt (type);
1103 regparm = ix86_regparm;
1104
1105 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
1106 {
1107 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1108 if (attr)
1109 {
1110 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1111 return regparm;
1112 }
1113 }
1114 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1115 return 2;
1116 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1117 return 1;
1118
1119 /* Use register calling convention for local functions when possible. */
1120 if (decl
1121 && TREE_CODE (decl) == FUNCTION_DECL)
1122 {
1123 cgraph_node *target = cgraph_node::get (decl);
1124 if (target)
1125 target = target->function_symbol ();
1126
1127 /* Caller and callee must agree on the calling convention, so
1128 checking just the global 'optimize' flag here would mean that with
1129 __attribute__((optimize (...))) the caller could use the regparm
1130 convention and the callee not, or vice versa. Instead look at whether
1131 the callee is optimized or not. */
1132 if (target && opt_for_fn (target->decl, optimize)
1133 && !(profile_flag && !flag_fentry))
1134 {
1135 if (target->local && target->can_change_signature)
1136 {
1137 int local_regparm, globals = 0, regno;
1138
1139 /* Make sure no regparm register is taken by a
1140 fixed register variable. */
1141 for (local_regparm = 0; local_regparm < REGPARM_MAX;
1142 local_regparm++)
1143 if (fixed_regs[local_regparm])
1144 break;
1145
1146 /* We don't want to use regparm(3) for nested functions as
1147 these use a static chain pointer in the third argument. */
1148 if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
1149 local_regparm = 2;
1150
1151 /* Save a register for the split stack. */
1152 if (flag_split_stack)
1153 {
1154 if (local_regparm == 3)
1155 local_regparm = 2;
1156 else if (local_regparm == 2
1157 && DECL_STATIC_CHAIN (target->decl))
1158 local_regparm = 1;
1159 }
1160
1161 /* Each fixed register usage increases register pressure,
1162 so fewer registers should be used for argument passing.
1163 This functionality can be overridden by an explicit
1164 regparm value. */
1165 for (regno = AX_REG; regno <= DI_REG; regno++)
1166 if (fixed_regs[regno])
1167 globals++;
1168
1169 local_regparm
1170 = globals < local_regparm ? local_regparm - globals : 0;
1171
1172 if (local_regparm > regparm)
1173 regparm = local_regparm;
1174 }
1175 }
1176 }
1177
1178 return regparm;
1179 }
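
/* Illustrative example (editorial addition, not part of GCC): the
   local-function path above lets 32-bit static functions whose address never
   escapes receive their arguments in registers even without an explicit
   regparm attribute, as sketched below (subject to the optimization level
   and fixed-register usage):  */
#if 0
static int
local_helper (int a, int b)   /* may be promoted to an implicit regparm  */
{
  return a * b;
}

int
entry (int x)
{
  return local_helper (x, 42);
}
#endif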
1180
1181 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
1182 DFmode (2) arguments in SSE registers for a function with the
1183 indicated TYPE and DECL. DECL may be NULL when calling the function
1184 indirectly or considering a libcall. Return -1 if any FP parameter
1185 should be rejected with an error. This is used in situations where we
1186 imply the SSE calling convention but the function is called from another
1187 function with SSE disabled. Otherwise return 0. */
1188
1189 static int
1190 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
1191 {
1192 gcc_assert (!TARGET_64BIT);
1193
1194 /* Use SSE registers to pass SFmode and DFmode arguments if requested
1195 by the sseregparm attribute. */
1196 if (TARGET_SSEREGPARM
1197 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
1198 {
1199 if (!TARGET_SSE)
1200 {
1201 if (warn)
1202 {
1203 if (decl)
1204 error ("calling %qD with attribute sseregparm without "
1205 "SSE/SSE2 enabled", decl);
1206 else
1207 error ("calling %qT with attribute sseregparm without "
1208 "SSE/SSE2 enabled", type);
1209 }
1210 return 0;
1211 }
1212
1213 return 2;
1214 }
1215
1216 if (!decl)
1217 return 0;
1218
1219 cgraph_node *target = cgraph_node::get (decl);
1220 if (target)
1221 target = target->function_symbol ();
1222
1223 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
1224 (and DFmode for SSE2) arguments in SSE registers. */
1225 if (target
1226 /* TARGET_SSE_MATH */
1227 && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
1228 && opt_for_fn (target->decl, optimize)
1229 && !(profile_flag && !flag_fentry))
1230 {
1231 if (target->local && target->can_change_signature)
1232 {
1233 /* Refuse to produce wrong code when local function with SSE enabled
1234 is called from SSE disabled function.
1235 FIXME: We need a way to detect these cases cross-ltrans partition
1236 and avoid using SSE calling conventions on local functions called
1237 from function with SSE disabled. For now at least delay the
1238 warning until we know we are going to produce wrong code.
1239 See PR66047 */
1240 if (!TARGET_SSE && warn)
1241 return -1;
1242 return TARGET_SSE2_P (target_opts_for_fn (target->decl)
1243 ->x_ix86_isa_flags) ? 2 : 1;
1244 }
1245 }
1246
1247 return 0;
1248 }
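
/* Illustrative example (editorial addition, not part of GCC): the sseregparm
   attribute asks for SFmode/DFmode arguments to be passed in SSE registers on
   32-bit targets; without SSE enabled this is rejected, as the error above
   shows.  A sketch, assuming the usual xmm0/xmm1 assignment:  */
#if 0
/* Compiled with: gcc -m32 -msse2 ...  */
double __attribute__ ((sseregparm)) scale (double x, double factor);
/* x and factor typically arrive in %xmm0/%xmm1 rather than on the stack.  */
#endif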
1249
1250 /* Return true if EAX is live at the start of the function. Used by
1251 ix86_expand_prologue to determine if we need special help before
1252 calling allocate_stack_worker. */
1253
1254 static bool
1255 ix86_eax_live_at_start_p (void)
1256 {
1257 /* Cheat. Don't bother working forward from ix86_function_regparm
1258 to the function type to whether an actual argument is located in
1259 eax. Instead just look at cfg info, which is still close enough
1260 to correct at this point. This gives false positives for broken
1261 functions that might use uninitialized data that happens to be
1262 allocated in eax, but who cares? */
1263 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
1264 }
1265
1266 static bool
1267 ix86_keep_aggregate_return_pointer (tree fntype)
1268 {
1269 tree attr;
1270
1271 if (!TARGET_64BIT)
1272 {
1273 attr = lookup_attribute ("callee_pop_aggregate_return",
1274 TYPE_ATTRIBUTES (fntype));
1275 if (attr)
1276 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
1277
1278 /* For the 32-bit MS ABI the default is to keep the aggregate
1279 return pointer. */
1280 if (ix86_function_type_abi (fntype) == MS_ABI)
1281 return true;
1282 }
1283 return KEEP_AGGREGATE_RETURN_POINTER != 0;
1284 }
1285
1286 /* Value is the number of bytes of arguments automatically
1287 popped when returning from a subroutine call.
1288 FUNDECL is the declaration node of the function (as a tree),
1289 FUNTYPE is the data type of the function (as a tree),
1290 or for a library call it is an identifier node for the subroutine name.
1291 SIZE is the number of bytes of arguments passed on the stack.
1292
1293 On the 80386, the RTD insn may be used to pop them if the number
1294 of args is fixed, but if the number is variable then the caller
1295 must pop them all. RTD can't be used for library calls now
1296 because the library is compiled with the Unix compiler.
1297 Use of RTD is a selectable option, since it is incompatible with
1298 standard Unix calling sequences. If the option is not selected,
1299 the caller must always pop the args.
1300
1301 The attribute stdcall is equivalent to RTD on a per module basis. */
1302
1303 static poly_int64
1304 ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
1305 {
1306 unsigned int ccvt;
1307
1308 /* None of the 64-bit ABIs pop arguments. */
1309 if (TARGET_64BIT)
1310 return 0;
1311
1312 ccvt = ix86_get_callcvt (funtype);
1313
1314 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
1315 | IX86_CALLCVT_THISCALL)) != 0
1316 && ! stdarg_p (funtype))
1317 return size;
1318
1319 /* Lose any fake structure return argument if it is passed on the stack. */
1320 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1321 && !ix86_keep_aggregate_return_pointer (funtype))
1322 {
1323 int nregs = ix86_function_regparm (funtype, fundecl);
1324 if (nregs == 0)
1325 return GET_MODE_SIZE (Pmode);
1326 }
1327
1328 return 0;
1329 }
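
/* Illustrative example (editorial addition, not part of GCC): for a 32-bit
   stdcall function the callee pops its own stack arguments, so the size
   returned above ends up encoded in a "ret $N" instruction.  A sketch:  */
#if 0
/* add3 returns with "ret $12": three 4-byte stack arguments are popped by
   the callee.  A variadic (stdarg) function never does this, and a cdecl
   function leaves popping to the caller.  */
int __attribute__ ((stdcall))
add3 (int a, int b, int c)
{
  return a + b + c;
}
#endif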
1330
1331 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
1332
1333 static bool
1334 ix86_legitimate_combined_insn (rtx_insn *insn)
1335 {
1336 int i;
1337
1338 /* Check operand constraints in case hard registers were propagated
1339 into insn pattern. This check prevents combine pass from
1340 generating insn patterns with invalid hard register operands.
1341 These invalid insns can eventually confuse reload to error out
1342 with a spill failure. See also PRs 46829 and 46843. */
1343
1344 gcc_assert (INSN_CODE (insn) >= 0);
1345
1346 extract_insn (insn);
1347 preprocess_constraints (insn);
1348
1349 int n_operands = recog_data.n_operands;
1350 int n_alternatives = recog_data.n_alternatives;
1351 for (i = 0; i < n_operands; i++)
1352 {
1353 rtx op = recog_data.operand[i];
1354 machine_mode mode = GET_MODE (op);
1355 const operand_alternative *op_alt;
1356 int offset = 0;
1357 bool win;
1358 int j;
1359
1360 /* A unary operator may be accepted by the predicate, but it
1361 is irrelevant for matching constraints. */
1362 if (UNARY_P (op))
1363 op = XEXP (op, 0);
1364
1365 if (SUBREG_P (op))
1366 {
1367 if (REG_P (SUBREG_REG (op))
1368 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
1369 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
1370 GET_MODE (SUBREG_REG (op)),
1371 SUBREG_BYTE (op),
1372 GET_MODE (op));
1373 op = SUBREG_REG (op);
1374 }
1375
1376 if (!(REG_P (op) && HARD_REGISTER_P (op)))
1377 continue;
1378
1379 op_alt = recog_op_alt;
1380
1381 /* Operand has no constraints, anything is OK. */
1382 win = !n_alternatives;
1383
1384 alternative_mask preferred = get_preferred_alternatives (insn);
1385 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
1386 {
1387 if (!TEST_BIT (preferred, j))
1388 continue;
1389 if (op_alt[i].anything_ok
1390 || (op_alt[i].matches != -1
1391 && operands_match_p
1392 (recog_data.operand[i],
1393 recog_data.operand[op_alt[i].matches]))
1394 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
1395 {
1396 win = true;
1397 break;
1398 }
1399 }
1400
1401 if (!win)
1402 return false;
1403 }
1404
1405 return true;
1406 }
1407 \f
1408 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
1409
1410 static unsigned HOST_WIDE_INT
1411 ix86_asan_shadow_offset (void)
1412 {
1413 return SUBTARGET_SHADOW_OFFSET;
1414 }
1415 \f
1416 /* Argument support functions. */
1417
1418 /* Return true when register may be used to pass function parameters. */
1419 bool
1420 ix86_function_arg_regno_p (int regno)
1421 {
1422 int i;
1423 enum calling_abi call_abi;
1424 const int *parm_regs;
1425
1426 if (TARGET_SSE && SSE_REGNO_P (regno)
1427 && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
1428 return true;
1429
1430 if (!TARGET_64BIT)
1431 return (regno < REGPARM_MAX
1432 || (TARGET_MMX && MMX_REGNO_P (regno)
1433 && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
1434
1435 /* TODO: The function should depend on current function ABI but
1436 builtins.c would need updating then. Therefore we use the
1437 default ABI. */
1438 call_abi = ix86_cfun_abi ();
1439
1440 /* RAX is used as hidden argument to va_arg functions. */
1441 if (call_abi == SYSV_ABI && regno == AX_REG)
1442 return true;
1443
1444 if (call_abi == MS_ABI)
1445 parm_regs = x86_64_ms_abi_int_parameter_registers;
1446 else
1447 parm_regs = x86_64_int_parameter_registers;
1448
1449 for (i = 0; i < (call_abi == MS_ABI
1450 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
1451 if (regno == parm_regs[i])
1452 return true;
1453 return false;
1454 }
1455
1456 /* Return true if we do not know how to pass ARG solely in registers. */
1457
1458 static bool
1459 ix86_must_pass_in_stack (const function_arg_info &arg)
1460 {
1461 if (must_pass_in_stack_var_size_or_pad (arg))
1462 return true;
1463
1464 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
1465 The layout_type routine is crafty and tries to trick us into passing
1466 currently unsupported vector types on the stack by using TImode. */
1467 return (!TARGET_64BIT && arg.mode == TImode
1468 && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
1469 }
1470
1471 /* Return the size, in bytes, of the area reserved for arguments passed
1472 in registers for the function represented by FNDECL, depending on the
1473 ABI used. */
1474 int
1475 ix86_reg_parm_stack_space (const_tree fndecl)
1476 {
1477 enum calling_abi call_abi = SYSV_ABI;
1478 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
1479 call_abi = ix86_function_abi (fndecl);
1480 else
1481 call_abi = ix86_function_type_abi (fndecl);
1482 if (TARGET_64BIT && call_abi == MS_ABI)
1483 return 32;
1484 return 0;
1485 }
1486
1487 /* We add this as a workaround so that the libc_has_function
1488 hook can be used from i386.md. */
1489 bool
1490 ix86_libc_has_function (enum function_class fn_class)
1491 {
1492 return targetm.libc_has_function (fn_class, NULL_TREE);
1493 }
1494
1495 /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
1496 specifying the call abi used. */
1497 enum calling_abi
1498 ix86_function_type_abi (const_tree fntype)
1499 {
1500 enum calling_abi abi = ix86_abi;
1501
1502 if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
1503 return abi;
1504
1505 if (abi == SYSV_ABI
1506 && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
1507 {
1508 static int warned;
1509 if (TARGET_X32 && !warned)
1510 {
1511 error ("X32 does not support %<ms_abi%> attribute");
1512 warned = 1;
1513 }
1514
1515 abi = MS_ABI;
1516 }
1517 else if (abi == MS_ABI
1518 && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
1519 abi = SYSV_ABI;
1520
1521 return abi;
1522 }
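
/* Illustrative example (editorial addition, not part of GCC): the ms_abi and
   sysv_abi attributes recognized above override the default 64-bit calling
   convention on a per-function basis, e.g. for code that must interoperate
   with Windows-style callers:  */
#if 0
/* Built with a SysV default (e.g. x86_64-linux): this function still takes
   its arguments in %rcx, %rdx, %r8, %r9, and its callers allocate the
   32-byte shadow space, as the MS ABI requires.  */
long __attribute__ ((ms_abi)) win_style_callback (long a, long b);

/* The converse: a SysV-convention function in an MS-ABI environment.  */
long __attribute__ ((sysv_abi)) sysv_style_helper (long a, long b);
#endif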
1523
1524 enum calling_abi
1525 ix86_function_abi (const_tree fndecl)
1526 {
1527 return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
1528 }
1529
1530 /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
1531 specifying the call abi used. */
1532 enum calling_abi
1533 ix86_cfun_abi (void)
1534 {
1535 return cfun ? cfun->machine->call_abi : ix86_abi;
1536 }
1537
1538 bool
1539 ix86_function_ms_hook_prologue (const_tree fn)
1540 {
1541 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
1542 {
1543 if (decl_function_context (fn) != NULL_TREE)
1544 error_at (DECL_SOURCE_LOCATION (fn),
1545 "%<ms_hook_prologue%> attribute is not compatible "
1546 "with nested function");
1547 else
1548 return true;
1549 }
1550 return false;
1551 }
1552
1553 bool
1554 ix86_function_naked (const_tree fn)
1555 {
1556 if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
1557 return true;
1558
1559 return false;
1560 }
1561
1562 /* Write the extra assembler code needed to declare a function properly. */
1563
1564 void
1565 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
1566 tree decl)
1567 {
1568 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
1569
1570 if (cfun)
1571 cfun->machine->function_label_emitted = true;
1572
1573 if (is_ms_hook)
1574 {
1575 int i, filler_count = (TARGET_64BIT ? 32 : 16);
1576 unsigned int filler_cc = 0xcccccccc;
1577
1578 for (i = 0; i < filler_count; i += 4)
1579 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
1580 }
1581
1582 #ifdef SUBTARGET_ASM_UNWIND_INIT
1583 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
1584 #endif
1585
1586 ASM_OUTPUT_LABEL (asm_out_file, fname);
1587
1588 /* Output magic byte marker, if hot-patch attribute is set. */
1589 if (is_ms_hook)
1590 {
1591 if (TARGET_64BIT)
1592 {
1593 /* leaq [%rsp + 0], %rsp */
1594 fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
1595 asm_out_file);
1596 }
1597 else
1598 {
1599 /* movl.s %edi, %edi
1600 push %ebp
1601 movl.s %esp, %ebp */
1602 fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
1603 }
1604 }
1605 }
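
/* Illustrative example (editorial addition, not part of GCC): the
   ms_hook_prologue attribute handled above emits the hot-patchable entry
   sequence (0xCC filler before the label, then "movl %edi, %edi; push %ebp;
   movl %esp, %ebp" on 32-bit, or the 8-byte "leaq 0(%rsp), %rsp" on 64-bit)
   so the function can be patched at run time.  A sketch:  */
#if 0
void __attribute__ ((ms_hook_prologue))
patchable_entry_point (void)
{
  /* Function body; the instructions emitted before it form the
     hot-patch landing area.  */
}
#endif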
1606
1607 /* Implementation of the call ABI switching target hook. The call
1608 register sets specific to FNDECL are set up. See also
1609 ix86_conditional_register_usage for more details. */
1610 void
1611 ix86_call_abi_override (const_tree fndecl)
1612 {
1613 cfun->machine->call_abi = ix86_function_abi (fndecl);
1614 }
1615
1616 /* Return true if a pseudo register should be created and used to hold
1617 the GOT address for PIC code. */
1618 bool
1619 ix86_use_pseudo_pic_reg (void)
1620 {
1621 if ((TARGET_64BIT
1622 && (ix86_cmodel == CM_SMALL_PIC
1623 || TARGET_PECOFF))
1624 || !flag_pic)
1625 return false;
1626 return true;
1627 }
1628
1629 /* Initialize large model PIC register. */
1630
1631 static void
1632 ix86_init_large_pic_reg (unsigned int tmp_regno)
1633 {
1634 rtx_code_label *label;
1635 rtx tmp_reg;
1636
1637 gcc_assert (Pmode == DImode);
1638 label = gen_label_rtx ();
1639 emit_label (label);
1640 LABEL_PRESERVE_P (label) = 1;
1641 tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
1642 gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
1643 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
1644 label));
1645 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
1646 emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
1647 const char *name = LABEL_NAME (label);
1648 PUT_CODE (label, NOTE);
1649 NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
1650 NOTE_DELETED_LABEL_NAME (label) = name;
1651 }
1652
1653 /* Create and initialize PIC register if required. */
1654 static void
1655 ix86_init_pic_reg (void)
1656 {
1657 edge entry_edge;
1658 rtx_insn *seq;
1659
1660 if (!ix86_use_pseudo_pic_reg ())
1661 return;
1662
1663 start_sequence ();
1664
1665 if (TARGET_64BIT)
1666 {
1667 if (ix86_cmodel == CM_LARGE_PIC)
1668 ix86_init_large_pic_reg (R11_REG);
1669 else
1670 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
1671 }
1672 else
1673 {
1674 /* If there is a future mcount call in the function, it is more profitable
1675 to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM. */
1676 rtx reg = crtl->profile
1677 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
1678 : pic_offset_table_rtx;
1679 rtx_insn *insn = emit_insn (gen_set_got (reg));
1680 RTX_FRAME_RELATED_P (insn) = 1;
1681 if (crtl->profile)
1682 emit_move_insn (pic_offset_table_rtx, reg);
1683 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
1684 }
1685
1686 seq = get_insns ();
1687 end_sequence ();
1688
1689 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
1690 insert_insn_on_edge (seq, entry_edge);
1691 commit_one_edge_insertion (entry_edge);
1692 }
1693
1694 /* Initialize a variable CUM of type CUMULATIVE_ARGS
1695 for a call to a function whose data type is FNTYPE.
1696 For a library call, FNTYPE is 0. */
1697
1698 void
1699 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
1700 tree fntype, /* tree ptr for function decl */
1701 rtx libname, /* SYMBOL_REF of library name or 0 */
1702 tree fndecl,
1703 int caller)
1704 {
1705 struct cgraph_node *local_info_node = NULL;
1706 struct cgraph_node *target = NULL;
1707
1708 memset (cum, 0, sizeof (*cum));
1709
1710 if (fndecl)
1711 {
1712 target = cgraph_node::get (fndecl);
1713 if (target)
1714 {
1715 target = target->function_symbol ();
1716 local_info_node = cgraph_node::local_info_node (target->decl);
1717 cum->call_abi = ix86_function_abi (target->decl);
1718 }
1719 else
1720 cum->call_abi = ix86_function_abi (fndecl);
1721 }
1722 else
1723 cum->call_abi = ix86_function_type_abi (fntype);
1724
1725 cum->caller = caller;
1726
1727 /* Set up the number of registers to use for passing arguments. */
1728 cum->nregs = ix86_regparm;
1729 if (TARGET_64BIT)
1730 {
1731 cum->nregs = (cum->call_abi == SYSV_ABI
1732 ? X86_64_REGPARM_MAX
1733 : X86_64_MS_REGPARM_MAX);
1734 }
1735 if (TARGET_SSE)
1736 {
1737 cum->sse_nregs = SSE_REGPARM_MAX;
1738 if (TARGET_64BIT)
1739 {
1740 cum->sse_nregs = (cum->call_abi == SYSV_ABI
1741 ? X86_64_SSE_REGPARM_MAX
1742 : X86_64_MS_SSE_REGPARM_MAX);
1743 }
1744 }
1745 if (TARGET_MMX)
1746 cum->mmx_nregs = MMX_REGPARM_MAX;
1747 cum->warn_avx512f = true;
1748 cum->warn_avx = true;
1749 cum->warn_sse = true;
1750 cum->warn_mmx = true;
1751
1752 /* Because the type might mismatch between caller and callee, we need to
1753 use the actual type of the function for local calls.
1754 FIXME: cgraph_analyze can be told to actually record if a function uses
1755 va_start, so for local functions maybe_vaarg can be made more aggressive,
1756 helping K&R code.
1757 FIXME: once the type system is fixed, we won't need this code anymore. */
1758 if (local_info_node && local_info_node->local
1759 && local_info_node->can_change_signature)
1760 fntype = TREE_TYPE (target->decl);
1761 cum->stdarg = stdarg_p (fntype);
1762 cum->maybe_vaarg = (fntype
1763 ? (!prototype_p (fntype) || stdarg_p (fntype))
1764 : !libname);
1765
1766 cum->decl = fndecl;
1767
1768 cum->warn_empty = !warn_abi || cum->stdarg;
1769 if (!cum->warn_empty && fntype)
1770 {
1771 function_args_iterator iter;
1772 tree argtype;
1773 bool seen_empty_type = false;
1774 FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
1775 {
1776 if (argtype == error_mark_node || VOID_TYPE_P (argtype))
1777 break;
1778 if (TYPE_EMPTY_P (argtype))
1779 seen_empty_type = true;
1780 else if (seen_empty_type)
1781 {
1782 cum->warn_empty = true;
1783 break;
1784 }
1785 }
1786 }
1787
1788 if (!TARGET_64BIT)
1789 {
1790 /* If there are variable arguments, then we won't pass anything
1791 in registers in 32-bit mode. */
1792 if (stdarg_p (fntype))
1793 {
1794 cum->nregs = 0;
1795 /* Since in 32-bit mode variable arguments are always passed on
1796 the stack, there is a scratch register available for an indirect
1797 sibcall. */
1798 cfun->machine->arg_reg_available = true;
1799 cum->sse_nregs = 0;
1800 cum->mmx_nregs = 0;
1801 cum->warn_avx512f = false;
1802 cum->warn_avx = false;
1803 cum->warn_sse = false;
1804 cum->warn_mmx = false;
1805 return;
1806 }
1807
1808 /* Use ecx and edx registers if function has fastcall attribute,
1809 else look for regparm information. */
1810 if (fntype)
1811 {
1812 unsigned int ccvt = ix86_get_callcvt (fntype);
1813 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
1814 {
1815 cum->nregs = 1;
1816 cum->fastcall = 1; /* Same first register as in fastcall. */
1817 }
1818 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
1819 {
1820 cum->nregs = 2;
1821 cum->fastcall = 1;
1822 }
1823 else
1824 cum->nregs = ix86_function_regparm (fntype, fndecl);
1825 }
1826
1827 /* Set up the number of SSE registers used for passing SFmode
1828 and DFmode arguments. Warn for mismatching ABI. */
1829 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
1830 }
1831
1832 cfun->machine->arg_reg_available = (cum->nregs > 0);
1833 }
1834
1835 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
1836 But in the case of vector types, it is some vector mode.
1837
1838 When we have only some of our vector isa extensions enabled, then there
1839 are some modes for which vector_mode_supported_p is false. For these
1840 modes, the generic vector support in gcc will choose some non-vector mode
1841 in order to implement the type. By computing the natural mode, we'll
1842 select the proper ABI location for the operand and not depend on whatever
1843 the middle-end decides to do with these vector types.
1844
1845 The middle-end can't deal with vector types > 16 bytes. In this
1846 case, we return the original mode and warn about the ABI change if CUM
1847 isn't NULL.
1848
1849 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
1850 available for the function return value. */
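/* Illustrative example (hypothetical type name): given

       typedef int v8si __attribute__ ((vector_size (32)));

   compiled with only SSE2 enabled, vector_mode_supported_p (V8SImode) is
   false and the middle-end gives the type a non-vector mode.  The code
   below still recomputes V8SImode, notices that AVX is disabled, warns
   with -Wpsabi and falls back to TYPE_MODE, so the ABI decision is made
   explicitly here rather than by the middle-end's fallback choice.  */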
1851
1852 static machine_mode
1853 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
1854 bool in_return)
1855 {
1856 machine_mode mode = TYPE_MODE (type);
1857
1858 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
1859 {
1860 HOST_WIDE_INT size = int_size_in_bytes (type);
1861 if ((size == 8 || size == 16 || size == 32 || size == 64)
1862 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
1863 && TYPE_VECTOR_SUBPARTS (type) > 1)
1864 {
1865 machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
1866
1867 /* There are no XFmode vector modes. */
1868 if (innermode == XFmode)
1869 return mode;
1870
1871 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
1872 mode = MIN_MODE_VECTOR_FLOAT;
1873 else
1874 mode = MIN_MODE_VECTOR_INT;
1875
1876 /* Get the mode which has this inner mode and number of units. */
1877 FOR_EACH_MODE_FROM (mode, mode)
1878 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
1879 && GET_MODE_INNER (mode) == innermode)
1880 {
1881 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
1882 {
1883 static bool warnedavx512f;
1884 static bool warnedavx512f_ret;
1885
1886 if (cum && cum->warn_avx512f && !warnedavx512f)
1887 {
1888 if (warning (OPT_Wpsabi, "AVX512F vector argument "
1889 "without AVX512F enabled changes the ABI"))
1890 warnedavx512f = true;
1891 }
1892 else if (in_return && !warnedavx512f_ret)
1893 {
1894 if (warning (OPT_Wpsabi, "AVX512F vector return "
1895 "without AVX512F enabled changes the ABI"))
1896 warnedavx512f_ret = true;
1897 }
1898
1899 return TYPE_MODE (type);
1900 }
1901 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
1902 {
1903 static bool warnedavx;
1904 static bool warnedavx_ret;
1905
1906 if (cum && cum->warn_avx && !warnedavx)
1907 {
1908 if (warning (OPT_Wpsabi, "AVX vector argument "
1909 "without AVX enabled changes the ABI"))
1910 warnedavx = true;
1911 }
1912 else if (in_return && !warnedavx_ret)
1913 {
1914 if (warning (OPT_Wpsabi, "AVX vector return "
1915 "without AVX enabled changes the ABI"))
1916 warnedavx_ret = true;
1917 }
1918
1919 return TYPE_MODE (type);
1920 }
1921 else if (((size == 8 && TARGET_64BIT) || size == 16)
1922 && !TARGET_SSE
1923 && !TARGET_IAMCU)
1924 {
1925 static bool warnedsse;
1926 static bool warnedsse_ret;
1927
1928 if (cum && cum->warn_sse && !warnedsse)
1929 {
1930 if (warning (OPT_Wpsabi, "SSE vector argument "
1931 "without SSE enabled changes the ABI"))
1932 warnedsse = true;
1933 }
1934 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
1935 {
1936 if (warning (OPT_Wpsabi, "SSE vector return "
1937 "without SSE enabled changes the ABI"))
1938 warnedsse_ret = true;
1939 }
1940 }
1941 else if ((size == 8 && !TARGET_64BIT)
1942 && (!cfun
1943 || cfun->machine->func_type == TYPE_NORMAL)
1944 && !TARGET_MMX
1945 && !TARGET_IAMCU)
1946 {
1947 static bool warnedmmx;
1948 static bool warnedmmx_ret;
1949
1950 if (cum && cum->warn_mmx && !warnedmmx)
1951 {
1952 if (warning (OPT_Wpsabi, "MMX vector argument "
1953 "without MMX enabled changes the ABI"))
1954 warnedmmx = true;
1955 }
1956 else if (in_return && !warnedmmx_ret)
1957 {
1958 if (warning (OPT_Wpsabi, "MMX vector return "
1959 "without MMX enabled changes the ABI"))
1960 warnedmmx_ret = true;
1961 }
1962 }
1963 return mode;
1964 }
1965
1966 gcc_unreachable ();
1967 }
1968 }
1969
1970 return mode;
1971 }
1972
1973 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
1974 this may not agree with the mode that the type system has chosen for the
1975 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
1976 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
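/* For illustration, when ORIG_MODE is BLKmode the RTX built below has the
   shape (the register number and mode here are hypothetical):

       (parallel:BLK [(expr_list (reg:V2DF xmm0) (const_int 0))])

   i.e. a single-element PARALLEL recording that the value lives in the
   given register starting at byte offset 0.  */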
1977
1978 static rtx
1979 gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
1980 unsigned int regno)
1981 {
1982 rtx tmp;
1983
1984 if (orig_mode != BLKmode)
1985 tmp = gen_rtx_REG (orig_mode, regno);
1986 else
1987 {
1988 tmp = gen_rtx_REG (mode, regno);
1989 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
1990 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
1991 }
1992
1993 return tmp;
1994 }
1995
1996 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
1997 of this code is to classify each 8bytes of incoming argument by the register
1998 class and assign registers accordingly. */
1999
2000 /* Return the union class of CLASS1 and CLASS2.
2001 See the x86-64 PS ABI for details. */
2002
2003 static enum x86_64_reg_class
2004 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2005 {
2006 /* Rule #1: If both classes are equal, this is the resulting class. */
2007 if (class1 == class2)
2008 return class1;
2009
2010 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2011 the other class. */
2012 if (class1 == X86_64_NO_CLASS)
2013 return class2;
2014 if (class2 == X86_64_NO_CLASS)
2015 return class1;
2016
2017 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
2018 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2019 return X86_64_MEMORY_CLASS;
2020
2021 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
2022 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2023 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2024 return X86_64_INTEGERSI_CLASS;
2025 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2026 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2027 return X86_64_INTEGER_CLASS;
2028
2029 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2030 MEMORY is used. */
2031 if (class1 == X86_64_X87_CLASS
2032 || class1 == X86_64_X87UP_CLASS
2033 || class1 == X86_64_COMPLEX_X87_CLASS
2034 || class2 == X86_64_X87_CLASS
2035 || class2 == X86_64_X87UP_CLASS
2036 || class2 == X86_64_COMPLEX_X87_CLASS)
2037 return X86_64_MEMORY_CLASS;
2038
2039 /* Rule #6: Otherwise class SSE is used. */
2040 return X86_64_SSE_CLASS;
2041 }
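/* Illustration of the merge rules above (hypothetical type): for

       union u { float f; int i; };

   both members share one eightbyte; merging X86_64_SSESF_CLASS (float)
   with X86_64_INTEGERSI_CLASS (int) yields X86_64_INTEGERSI_CLASS by
   rule #4, so the union travels in an integer register.  */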
2042
2043 /* Classify the argument of type TYPE and mode MODE.
2044 CLASSES will be filled by the register class used to pass each word
2045 of the operand. The number of words is returned. In case the parameter
2046 should be passed in memory, 0 is returned. As a special case for zero
2047 sized containers, classes[0] will be NO_CLASS and 1 is returned.
2048
2049 BIT_OFFSET is used internally for handling records and specifies the
2050 offset of the field in bits modulo 512 to avoid overflow cases.
2051
2052 See the x86-64 PS ABI for details.
2053 */
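/* Worked example (hypothetical type): under the SysV x86-64 ABI,

       struct two_words { double d; long l; };

   spans two eightbytes.  classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGER_CLASS,
   so the value is passed in one SSE register and one integer register.  */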
2054
2055 static int
2056 classify_argument (machine_mode mode, const_tree type,
2057 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2058 {
2059 HOST_WIDE_INT bytes
2060 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2061 int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
2062
2063 /* Variable sized entities are always passed/returned in memory. */
2064 if (bytes < 0)
2065 return 0;
2066
2067 if (mode != VOIDmode)
2068 {
2069 /* The value of "named" doesn't matter. */
2070 function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
2071 if (targetm.calls.must_pass_in_stack (arg))
2072 return 0;
2073 }
2074
2075 if (type && AGGREGATE_TYPE_P (type))
2076 {
2077 int i;
2078 tree field;
2079 enum x86_64_reg_class subclasses[MAX_CLASSES];
2080
2081 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
2082 if (bytes > 64)
2083 return 0;
2084
2085 for (i = 0; i < words; i++)
2086 classes[i] = X86_64_NO_CLASS;
2087
2088 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
2089 signal the memory class, so handle this as a special case. */
2090 if (!words)
2091 {
2092 classes[0] = X86_64_NO_CLASS;
2093 return 1;
2094 }
2095
2096 /* Classify each field of record and merge classes. */
2097 switch (TREE_CODE (type))
2098 {
2099 case RECORD_TYPE:
2100 /* And now merge the fields of structure. */
2101 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2102 {
2103 if (TREE_CODE (field) == FIELD_DECL)
2104 {
2105 int num;
2106
2107 if (TREE_TYPE (field) == error_mark_node)
2108 continue;
2109
2110 /* Bitfields are always classified as integer. Handle them
2111 early, since later code would consider them to be
2112 misaligned integers. */
2113 if (DECL_BIT_FIELD (field))
2114 {
2115 for (i = (int_bit_position (field)
2116 + (bit_offset % 64)) / 8 / 8;
2117 i < ((int_bit_position (field) + (bit_offset % 64))
2118 + tree_to_shwi (DECL_SIZE (field))
2119 + 63) / 8 / 8; i++)
2120 classes[i]
2121 = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
2122 }
2123 else
2124 {
2125 int pos;
2126
2127 type = TREE_TYPE (field);
2128
2129 /* Flexible array member is ignored. */
2130 if (TYPE_MODE (type) == BLKmode
2131 && TREE_CODE (type) == ARRAY_TYPE
2132 && TYPE_SIZE (type) == NULL_TREE
2133 && TYPE_DOMAIN (type) != NULL_TREE
2134 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
2135 == NULL_TREE))
2136 {
2137 static bool warned;
2138
2139 if (!warned && warn_psabi)
2140 {
2141 warned = true;
2142 inform (input_location,
2143 "the ABI of passing struct with"
2144 " a flexible array member has"
2145 " changed in GCC 4.4");
2146 }
2147 continue;
2148 }
2149 num = classify_argument (TYPE_MODE (type), type,
2150 subclasses,
2151 (int_bit_position (field)
2152 + bit_offset) % 512);
2153 if (!num)
2154 return 0;
2155 pos = (int_bit_position (field)
2156 + (bit_offset % 64)) / 8 / 8;
2157 for (i = 0; i < num && (i + pos) < words; i++)
2158 classes[i + pos]
2159 = merge_classes (subclasses[i], classes[i + pos]);
2160 }
2161 }
2162 }
2163 break;
2164
2165 case ARRAY_TYPE:
2166 /* Arrays are handled as small records. */
2167 {
2168 int num;
2169 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2170 TREE_TYPE (type), subclasses, bit_offset);
2171 if (!num)
2172 return 0;
2173
2174 /* The partial classes are now full classes. */
2175 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2176 subclasses[0] = X86_64_SSE_CLASS;
2177 if (subclasses[0] == X86_64_INTEGERSI_CLASS
2178 && !((bit_offset % 64) == 0 && bytes == 4))
2179 subclasses[0] = X86_64_INTEGER_CLASS;
2180
2181 for (i = 0; i < words; i++)
2182 classes[i] = subclasses[i % num];
2183
2184 break;
2185 }
2186 case UNION_TYPE:
2187 case QUAL_UNION_TYPE:
2188 /* Unions are similar to RECORD_TYPE, but the offset is always 0.
2189 */
2190 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
2191 {
2192 if (TREE_CODE (field) == FIELD_DECL)
2193 {
2194 int num;
2195
2196 if (TREE_TYPE (field) == error_mark_node)
2197 continue;
2198
2199 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2200 TREE_TYPE (field), subclasses,
2201 bit_offset);
2202 if (!num)
2203 return 0;
2204 for (i = 0; i < num && i < words; i++)
2205 classes[i] = merge_classes (subclasses[i], classes[i]);
2206 }
2207 }
2208 break;
2209
2210 default:
2211 gcc_unreachable ();
2212 }
2213
2214 if (words > 2)
2215 {
2216 /* When the size is > 16 bytes, if the first class isn't
2217 X86_64_SSE_CLASS or any of the other classes isn't
2218 X86_64_SSEUP_CLASS, everything should be passed in
2219 memory. */
2220 if (classes[0] != X86_64_SSE_CLASS)
2221 return 0;
2222
2223 for (i = 1; i < words; i++)
2224 if (classes[i] != X86_64_SSEUP_CLASS)
2225 return 0;
2226 }
2227
2228 /* Final merger cleanup. */
2229 for (i = 0; i < words; i++)
2230 {
2231 /* If one class is MEMORY, everything should be passed in
2232 memory. */
2233 if (classes[i] == X86_64_MEMORY_CLASS)
2234 return 0;
2235
2236 /* X86_64_SSEUP_CLASS should always be preceded by
2237 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
2238 if (classes[i] == X86_64_SSEUP_CLASS
2239 && classes[i - 1] != X86_64_SSE_CLASS
2240 && classes[i - 1] != X86_64_SSEUP_CLASS)
2241 {
2242 /* The first one should never be X86_64_SSEUP_CLASS. */
2243 gcc_assert (i != 0);
2244 classes[i] = X86_64_SSE_CLASS;
2245 }
2246
2247 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
2248 everything should be passed in memory. */
2249 if (classes[i] == X86_64_X87UP_CLASS
2250 && (classes[i - 1] != X86_64_X87_CLASS))
2251 {
2252 static bool warned;
2253
2254 /* The first one should never be X86_64_X87UP_CLASS. */
2255 gcc_assert (i != 0);
2256 if (!warned && warn_psabi)
2257 {
2258 warned = true;
2259 inform (input_location,
2260 "the ABI of passing union with %<long double%>"
2261 " has changed in GCC 4.4");
2262 }
2263 return 0;
2264 }
2265 }
2266 return words;
2267 }
2268
2269 /* Compute the alignment needed. We align all types to their natural
2270 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
2271 if (mode != VOIDmode && mode != BLKmode)
2272 {
2273 int mode_alignment = GET_MODE_BITSIZE (mode);
2274
2275 if (mode == XFmode)
2276 mode_alignment = 128;
2277 else if (mode == XCmode)
2278 mode_alignment = 256;
2279 if (COMPLEX_MODE_P (mode))
2280 mode_alignment /= 2;
2281 /* Misaligned fields are always returned in memory. */
2282 if (bit_offset % mode_alignment)
2283 return 0;
2284 }
2285
2286 /* For V1xx modes, just use the base mode. */
2287 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
2288 && GET_MODE_UNIT_SIZE (mode) == bytes)
2289 mode = GET_MODE_INNER (mode);
2290
2291 /* Classification of atomic types. */
2292 switch (mode)
2293 {
2294 case E_SDmode:
2295 case E_DDmode:
2296 classes[0] = X86_64_SSE_CLASS;
2297 return 1;
2298 case E_TDmode:
2299 classes[0] = X86_64_SSE_CLASS;
2300 classes[1] = X86_64_SSEUP_CLASS;
2301 return 2;
2302 case E_DImode:
2303 case E_SImode:
2304 case E_HImode:
2305 case E_QImode:
2306 case E_CSImode:
2307 case E_CHImode:
2308 case E_CQImode:
2309 {
2310 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
2311
2312 /* Analyze last 128 bits only. */
2313 size = (size - 1) & 0x7f;
2314
2315 if (size < 32)
2316 {
2317 classes[0] = X86_64_INTEGERSI_CLASS;
2318 return 1;
2319 }
2320 else if (size < 64)
2321 {
2322 classes[0] = X86_64_INTEGER_CLASS;
2323 return 1;
2324 }
2325 else if (size < 64+32)
2326 {
2327 classes[0] = X86_64_INTEGER_CLASS;
2328 classes[1] = X86_64_INTEGERSI_CLASS;
2329 return 2;
2330 }
2331 else if (size < 64+64)
2332 {
2333 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2334 return 2;
2335 }
2336 else
2337 gcc_unreachable ();
2338 }
2339 case E_CDImode:
2340 case E_TImode:
2341 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2342 return 2;
2343 case E_COImode:
2344 case E_OImode:
2345 /* OImode shouldn't be used directly. */
2346 gcc_unreachable ();
2347 case E_CTImode:
2348 return 0;
2349 case E_SFmode:
2350 if (!(bit_offset % 64))
2351 classes[0] = X86_64_SSESF_CLASS;
2352 else
2353 classes[0] = X86_64_SSE_CLASS;
2354 return 1;
2355 case E_DFmode:
2356 classes[0] = X86_64_SSEDF_CLASS;
2357 return 1;
2358 case E_XFmode:
2359 classes[0] = X86_64_X87_CLASS;
2360 classes[1] = X86_64_X87UP_CLASS;
2361 return 2;
2362 case E_TFmode:
2363 classes[0] = X86_64_SSE_CLASS;
2364 classes[1] = X86_64_SSEUP_CLASS;
2365 return 2;
2366 case E_SCmode:
2367 classes[0] = X86_64_SSE_CLASS;
2368 if (!(bit_offset % 64))
2369 return 1;
2370 else
2371 {
2372 static bool warned;
2373
2374 if (!warned && warn_psabi)
2375 {
2376 warned = true;
2377 inform (input_location,
2378 "the ABI of passing structure with %<complex float%>"
2379 " member has changed in GCC 4.4");
2380 }
2381 classes[1] = X86_64_SSESF_CLASS;
2382 return 2;
2383 }
2384 case E_DCmode:
2385 classes[0] = X86_64_SSEDF_CLASS;
2386 classes[1] = X86_64_SSEDF_CLASS;
2387 return 2;
2388 case E_XCmode:
2389 classes[0] = X86_64_COMPLEX_X87_CLASS;
2390 return 1;
2391 case E_TCmode:
2392 /* This mode is larger than 16 bytes. */
2393 return 0;
2394 case E_V8SFmode:
2395 case E_V8SImode:
2396 case E_V32QImode:
2397 case E_V16HImode:
2398 case E_V4DFmode:
2399 case E_V4DImode:
2400 classes[0] = X86_64_SSE_CLASS;
2401 classes[1] = X86_64_SSEUP_CLASS;
2402 classes[2] = X86_64_SSEUP_CLASS;
2403 classes[3] = X86_64_SSEUP_CLASS;
2404 return 4;
2405 case E_V8DFmode:
2406 case E_V16SFmode:
2407 case E_V8DImode:
2408 case E_V16SImode:
2409 case E_V32HImode:
2410 case E_V64QImode:
2411 classes[0] = X86_64_SSE_CLASS;
2412 classes[1] = X86_64_SSEUP_CLASS;
2413 classes[2] = X86_64_SSEUP_CLASS;
2414 classes[3] = X86_64_SSEUP_CLASS;
2415 classes[4] = X86_64_SSEUP_CLASS;
2416 classes[5] = X86_64_SSEUP_CLASS;
2417 classes[6] = X86_64_SSEUP_CLASS;
2418 classes[7] = X86_64_SSEUP_CLASS;
2419 return 8;
2420 case E_V4SFmode:
2421 case E_V4SImode:
2422 case E_V16QImode:
2423 case E_V8HImode:
2424 case E_V2DFmode:
2425 case E_V2DImode:
2426 classes[0] = X86_64_SSE_CLASS;
2427 classes[1] = X86_64_SSEUP_CLASS;
2428 return 2;
2429 case E_V1TImode:
2430 case E_V1DImode:
2431 case E_V2SFmode:
2432 case E_V2SImode:
2433 case E_V4HImode:
2434 case E_V8QImode:
2435 classes[0] = X86_64_SSE_CLASS;
2436 return 1;
2437 case E_BLKmode:
2438 case E_VOIDmode:
2439 return 0;
2440 default:
2441 gcc_assert (VECTOR_MODE_P (mode));
2442
2443 if (bytes > 16)
2444 return 0;
2445
2446 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2447
2448 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2449 classes[0] = X86_64_INTEGERSI_CLASS;
2450 else
2451 classes[0] = X86_64_INTEGER_CLASS;
2452 classes[1] = X86_64_INTEGER_CLASS;
2453 return 1 + (bytes > 8);
2454 }
2455 }
2456
2457 /* Examine the argument and set the number of registers required in each
2458 class. Return true iff the parameter should be passed in memory. */
2459
2460 static bool
2461 examine_argument (machine_mode mode, const_tree type, int in_return,
2462 int *int_nregs, int *sse_nregs)
2463 {
2464 enum x86_64_reg_class regclass[MAX_CLASSES];
2465 int n = classify_argument (mode, type, regclass, 0);
2466
2467 *int_nregs = 0;
2468 *sse_nregs = 0;
2469
2470 if (!n)
2471 return true;
2472 for (n--; n >= 0; n--)
2473 switch (regclass[n])
2474 {
2475 case X86_64_INTEGER_CLASS:
2476 case X86_64_INTEGERSI_CLASS:
2477 (*int_nregs)++;
2478 break;
2479 case X86_64_SSE_CLASS:
2480 case X86_64_SSESF_CLASS:
2481 case X86_64_SSEDF_CLASS:
2482 (*sse_nregs)++;
2483 break;
2484 case X86_64_NO_CLASS:
2485 case X86_64_SSEUP_CLASS:
2486 break;
2487 case X86_64_X87_CLASS:
2488 case X86_64_X87UP_CLASS:
2489 case X86_64_COMPLEX_X87_CLASS:
2490 if (!in_return)
2491 return true;
2492 break;
2493 case X86_64_MEMORY_CLASS:
2494 gcc_unreachable ();
2495 }
2496
2497 return false;
2498 }
2499
2500 /* Construct container for the argument used by GCC interface. See
2501 FUNCTION_ARG for the detailed description. */
2502
2503 static rtx
2504 construct_container (machine_mode mode, machine_mode orig_mode,
2505 const_tree type, int in_return, int nintregs, int nsseregs,
2506 const int *intreg, int sse_regno)
2507 {
2508 /* The following variables hold the static issued_error state. */
2509 static bool issued_sse_arg_error;
2510 static bool issued_sse_ret_error;
2511 static bool issued_x87_ret_error;
2512
2513 machine_mode tmpmode;
2514 int bytes
2515 = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2516 enum x86_64_reg_class regclass[MAX_CLASSES];
2517 int n;
2518 int i;
2519 int nexps = 0;
2520 int needed_sseregs, needed_intregs;
2521 rtx exp[MAX_CLASSES];
2522 rtx ret;
2523
2524 n = classify_argument (mode, type, regclass, 0);
2525 if (!n)
2526 return NULL;
2527 if (examine_argument (mode, type, in_return, &needed_intregs,
2528 &needed_sseregs))
2529 return NULL;
2530 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2531 return NULL;
2532
2533 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
2534 some less clueful developer tries to use floating-point anyway. */
2535 if (needed_sseregs && !TARGET_SSE)
2536 {
2537 if (in_return)
2538 {
2539 if (!issued_sse_ret_error)
2540 {
2541 error ("SSE register return with SSE disabled");
2542 issued_sse_ret_error = true;
2543 }
2544 }
2545 else if (!issued_sse_arg_error)
2546 {
2547 error ("SSE register argument with SSE disabled");
2548 issued_sse_arg_error = true;
2549 }
2550 return NULL;
2551 }
2552
2553 /* Likewise, error if the ABI requires us to return values in the
2554 x87 registers and the user specified -mno-80387. */
2555 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
2556 for (i = 0; i < n; i++)
2557 if (regclass[i] == X86_64_X87_CLASS
2558 || regclass[i] == X86_64_X87UP_CLASS
2559 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
2560 {
2561 if (!issued_x87_ret_error)
2562 {
2563 error ("x87 register return with x87 disabled");
2564 issued_x87_ret_error = true;
2565 }
2566 return NULL;
2567 }
2568
2569 /* First construct the simple cases. Avoid SCmode, since we want to use
2570 a single register to pass this type. */
2571 if (n == 1 && mode != SCmode)
2572 switch (regclass[0])
2573 {
2574 case X86_64_INTEGER_CLASS:
2575 case X86_64_INTEGERSI_CLASS:
2576 return gen_rtx_REG (mode, intreg[0]);
2577 case X86_64_SSE_CLASS:
2578 case X86_64_SSESF_CLASS:
2579 case X86_64_SSEDF_CLASS:
2580 if (mode != BLKmode)
2581 return gen_reg_or_parallel (mode, orig_mode,
2582 GET_SSE_REGNO (sse_regno));
2583 break;
2584 case X86_64_X87_CLASS:
2585 case X86_64_COMPLEX_X87_CLASS:
2586 return gen_rtx_REG (mode, FIRST_STACK_REG);
2587 case X86_64_NO_CLASS:
2588 /* Zero sized array, struct or class. */
2589 return NULL;
2590 default:
2591 gcc_unreachable ();
2592 }
2593 if (n == 2
2594 && regclass[0] == X86_64_SSE_CLASS
2595 && regclass[1] == X86_64_SSEUP_CLASS
2596 && mode != BLKmode)
2597 return gen_reg_or_parallel (mode, orig_mode,
2598 GET_SSE_REGNO (sse_regno));
2599 if (n == 4
2600 && regclass[0] == X86_64_SSE_CLASS
2601 && regclass[1] == X86_64_SSEUP_CLASS
2602 && regclass[2] == X86_64_SSEUP_CLASS
2603 && regclass[3] == X86_64_SSEUP_CLASS
2604 && mode != BLKmode)
2605 return gen_reg_or_parallel (mode, orig_mode,
2606 GET_SSE_REGNO (sse_regno));
2607 if (n == 8
2608 && regclass[0] == X86_64_SSE_CLASS
2609 && regclass[1] == X86_64_SSEUP_CLASS
2610 && regclass[2] == X86_64_SSEUP_CLASS
2611 && regclass[3] == X86_64_SSEUP_CLASS
2612 && regclass[4] == X86_64_SSEUP_CLASS
2613 && regclass[5] == X86_64_SSEUP_CLASS
2614 && regclass[6] == X86_64_SSEUP_CLASS
2615 && regclass[7] == X86_64_SSEUP_CLASS
2616 && mode != BLKmode)
2617 return gen_reg_or_parallel (mode, orig_mode,
2618 GET_SSE_REGNO (sse_regno));
2619 if (n == 2
2620 && regclass[0] == X86_64_X87_CLASS
2621 && regclass[1] == X86_64_X87UP_CLASS)
2622 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2623
2624 if (n == 2
2625 && regclass[0] == X86_64_INTEGER_CLASS
2626 && regclass[1] == X86_64_INTEGER_CLASS
2627 && (mode == CDImode || mode == TImode || mode == BLKmode)
2628 && intreg[0] + 1 == intreg[1])
2629 {
2630 if (mode == BLKmode)
2631 {
2632 /* Use TImode for BLKmode values in 2 integer registers. */
2633 exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
2634 gen_rtx_REG (TImode, intreg[0]),
2635 GEN_INT (0));
2636 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
2637 XVECEXP (ret, 0, 0) = exp[0];
2638 return ret;
2639 }
2640 else
2641 return gen_rtx_REG (mode, intreg[0]);
2642 }
2643
2644 /* Otherwise figure out the entries of the PARALLEL. */
2645 for (i = 0; i < n; i++)
2646 {
2647 int pos;
2648
2649 switch (regclass[i])
2650 {
2651 case X86_64_NO_CLASS:
2652 break;
2653 case X86_64_INTEGER_CLASS:
2654 case X86_64_INTEGERSI_CLASS:
2655 /* Merge TImodes on aligned occasions here too. */
2656 if (i * 8 + 8 > bytes)
2657 {
2658 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
2659 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
2660 /* We've requested 24 bytes for which we
2661 don't have a mode. Use DImode. */
2662 tmpmode = DImode;
2663 }
2664 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
2665 tmpmode = SImode;
2666 else
2667 tmpmode = DImode;
2668 exp [nexps++]
2669 = gen_rtx_EXPR_LIST (VOIDmode,
2670 gen_rtx_REG (tmpmode, *intreg),
2671 GEN_INT (i*8));
2672 intreg++;
2673 break;
2674 case X86_64_SSESF_CLASS:
2675 exp [nexps++]
2676 = gen_rtx_EXPR_LIST (VOIDmode,
2677 gen_rtx_REG (SFmode,
2678 GET_SSE_REGNO (sse_regno)),
2679 GEN_INT (i*8));
2680 sse_regno++;
2681 break;
2682 case X86_64_SSEDF_CLASS:
2683 exp [nexps++]
2684 = gen_rtx_EXPR_LIST (VOIDmode,
2685 gen_rtx_REG (DFmode,
2686 GET_SSE_REGNO (sse_regno)),
2687 GEN_INT (i*8));
2688 sse_regno++;
2689 break;
2690 case X86_64_SSE_CLASS:
2691 pos = i;
2692 switch (n)
2693 {
2694 case 1:
2695 tmpmode = DImode;
2696 break;
2697 case 2:
2698 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
2699 {
2700 tmpmode = TImode;
2701 i++;
2702 }
2703 else
2704 tmpmode = DImode;
2705 break;
2706 case 4:
2707 gcc_assert (i == 0
2708 && regclass[1] == X86_64_SSEUP_CLASS
2709 && regclass[2] == X86_64_SSEUP_CLASS
2710 && regclass[3] == X86_64_SSEUP_CLASS);
2711 tmpmode = OImode;
2712 i += 3;
2713 break;
2714 case 8:
2715 gcc_assert (i == 0
2716 && regclass[1] == X86_64_SSEUP_CLASS
2717 && regclass[2] == X86_64_SSEUP_CLASS
2718 && regclass[3] == X86_64_SSEUP_CLASS
2719 && regclass[4] == X86_64_SSEUP_CLASS
2720 && regclass[5] == X86_64_SSEUP_CLASS
2721 && regclass[6] == X86_64_SSEUP_CLASS
2722 && regclass[7] == X86_64_SSEUP_CLASS);
2723 tmpmode = XImode;
2724 i += 7;
2725 break;
2726 default:
2727 gcc_unreachable ();
2728 }
2729 exp [nexps++]
2730 = gen_rtx_EXPR_LIST (VOIDmode,
2731 gen_rtx_REG (tmpmode,
2732 GET_SSE_REGNO (sse_regno)),
2733 GEN_INT (pos*8));
2734 sse_regno++;
2735 break;
2736 default:
2737 gcc_unreachable ();
2738 }
2739 }
2740
2741 /* Empty aligned struct, union or class. */
2742 if (nexps == 0)
2743 return NULL;
2744
2745 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2746 for (i = 0; i < nexps; i++)
2747 XVECEXP (ret, 0, i) = exp [i];
2748 return ret;
2749 }
2750
2751 /* Update the data in CUM to advance over an argument of mode MODE
2752 and data type TYPE. (TYPE is null for libcalls where that information
2753 may not be available.)
2754
2755 Return the number of integer registers advanced over. */
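/* Illustrative example (hypothetical declaration): on ia32 with

       void f (int a, int b, int c) __attribute__ ((regparm (3)));

   each of the three SImode arguments advances CUM by one word, so A, B
   and C land in %eax, %edx and %ecx, and cum->nregs drops from 3 to 0.  */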
2756
2757 static int
2758 function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2759 const_tree type, HOST_WIDE_INT bytes,
2760 HOST_WIDE_INT words)
2761 {
2762 int res = 0;
2763 bool error_p = false;
2764
2765 if (TARGET_IAMCU)
2766 {
2767 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2768 bytes in registers. */
2769 if (!VECTOR_MODE_P (mode) && bytes <= 8)
2770 goto pass_in_reg;
2771 return res;
2772 }
2773
2774 switch (mode)
2775 {
2776 default:
2777 break;
2778
2779 case E_BLKmode:
2780 if (bytes < 0)
2781 break;
2782 /* FALLTHRU */
2783
2784 case E_DImode:
2785 case E_SImode:
2786 case E_HImode:
2787 case E_QImode:
2788 pass_in_reg:
2789 cum->words += words;
2790 cum->nregs -= words;
2791 cum->regno += words;
2792 if (cum->nregs >= 0)
2793 res = words;
2794 if (cum->nregs <= 0)
2795 {
2796 cum->nregs = 0;
2797 cfun->machine->arg_reg_available = false;
2798 cum->regno = 0;
2799 }
2800 break;
2801
2802 case E_OImode:
2803 /* OImode shouldn't be used directly. */
2804 gcc_unreachable ();
2805
2806 case E_DFmode:
2807 if (cum->float_in_sse == -1)
2808 error_p = true;
2809 if (cum->float_in_sse < 2)
2810 break;
2811 /* FALLTHRU */
2812 case E_SFmode:
2813 if (cum->float_in_sse == -1)
2814 error_p = true;
2815 if (cum->float_in_sse < 1)
2816 break;
2817 /* FALLTHRU */
2818
2819 case E_V8SFmode:
2820 case E_V8SImode:
2821 case E_V64QImode:
2822 case E_V32HImode:
2823 case E_V16SImode:
2824 case E_V8DImode:
2825 case E_V16SFmode:
2826 case E_V8DFmode:
2827 case E_V32QImode:
2828 case E_V16HImode:
2829 case E_V4DFmode:
2830 case E_V4DImode:
2831 case E_TImode:
2832 case E_V16QImode:
2833 case E_V8HImode:
2834 case E_V4SImode:
2835 case E_V2DImode:
2836 case E_V4SFmode:
2837 case E_V2DFmode:
2838 if (!type || !AGGREGATE_TYPE_P (type))
2839 {
2840 cum->sse_words += words;
2841 cum->sse_nregs -= 1;
2842 cum->sse_regno += 1;
2843 if (cum->sse_nregs <= 0)
2844 {
2845 cum->sse_nregs = 0;
2846 cum->sse_regno = 0;
2847 }
2848 }
2849 break;
2850
2851 case E_V8QImode:
2852 case E_V4HImode:
2853 case E_V2SImode:
2854 case E_V2SFmode:
2855 case E_V1TImode:
2856 case E_V1DImode:
2857 if (!type || !AGGREGATE_TYPE_P (type))
2858 {
2859 cum->mmx_words += words;
2860 cum->mmx_nregs -= 1;
2861 cum->mmx_regno += 1;
2862 if (cum->mmx_nregs <= 0)
2863 {
2864 cum->mmx_nregs = 0;
2865 cum->mmx_regno = 0;
2866 }
2867 }
2868 break;
2869 }
2870 if (error_p)
2871 {
2872 cum->float_in_sse = 0;
2873 error ("calling %qD with SSE calling convention without "
2874 "SSE/SSE2 enabled", cum->decl);
2875 sorry ("this is a GCC bug that can be worked around by adding "
2876 "attribute used to function called");
2877 }
2878
2879 return res;
2880 }
2881
2882 static int
2883 function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
2884 const_tree type, HOST_WIDE_INT words, bool named)
2885 {
2886 int int_nregs, sse_nregs;
2887
2888 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
2889 if (!named && (VALID_AVX512F_REG_MODE (mode)
2890 || VALID_AVX256_REG_MODE (mode)))
2891 return 0;
2892
2893 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
2894 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2895 {
2896 cum->nregs -= int_nregs;
2897 cum->sse_nregs -= sse_nregs;
2898 cum->regno += int_nregs;
2899 cum->sse_regno += sse_nregs;
2900 return int_nregs;
2901 }
2902 else
2903 {
2904 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
2905 cum->words = ROUND_UP (cum->words, align);
2906 cum->words += words;
2907 return 0;
2908 }
2909 }
2910
2911 static int
2912 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
2913 HOST_WIDE_INT words)
2914 {
2915 /* Otherwise, this should be passed indirectly. */
2916 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
2917
2918 cum->words += words;
2919 if (cum->nregs > 0)
2920 {
2921 cum->nregs -= 1;
2922 cum->regno += 1;
2923 return 1;
2924 }
2925 return 0;
2926 }
2927
2928 /* Update the data in CUM to advance over argument ARG. */
2929
2930 static void
2931 ix86_function_arg_advance (cumulative_args_t cum_v,
2932 const function_arg_info &arg)
2933 {
2934 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2935 machine_mode mode = arg.mode;
2936 HOST_WIDE_INT bytes, words;
2937 int nregs;
2938
2939 /* The argument of an interrupt handler is a special case and is
2940 handled in ix86_function_arg. */
2941 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
2942 return;
2943
2944 bytes = arg.promoted_size_in_bytes ();
2945 words = CEIL (bytes, UNITS_PER_WORD);
2946
2947 if (arg.type)
2948 mode = type_natural_mode (arg.type, NULL, false);
2949
2950 if (TARGET_64BIT)
2951 {
2952 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
2953
2954 if (call_abi == MS_ABI)
2955 nregs = function_arg_advance_ms_64 (cum, bytes, words);
2956 else
2957 nregs = function_arg_advance_64 (cum, mode, arg.type, words,
2958 arg.named);
2959 }
2960 else
2961 nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
2962
2963 if (!nregs)
2964 {
2965 /* Track if there are outgoing arguments on stack. */
2966 if (cum->caller)
2967 cfun->machine->outgoing_args_on_stack = true;
2968 }
2969 }
2970
2971 /* Define where to put the arguments to a function.
2972 Value is zero to push the argument on the stack,
2973 or a hard register in which to store the argument.
2974
2975 MODE is the argument's machine mode.
2976 TYPE is the data type of the argument (as a tree).
2977 This is null for libcalls where that information may
2978 not be available.
2979 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2980 the preceding args and about the function being called.
2981 NAMED is nonzero if this argument is a named parameter
2982 (otherwise it is an extra parameter matching an ellipsis). */
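/* Illustrative example (hypothetical declaration): with

       __attribute__ ((fastcall)) void f (int a, int b, int c);

   the code below places A in %ecx and B in %edx (note the AX_REG ->
   CX_REG switch for fastcall), while C no longer fits in the two
   available registers and is pushed on the stack.  */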
2983
2984 static rtx
2985 function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
2986 machine_mode orig_mode, const_tree type,
2987 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
2988 {
2989 bool error_p = false;
2990
2991 /* Avoid the AL settings for the Unix64 ABI. */
2992 if (mode == VOIDmode)
2993 return constm1_rtx;
2994
2995 if (TARGET_IAMCU)
2996 {
2997 /* Intel MCU psABI passes scalars and aggregates no larger than 8
2998 bytes in registers. */
2999 if (!VECTOR_MODE_P (mode) && bytes <= 8)
3000 goto pass_in_reg;
3001 return NULL_RTX;
3002 }
3003
3004 switch (mode)
3005 {
3006 default:
3007 break;
3008
3009 case E_BLKmode:
3010 if (bytes < 0)
3011 break;
3012 /* FALLTHRU */
3013 case E_DImode:
3014 case E_SImode:
3015 case E_HImode:
3016 case E_QImode:
3017 pass_in_reg:
3018 if (words <= cum->nregs)
3019 {
3020 int regno = cum->regno;
3021
3022 /* Fastcall allocates the first two DWORD (SImode) or
3023 smaller arguments to ECX and EDX if the argument isn't an
3024 aggregate type. */
3025 if (cum->fastcall)
3026 {
3027 if (mode == BLKmode
3028 || mode == DImode
3029 || (type && AGGREGATE_TYPE_P (type)))
3030 break;
3031
3032 /* ECX not EAX is the first allocated register. */
3033 if (regno == AX_REG)
3034 regno = CX_REG;
3035 }
3036 return gen_rtx_REG (mode, regno);
3037 }
3038 break;
3039
3040 case E_DFmode:
3041 if (cum->float_in_sse == -1)
3042 error_p = true;
3043 if (cum->float_in_sse < 2)
3044 break;
3045 /* FALLTHRU */
3046 case E_SFmode:
3047 if (cum->float_in_sse == -1)
3048 error_p = true;
3049 if (cum->float_in_sse < 1)
3050 break;
3051 /* FALLTHRU */
3052 case E_TImode:
3053 /* In 32bit, we pass TImode in xmm registers. */
3054 case E_V16QImode:
3055 case E_V8HImode:
3056 case E_V4SImode:
3057 case E_V2DImode:
3058 case E_V4SFmode:
3059 case E_V2DFmode:
3060 if (!type || !AGGREGATE_TYPE_P (type))
3061 {
3062 if (cum->sse_nregs)
3063 return gen_reg_or_parallel (mode, orig_mode,
3064 cum->sse_regno + FIRST_SSE_REG);
3065 }
3066 break;
3067
3068 case E_OImode:
3069 case E_XImode:
3070 /* OImode and XImode shouldn't be used directly. */
3071 gcc_unreachable ();
3072
3073 case E_V64QImode:
3074 case E_V32HImode:
3075 case E_V16SImode:
3076 case E_V8DImode:
3077 case E_V16SFmode:
3078 case E_V8DFmode:
3079 case E_V8SFmode:
3080 case E_V8SImode:
3081 case E_V32QImode:
3082 case E_V16HImode:
3083 case E_V4DFmode:
3084 case E_V4DImode:
3085 if (!type || !AGGREGATE_TYPE_P (type))
3086 {
3087 if (cum->sse_nregs)
3088 return gen_reg_or_parallel (mode, orig_mode,
3089 cum->sse_regno + FIRST_SSE_REG);
3090 }
3091 break;
3092
3093 case E_V8QImode:
3094 case E_V4HImode:
3095 case E_V2SImode:
3096 case E_V2SFmode:
3097 case E_V1TImode:
3098 case E_V1DImode:
3099 if (!type || !AGGREGATE_TYPE_P (type))
3100 {
3101 if (cum->mmx_nregs)
3102 return gen_reg_or_parallel (mode, orig_mode,
3103 cum->mmx_regno + FIRST_MMX_REG);
3104 }
3105 break;
3106 }
3107 if (error_p)
3108 {
3109 cum->float_in_sse = 0;
3110 error ("calling %qD with SSE calling convention without "
3111 "SSE/SSE2 enabled", cum->decl);
3112 sorry ("this is a GCC bug that can be worked around by adding "
3113 "attribute used to function called");
3114 }
3115
3116 return NULL_RTX;
3117 }
3118
3119 static rtx
3120 function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3121 machine_mode orig_mode, const_tree type, bool named)
3122 {
3123 /* Handle a hidden AL argument containing number of registers
3124 for varargs x86-64 functions. */
3125 if (mode == VOIDmode)
3126 return GEN_INT (cum->maybe_vaarg
3127 ? (cum->sse_nregs < 0
3128 ? X86_64_SSE_REGPARM_MAX
3129 : cum->sse_regno)
3130 : -1);
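/* Illustrative consequence: for a variadic SysV call such as
   printf ("%f", 1.0), the hidden value above ends up as
   "movl $1, %eax" before the call, telling the callee how many vector
   registers carry arguments.  */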
3131
3132 switch (mode)
3133 {
3134 default:
3135 break;
3136
3137 case E_V8SFmode:
3138 case E_V8SImode:
3139 case E_V32QImode:
3140 case E_V16HImode:
3141 case E_V4DFmode:
3142 case E_V4DImode:
3143 case E_V16SFmode:
3144 case E_V16SImode:
3145 case E_V64QImode:
3146 case E_V32HImode:
3147 case E_V8DFmode:
3148 case E_V8DImode:
3149 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
3150 if (!named)
3151 return NULL;
3152 break;
3153 }
3154
3155 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3156 cum->sse_nregs,
3157 &x86_64_int_parameter_registers [cum->regno],
3158 cum->sse_regno);
3159 }
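/* Illustrative example (hypothetical declaration): under the SysV ABI,

       void f (int a, double b, int c);

   the container built above puts A in %edi, B in %xmm0 and C in %esi,
   since integer and SSE eightbytes draw from separate register pools.  */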
3160
3161 static rtx
3162 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
3163 machine_mode orig_mode, bool named, const_tree type,
3164 HOST_WIDE_INT bytes)
3165 {
3166 unsigned int regno;
3167
3168 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
3169 We use the value -2 to specify that the current function call is MS ABI. */
3170 if (mode == VOIDmode)
3171 return GEN_INT (-2);
3172
3173 /* If we've run out of registers, it goes on the stack. */
3174 if (cum->nregs == 0)
3175 return NULL_RTX;
3176
3177 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3178
3179 /* Only floating point modes are passed in anything but integer regs. */
3180 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3181 {
3182 if (named)
3183 {
3184 if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
3185 regno = cum->regno + FIRST_SSE_REG;
3186 }
3187 else
3188 {
3189 rtx t1, t2;
3190
3191 /* Unnamed floating parameters are passed in both the
3192 SSE and integer registers. */
3193 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3194 t2 = gen_rtx_REG (mode, regno);
3195 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3196 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3197 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3198 }
3199 }
3200 /* Handle aggregate types passed in registers. */
3201 if (orig_mode == BLKmode)
3202 {
3203 if (bytes > 0 && bytes <= 8)
3204 mode = (bytes > 4 ? DImode : SImode);
3205 if (mode == BLKmode)
3206 mode = DImode;
3207 }
3208
3209 return gen_reg_or_parallel (mode, orig_mode, regno);
3210 }
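/* Illustrative example (hypothetical declaration): under the MS ABI,

       void f (int a, double b, int c, double d);

   each parameter consumes one positional slot, so A goes in %ecx,
   B in %xmm1, C in %r8d and D in %xmm3, in contrast to the SysV
   handling in function_arg_64 above.  */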
3211
3212 /* Return where to put the arguments to a function.
3213 Return zero to push the argument on the stack, or a hard register in which to store the argument.
3214
3215 ARG describes the argument while CUM gives information about the
3216 preceding args and about the function being called. */
3217
3218 static rtx
3219 ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
3220 {
3221 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3222 machine_mode mode = arg.mode;
3223 HOST_WIDE_INT bytes, words;
3224 rtx reg;
3225
3226 if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
3227 {
3228 gcc_assert (arg.type != NULL_TREE);
3229 if (POINTER_TYPE_P (arg.type))
3230 {
3231 /* This is the pointer argument. */
3232 gcc_assert (TYPE_MODE (arg.type) == Pmode);
3233 /* It is at -WORD(AP) in the current frame in interrupt and
3234 exception handlers. */
3235 reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
3236 }
3237 else
3238 {
3239 gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
3240 && TREE_CODE (arg.type) == INTEGER_TYPE
3241 && TYPE_MODE (arg.type) == word_mode);
3242 /* The error code is the word-mode integer argument at
3243 -2 * WORD(AP) in the current frame of the exception
3244 handler. */
3245 reg = gen_rtx_MEM (word_mode,
3246 plus_constant (Pmode,
3247 arg_pointer_rtx,
3248 -2 * UNITS_PER_WORD));
3249 }
3250 return reg;
3251 }
3252
3253 bytes = arg.promoted_size_in_bytes ();
3254 words = CEIL (bytes, UNITS_PER_WORD);
3255
3256 /* To simplify the code below, represent vector types with a vector mode
3257 even if MMX/SSE are not active. */
3258 if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE)
3259 mode = type_natural_mode (arg.type, cum, false);
3260
3261 if (TARGET_64BIT)
3262 {
3263 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3264
3265 if (call_abi == MS_ABI)
3266 reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
3267 arg.type, bytes);
3268 else
3269 reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
3270 }
3271 else
3272 reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
3273
3274 /* Track if there are outgoing arguments on stack. */
3275 if (reg == NULL_RTX && cum->caller)
3276 cfun->machine->outgoing_args_on_stack = true;
3277
3278 return reg;
3279 }
3280
3281 /* A C expression that indicates when an argument must be passed by
3282 reference. If nonzero for an argument, a copy of that argument is
3283 made in memory and a pointer to the argument is passed instead of
3284 the argument itself. The pointer is passed in whatever way is
3285 appropriate for passing a pointer to that type. */
3286
3287 static bool
3288 ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
3289 {
3290 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3291
3292 if (TARGET_64BIT)
3293 {
3294 enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
3295
3296 /* See Windows x64 Software Convention. */
3297 if (call_abi == MS_ABI)
3298 {
3299 HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
3300
3301 if (tree type = arg.type)
3302 {
3303 /* Arrays are passed by reference. */
3304 if (TREE_CODE (type) == ARRAY_TYPE)
3305 return true;
3306
3307 if (RECORD_OR_UNION_TYPE_P (type))
3308 {
3309 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
3310 are passed by reference. */
3311 msize = int_size_in_bytes (type);
3312 }
3313 }
3314
3315 /* __m128 is passed by reference. */
3316 return msize != 1 && msize != 2 && msize != 4 && msize != 8;
3317 }
3318 else if (arg.type && int_size_in_bytes (arg.type) == -1)
3319 return true;
3320 }
3321
3322 return false;
3323 }
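/* Illustration of the Windows x64 rule above (hypothetical types):

       struct s3 { char c[3]; };    3 bytes  -> passed by reference
       struct s8 { char c[8]; };    8 bytes  -> passed by value in a GPR
       __m128                      16 bytes  -> passed by reference

   only aggregates of exactly 1, 2, 4 or 8 bytes are passed by value.  */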
3324
3325 /* Return true when TYPE should be 128bit aligned for 32bit argument
3326 passing ABI. XXX: This function is obsolete and is only used for
3327 checking psABI compatibility with previous versions of GCC. */
3328
3329 static bool
3330 ix86_compat_aligned_value_p (const_tree type)
3331 {
3332 machine_mode mode = TYPE_MODE (type);
3333 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
3334 || mode == TDmode
3335 || mode == TFmode
3336 || mode == TCmode)
3337 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3338 return true;
3339 if (TYPE_ALIGN (type) < 128)
3340 return false;
3341
3342 if (AGGREGATE_TYPE_P (type))
3343 {
3344 /* Walk the aggregates recursively. */
3345 switch (TREE_CODE (type))
3346 {
3347 case RECORD_TYPE:
3348 case UNION_TYPE:
3349 case QUAL_UNION_TYPE:
3350 {
3351 tree field;
3352
3353 /* Walk all the structure fields. */
3354 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3355 {
3356 if (TREE_CODE (field) == FIELD_DECL
3357 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
3358 return true;
3359 }
3360 break;
3361 }
3362
3363 case ARRAY_TYPE:
3364 /* Just for use if some language passes arrays by value. */
3365 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
3366 return true;
3367 break;
3368
3369 default:
3370 gcc_unreachable ();
3371 }
3372 }
3373 return false;
3374 }
3375
3376 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
3377 XXX: This function is obsolete and is only used for checking psABI
3378 compatibility with previous versions of GCC. */
3379
3380 static unsigned int
3381 ix86_compat_function_arg_boundary (machine_mode mode,
3382 const_tree type, unsigned int align)
3383 {
3384 /* In 32bit, only _Decimal128 and __float128 are aligned to their
3385 natural boundaries. */
3386 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
3387 {
3388 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3389 make an exception for SSE modes since these require 128bit
3390 alignment.
3391
3392 The handling here differs from field_alignment. ICC aligns MMX
3393 arguments to 4 byte boundaries, while structure fields are aligned
3394 to 8 byte boundaries. */
3395 if (!type)
3396 {
3397 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
3398 align = PARM_BOUNDARY;
3399 }
3400 else
3401 {
3402 if (!ix86_compat_aligned_value_p (type))
3403 align = PARM_BOUNDARY;
3404 }
3405 }
3406 if (align > BIGGEST_ALIGNMENT)
3407 align = BIGGEST_ALIGNMENT;
3408 return align;
3409 }
3410
3411 /* Return true when TYPE should be 128bit aligned for 32bit argument
3412 passing ABI. */
3413
3414 static bool
3415 ix86_contains_aligned_value_p (const_tree type)
3416 {
3417 machine_mode mode = TYPE_MODE (type);
3418
3419 if (mode == XFmode || mode == XCmode)
3420 return false;
3421
3422 if (TYPE_ALIGN (type) < 128)
3423 return false;
3424
3425 if (AGGREGATE_TYPE_P (type))
3426 {
3427 /* Walk the aggregates recursively. */
3428 switch (TREE_CODE (type))
3429 {
3430 case RECORD_TYPE:
3431 case UNION_TYPE:
3432 case QUAL_UNION_TYPE:
3433 {
3434 tree field;
3435
3436 /* Walk all the structure fields. */
3437 for (field = TYPE_FIELDS (type);
3438 field;
3439 field = DECL_CHAIN (field))
3440 {
3441 if (TREE_CODE (field) == FIELD_DECL
3442 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
3443 return true;
3444 }
3445 break;
3446 }
3447
3448 case ARRAY_TYPE:
3449 /* Just for use if some language passes arrays by value. */
3450 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
3451 return true;
3452 break;
3453
3454 default:
3455 gcc_unreachable ();
3456 }
3457 }
3458 else
3459 return TYPE_ALIGN (type) >= 128;
3460
3461 return false;
3462 }
3463
3464 /* Gives the alignment boundary, in bits, of an argument with the
3465 specified mode and type. */
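/* Illustrative example (hypothetical typedef): on ia32,

       typedef int aligned_int __attribute__ ((aligned (16)));

   passed as an argument gets a 128-bit boundary from the code below,
   whereas ix86_compat_function_arg_boundary above computes
   PARM_BOUNDARY for it, so the -Wpsabi note is emitted once.  */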
3466
3467 static unsigned int
3468 ix86_function_arg_boundary (machine_mode mode, const_tree type)
3469 {
3470 unsigned int align;
3471 if (type)
3472 {
3473 /* Since the main variant type is used for the call, convert TYPE to
3474 its main variant. */
3475 type = TYPE_MAIN_VARIANT (type);
3476 align = TYPE_ALIGN (type);
3477 if (TYPE_EMPTY_P (type))
3478 return PARM_BOUNDARY;
3479 }
3480 else
3481 align = GET_MODE_ALIGNMENT (mode);
3482 if (align < PARM_BOUNDARY)
3483 align = PARM_BOUNDARY;
3484 else
3485 {
3486 static bool warned;
3487 unsigned int saved_align = align;
3488
3489 if (!TARGET_64BIT)
3490 {
3491 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
3492 if (!type)
3493 {
3494 if (mode == XFmode || mode == XCmode)
3495 align = PARM_BOUNDARY;
3496 }
3497 else if (!ix86_contains_aligned_value_p (type))
3498 align = PARM_BOUNDARY;
3499
3500 if (align < 128)
3501 align = PARM_BOUNDARY;
3502 }
3503
3504 if (warn_psabi
3505 && !warned
3506 && align != ix86_compat_function_arg_boundary (mode, type,
3507 saved_align))
3508 {
3509 warned = true;
3510 inform (input_location,
3511 "the ABI for passing parameters with %d-byte"
3512 " alignment has changed in GCC 4.6",
3513 align / BITS_PER_UNIT);
3514 }
3515 }
3516
3517 return align;
3518 }
3519
3520 /* Return true if N is a possible register number of function value. */
3521
3522 static bool
3523 ix86_function_value_regno_p (const unsigned int regno)
3524 {
3525 switch (regno)
3526 {
3527 case AX_REG:
3528 return true;
3529 case DX_REG:
3530 return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
3531 case DI_REG:
3532 case SI_REG:
3533 return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
3534
3535 /* Complex values are returned in %st(0)/%st(1) pair. */
3536 case ST0_REG:
3537 case ST1_REG:
3538 /* TODO: The function should depend on current function ABI but
3539 builtins.c would need updating then. Therefore we use the
3540 default ABI. */
3541 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3542 return false;
3543 return TARGET_FLOAT_RETURNS_IN_80387;
3544
3545 /* Complex values are returned in %xmm0/%xmm1 pair. */
3546 case XMM0_REG:
3547 case XMM1_REG:
3548 return TARGET_SSE;
3549
3550 case MM0_REG:
3551 if (TARGET_MACHO || TARGET_64BIT)
3552 return false;
3553 return TARGET_MMX;
3554 }
3555
3556 return false;
3557 }
3558
3559 /* Check whether the register REGNO should be zeroed on X86.
3560 When ALL_SSE_ZEROED is true, all SSE registers have already been zeroed
3561 together, so there is no need to zero them again.
3562 When NEED_ZERO_MMX is true, MMX registers should be cleared. */
3563
3564 static bool
3565 zero_call_used_regno_p (const unsigned int regno,
3566 bool all_sse_zeroed,
3567 bool need_zero_mmx)
3568 {
3569 return GENERAL_REGNO_P (regno)
3570 || (!all_sse_zeroed && SSE_REGNO_P (regno))
3571 || MASK_REGNO_P (regno)
3572 || (need_zero_mmx && MMX_REGNO_P (regno));
3573 }
3574
3575 /* Return the machine_mode that is used to zero register REGNO. */
3576
3577 static machine_mode
3578 zero_call_used_regno_mode (const unsigned int regno)
3579 {
3580 /* NB: We only need to zero the lower 32 bits for integer registers
3581 and the lower 128 bits for vector registers since the destinations are
3582 zero-extended to the full register width. */
3583 if (GENERAL_REGNO_P (regno))
3584 return SImode;
3585 else if (SSE_REGNO_P (regno))
3586 return V4SFmode;
3587 else if (MASK_REGNO_P (regno))
3588 return HImode;
3589 else if (MMX_REGNO_P (regno))
3590 return V4HImode;
3591 else
3592 gcc_unreachable ();
3593 }
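/* For illustration (assumed final assembly, register choices are
   hypothetical): the SET insns built by the caller from these modes are
   typically matched to "xorl %eax, %eax" for a GPR, which the hardware
   zero-extends to 64 bits, and to a 128-bit xor such as
   "vxorps %xmm0, %xmm0, %xmm0" for an SSE register.  */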
3594
3595 /* Generate a rtx to zero all vector registers together if possible,
3596 otherwise, return NULL. */
3597
3598 static rtx
3599 zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
3600 {
3601 if (!TARGET_AVX)
3602 return NULL;
3603
3604 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3605 if ((IN_RANGE (regno, FIRST_SSE_REG, LAST_SSE_REG)
3606 || (TARGET_64BIT
3607 && (REX_SSE_REGNO_P (regno)
3608 || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
3609 && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3610 return NULL;
3611
3612 return gen_avx_vzeroall ();
3613 }
3614
3615 /* Generate insns to zero all st registers together.
3616 Return true when zeroing instructions are generated.
3617 Assume the number of st registers that are zeroed is num_of_st,
3618 we will emit the following sequence to zero them together:
3619 fldz; \
3620 fldz; \
3621 ...
3622 fldz; \
3623 fstp %%st(0); \
3624 fstp %%st(0); \
3625 ...
3626 fstp %%st(0);
3627 i.e., num_of_st fldz insns followed by num_of_st fstp insns to clear
3628 the stack and mark the stack slots empty.
3629
3630 How to compute the num_of_st:
3631 There is no direct mapping from stack registers to hard register
3632 numbers. If one stack register needs to be cleared, we don't know
3633 where in the stack the value remains. So, if any stack register
3634 needs to be cleared, the whole stack should be cleared. However,
3635 x87 stack registers that hold the return value should be excluded.
3636 x87 returns its value in the top register (the top two for complex
3637 values), so num_of_st should be 7 or 6 when x87 returns, otherwise 8. */
3638
3639
3640 static bool
3641 zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
3642 {
3643
3644 /* If the FPU is disabled, no need to zero all st registers. */
3645 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3646 return false;
3647
3648 unsigned int num_of_st = 0;
3649 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3650 if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
3651 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3652 {
3653 num_of_st++;
3654 break;
3655 }
3656
3657 if (num_of_st == 0)
3658 return false;
3659
3660 bool return_with_x87 = false;
3661 return_with_x87 = (crtl->return_rtx
3662 && (STACK_REG_P (crtl->return_rtx)));
3663
3664 bool complex_return = false;
3665 complex_return = (crtl->return_rtx
3666 && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
3667
3668 if (return_with_x87)
3669 if (complex_return)
3670 num_of_st = 6;
3671 else
3672 num_of_st = 7;
3673 else
3674 num_of_st = 8;
3675
3676 rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
3677 for (unsigned int i = 0; i < num_of_st; i++)
3678 emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
3679
3680 for (unsigned int i = 0; i < num_of_st; i++)
3681 {
3682 rtx insn;
3683 insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
3684 add_reg_note (insn, REG_DEAD, st_reg);
3685 }
3686 return true;
3687 }
3688
3689
3690 /* When the routine exits in MMX mode, if any ST register needs
3691 to be zeroed, we should clear all MMX registers except
3692 RET_MMX_REGNO, which holds the return value. */
3693 static bool
3694 zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
3695 unsigned int ret_mmx_regno)
3696 {
3697 bool need_zero_all_mm = false;
3698 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3699 if (STACK_REGNO_P (regno)
3700 && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3701 {
3702 need_zero_all_mm = true;
3703 break;
3704 }
3705
3706 if (!need_zero_all_mm)
3707 return false;
3708
3709 rtx zero_mmx = NULL_RTX;
3710 machine_mode mode = V4HImode;
3711 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3712 if (regno != ret_mmx_regno)
3713 {
3714 rtx reg = gen_rtx_REG (mode, regno);
3715 if (zero_mmx == NULL_RTX)
3716 {
3717 zero_mmx = reg;
3718 emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
3719 }
3720 else
3721 emit_move_insn (reg, zero_mmx);
3722 }
3723 return true;
3724 }
3725
3726 /* TARGET_ZERO_CALL_USED_REGS. */
3727 /* Generate a sequence of instructions that zero registers specified by
3728 NEED_ZEROED_HARDREGS. Return the ZEROED_HARDREGS that are actually
3729 zeroed. */
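/* Usage sketch (illustrative): this hook backs -fzero-call-used-regs=
   and the corresponding function attribute, e.g.

       void __attribute__ ((zero_call_used_regs ("used-gpr"))) f (void);

   which requests that the call-used general registers F actually used
   be zeroed just before F returns.  */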
3730 static HARD_REG_SET
3731 ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
3732 {
3733 HARD_REG_SET zeroed_hardregs;
3734 bool all_sse_zeroed = false;
3735 bool all_st_zeroed = false;
3736 bool all_mm_zeroed = false;
3737
3738 CLEAR_HARD_REG_SET (zeroed_hardregs);
3739
3740 /* First, let's see whether we can zero all vector registers together. */
3741 rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
3742 if (zero_all_vec_insn)
3743 {
3744 emit_insn (zero_all_vec_insn);
3745 all_sse_zeroed = true;
3746 }
3747
3748 /* The mm/st registers are a shared register set; we should follow these
3749 rules to clear them:
3750 MMX exit mode x87 exit mode
3751 -------------|----------------------|---------------
3752 uses x87 reg | clear all MMX | clear all x87
3753 uses MMX reg | clear individual MMX | clear all x87
3754 x87 + MMX | clear all MMX | clear all x87
3755
3756 First, we should decide which mode (MMX mode or x87 mode) the function
3757 exits with. */
3758
3759 bool exit_with_mmx_mode = (crtl->return_rtx
3760 && (MMX_REG_P (crtl->return_rtx)));
3761
3762 if (!exit_with_mmx_mode)
3763 /* x87 exit mode: we should zero all st registers together. */
3764 {
3765 all_st_zeroed = zero_all_st_registers (need_zeroed_hardregs);
3766 if (all_st_zeroed)
3767 SET_HARD_REG_BIT (zeroed_hardregs, FIRST_STACK_REG);
3768 }
3769 else
3770 /* MMX exit mode: check whether we can zero all mm registers. */
3771 {
3772 unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
3773 all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
3774 exit_mmx_regno);
3775 if (all_mm_zeroed)
3776 for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
3777 if (regno != exit_mmx_regno)
3778 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3779 }
3780
3781 /* Now, generate instructions to zero all the other registers. */
3782
3783 rtx zero_gpr = NULL_RTX;
3784 rtx zero_vector = NULL_RTX;
3785 rtx zero_mask = NULL_RTX;
3786 rtx zero_mmx = NULL_RTX;
3787
3788 for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
3789 {
3790 if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
3791 continue;
3792 if (!zero_call_used_regno_p (regno, all_sse_zeroed,
3793 exit_with_mmx_mode && !all_mm_zeroed))
3794 continue;
3795
3796 SET_HARD_REG_BIT (zeroed_hardregs, regno);
3797
3798 rtx reg, tmp, zero_rtx;
3799 machine_mode mode = zero_call_used_regno_mode (regno);
3800
3801 reg = gen_rtx_REG (mode, regno);
3802 zero_rtx = CONST0_RTX (mode);
3803
3804 if (mode == SImode)
3805 if (zero_gpr == NULL_RTX)
3806 {
3807 zero_gpr = reg;
3808 tmp = gen_rtx_SET (reg, zero_rtx);
3809 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
3810 {
3811 rtx clob = gen_rtx_CLOBBER (VOIDmode,
3812 gen_rtx_REG (CCmode,
3813 FLAGS_REG));
3814 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
3815 tmp,
3816 clob));
3817 }
3818 emit_insn (tmp);
3819 }
3820 else
3821 emit_move_insn (reg, zero_gpr);
3822 else if (mode == V4SFmode)
3823 if (zero_vector == NULL_RTX)
3824 {
3825 zero_vector = reg;
3826 tmp = gen_rtx_SET (reg, zero_rtx);
3827 emit_insn (tmp);
3828 }
3829 else
3830 emit_move_insn (reg, zero_vector);
3831 else if (mode == HImode)
3832 if (zero_mask == NULL_RTX)
3833 {
3834 zero_mask = reg;
3835 tmp = gen_rtx_SET (reg, zero_rtx);
3836 emit_insn (tmp);
3837 }
3838 else
3839 emit_move_insn (reg, zero_mask);
3840 else if (mode == V4HImode)
3841 if (zero_mmx == NULL_RTX)
3842 {
3843 zero_mmx = reg;
3844 tmp = gen_rtx_SET (reg, zero_rtx);
3845 emit_insn (tmp);
3846 }
3847 else
3848 emit_move_insn (reg, zero_mmx);
3849 else
3850 gcc_unreachable ();
3851 }
3852 return zeroed_hardregs;
3853 }
3854
3855 /* Define how to find the value returned by a function.
3856 VALTYPE is the data type of the value (as a tree).
3857 If the precise function being called is known, FUNC is its FUNCTION_DECL;
3858 otherwise, FUNC is 0. */
3859
3860 static rtx
3861 function_value_32 (machine_mode orig_mode, machine_mode mode,
3862 const_tree fntype, const_tree fn)
3863 {
3864 unsigned int regno;
3865
3866 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3867 we normally prevent this case when mmx is not available. However
3868 some ABIs may require the result to be returned like DImode. */
3869 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3870 regno = FIRST_MMX_REG;
3871
3872 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
3873 we prevent this case when sse is not available. However some ABIs
3874 may require the result to be returned like integer TImode. */
3875 else if (mode == TImode
3876 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3877 regno = FIRST_SSE_REG;
3878
3879 /* 32-byte vector modes in %ymm0. */
3880 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
3881 regno = FIRST_SSE_REG;
3882
3883 /* 64-byte vector modes in %zmm0. */
3884 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
3885 regno = FIRST_SSE_REG;
3886
3887 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
3888 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
3889 regno = FIRST_FLOAT_REG;
3890 else
3891 /* Most things go in %eax. */
3892 regno = AX_REG;
3893
3894 /* Override FP return register with %xmm0 for local functions when
3895 SSE math is enabled or for functions with sseregparm attribute. */
3896 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
3897 {
3898 int sse_level = ix86_function_sseregparm (fntype, fn, false);
3899 if (sse_level == -1)
3900 {
3901 error ("calling %qD with SSE calling convention without "
3902 "SSE/SSE2 enabled", fn);
3903 sorry ("this is a GCC bug that can be worked around by adding "
3904 "attribute used to function called");
3905 }
3906 else if ((sse_level >= 1 && mode == SFmode)
3907 || (sse_level == 2 && mode == DFmode))
3908 regno = FIRST_SSE_REG;
3909 }
3910
3911 /* OImode shouldn't be used directly. */
3912 gcc_assert (mode != OImode);
3913
3914 return gen_rtx_REG (orig_mode, regno);
3915 }
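
/* A quick reference for the mapping above (a sketch assuming plain -m32
   defaults; -mno-fp-ret-in-387 and the sseregparm attribute change it):

     int    f1 (void);       value in %eax   (AX_REG)
     double f2 (void);       value in %st(0) (FIRST_FLOAT_REG)
     __m64  f3 (void);       value in %mm0   (FIRST_MMX_REG)
     __m128 f4 (void);       value in %xmm0  (FIRST_SSE_REG)  */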
3916
3917 static rtx
3918 function_value_64 (machine_mode orig_mode, machine_mode mode,
3919 const_tree valtype)
3920 {
3921 rtx ret;
3922
3923 /* Handle libcalls, which don't provide a type node. */
3924 if (valtype == NULL)
3925 {
3926 unsigned int regno;
3927
3928 switch (mode)
3929 {
3930 case E_SFmode:
3931 case E_SCmode:
3932 case E_DFmode:
3933 case E_DCmode:
3934 case E_TFmode:
3935 case E_SDmode:
3936 case E_DDmode:
3937 case E_TDmode:
3938 regno = FIRST_SSE_REG;
3939 break;
3940 case E_XFmode:
3941 case E_XCmode:
3942 regno = FIRST_FLOAT_REG;
3943 break;
3944 case E_TCmode:
3945 return NULL;
3946 default:
3947 regno = AX_REG;
3948 }
3949
3950 return gen_rtx_REG (mode, regno);
3951 }
3952 else if (POINTER_TYPE_P (valtype))
3953 {
3954 /* Pointers are always returned in word_mode. */
3955 mode = word_mode;
3956 }
3957
3958 ret = construct_container (mode, orig_mode, valtype, 1,
3959 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
3960 x86_64_int_return_registers, 0);
3961
3962 /* For zero-sized structures, construct_container returns NULL, but we
3963 need to keep the rest of the compiler happy by returning a meaningful value. */
3964 if (!ret)
3965 ret = gen_rtx_REG (orig_mode, AX_REG);
3966
3967 return ret;
3968 }
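
/* Likewise a sketch for the 64-bit SysV path; the precise register split
   is decided by construct_container following the psABI classification:

     long   f1 (void);                          %rax
     double f2 (void);                          %xmm0
     struct { double a, b; } f3 (void);         %xmm0, %xmm1
     struct { long a; double b; } f4 (void);    %rax, %xmm0  */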
3969
3970 static rtx
3971 function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
3972 const_tree fntype, const_tree fn, const_tree valtype)
3973 {
3974 unsigned int regno;
3975
3976 /* Floating point return values in %st(0)
3977 (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */
3978 if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
3979 && (GET_MODE_SIZE (mode) > 8
3980 || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
3981 {
3982 regno = FIRST_FLOAT_REG;
3983 return gen_rtx_REG (orig_mode, regno);
3984 }
3985 else
3986 return function_value_32 (orig_mode, mode, fntype, fn);
3987 }
3988
3989 static rtx
3990 function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
3991 const_tree valtype)
3992 {
3993 unsigned int regno = AX_REG;
3994
3995 if (TARGET_SSE)
3996 {
3997 switch (GET_MODE_SIZE (mode))
3998 {
3999 case 16:
4000 if (valtype != NULL_TREE
4001 && !VECTOR_INTEGER_TYPE_P (valtype)
4003 && !INTEGRAL_TYPE_P (valtype)
4004 && !VECTOR_FLOAT_TYPE_P (valtype))
4005 break;
4006 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4007 && !COMPLEX_MODE_P (mode))
4008 regno = FIRST_SSE_REG;
4009 break;
4010 case 8:
4011 case 4:
4012 if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
4013 break;
4014 if (mode == SFmode || mode == DFmode)
4015 regno = FIRST_SSE_REG;
4016 break;
4017 default:
4018 break;
4019 }
4020 }
4021 return gen_rtx_REG (orig_mode, regno);
4022 }
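
/* Sketch of the MS x64 convention handled above (hedged; see the 16/8/4
   byte cases in the switch):

     long    f1 (void);                  %rax
     double  f2 (void);                  %xmm0
     __m128  f3 (void);                  %xmm0
     struct { char c[16]; } f4 (void);   returned via a hidden pointer,
					 see ix86_return_in_memory.  */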
4023
4024 static rtx
4025 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4026 machine_mode orig_mode, machine_mode mode)
4027 {
4028 const_tree fn, fntype;
4029
4030 fn = NULL_TREE;
4031 if (fntype_or_decl && DECL_P (fntype_or_decl))
4032 fn = fntype_or_decl;
4033 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4034
4035 if (ix86_function_type_abi (fntype) == MS_ABI)
4036 {
4037 if (TARGET_64BIT)
4038 return function_value_ms_64 (orig_mode, mode, valtype);
4039 else
4040 return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
4041 }
4042 else if (TARGET_64BIT)
4043 return function_value_64 (orig_mode, mode, valtype);
4044 else
4045 return function_value_32 (orig_mode, mode, fntype, fn);
4046 }
4047
4048 static rtx
4049 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
4050 {
4051 machine_mode mode, orig_mode;
4052
4053 orig_mode = TYPE_MODE (valtype);
4054 mode = type_natural_mode (valtype, NULL, true);
4055 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4056 }
4057
4058 /* Pointer function arguments and return values are promoted to
4059 word_mode for normal functions. */
4060
4061 static machine_mode
4062 ix86_promote_function_mode (const_tree type, machine_mode mode,
4063 int *punsignedp, const_tree fntype,
4064 int for_return)
4065 {
4066 if (cfun->machine->func_type == TYPE_NORMAL
4067 && type != NULL_TREE
4068 && POINTER_TYPE_P (type))
4069 {
4070 *punsignedp = POINTERS_EXTEND_UNSIGNED;
4071 return word_mode;
4072 }
4073 return default_promote_function_mode (type, mode, punsignedp, fntype,
4074 for_return);
4075 }
4076
4077 /* Return true if a structure, union or array with MODE containing FIELD
4078 should be accessed using BLKmode. */
4079
4080 static bool
4081 ix86_member_type_forces_blk (const_tree field, machine_mode mode)
4082 {
4083 /* Union with XFmode must be in BLKmode. */
4084 return (mode == XFmode
4085 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
4086 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
4087 }
4088
4089 rtx
4090 ix86_libcall_value (machine_mode mode)
4091 {
4092 return ix86_function_value_1 (NULL, NULL, mode, mode);
4093 }
4094
4095 /* Return true iff type is returned in memory. */
4096
4097 static bool
4098 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4099 {
4100 const machine_mode mode = type_natural_mode (type, NULL, true);
4101 HOST_WIDE_INT size;
4102
4103 if (TARGET_64BIT)
4104 {
4105 if (ix86_function_type_abi (fntype) == MS_ABI)
4106 {
4107 size = int_size_in_bytes (type);
4108
4109 /* __m128 is returned in xmm0. */
4110 if ((!type || VECTOR_INTEGER_TYPE_P (type)
4111 || INTEGRAL_TYPE_P (type)
4112 || VECTOR_FLOAT_TYPE_P (type))
4113 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4114 && !COMPLEX_MODE_P (mode)
4115 && (GET_MODE_SIZE (mode) == 16 || size == 16))
4116 return false;
4117
4118 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4119 return size != 1 && size != 2 && size != 4 && size != 8;
4120 }
4121 else
4122 {
4123 int needed_intregs, needed_sseregs;
4124
4125 return examine_argument (mode, type, 1,
4126 &needed_intregs, &needed_sseregs);
4127 }
4128 }
4129 else
4130 {
4131 size = int_size_in_bytes (type);
4132
4133 /* Intel MCU psABI returns scalars and aggregates no larger than 8
4134 bytes in registers. */
4135 if (TARGET_IAMCU)
4136 return VECTOR_MODE_P (mode) || size < 0 || size > 8;
4137
4138 if (mode == BLKmode)
4139 return true;
4140
4141 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4142 return false;
4143
4144 if (VECTOR_MODE_P (mode) || mode == TImode)
4145 {
4146 /* User-created vectors small enough to fit in EAX. */
4147 if (size < 8)
4148 return false;
4149
4150 /* Unless the ABI prescribes otherwise,
4151 MMX/3dNow values are returned in MM0 if available. */
4152
4153 if (size == 8)
4154 return TARGET_VECT8_RETURNS || !TARGET_MMX;
4155
4156 /* SSE values are returned in XMM0 if available. */
4157 if (size == 16)
4158 return !TARGET_SSE;
4159
4160 /* AVX values are returned in YMM0 if available. */
4161 if (size == 32)
4162 return !TARGET_AVX;
4163
4164 /* AVX512F values are returned in ZMM0 if available. */
4165 if (size == 64)
4166 return !TARGET_AVX512F;
4167 }
4168
4169 if (mode == XFmode)
4170 return false;
4171
4172 if (size > 12)
4173 return true;
4174
4175 /* OImode shouldn't be used directly. */
4176 gcc_assert (mode != OImode);
4177
4178 return false;
4179 }
4180 }
4181
4182 \f
4183 /* Create the va_list data type. */
4184
4185 static tree
4186 ix86_build_builtin_va_list_64 (void)
4187 {
4188 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4189
4190 record = lang_hooks.types.make_type (RECORD_TYPE);
4191 type_decl = build_decl (BUILTINS_LOCATION,
4192 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4193
4194 f_gpr = build_decl (BUILTINS_LOCATION,
4195 FIELD_DECL, get_identifier ("gp_offset"),
4196 unsigned_type_node);
4197 f_fpr = build_decl (BUILTINS_LOCATION,
4198 FIELD_DECL, get_identifier ("fp_offset"),
4199 unsigned_type_node);
4200 f_ovf = build_decl (BUILTINS_LOCATION,
4201 FIELD_DECL, get_identifier ("overflow_arg_area"),
4202 ptr_type_node);
4203 f_sav = build_decl (BUILTINS_LOCATION,
4204 FIELD_DECL, get_identifier ("reg_save_area"),
4205 ptr_type_node);
4206
4207 va_list_gpr_counter_field = f_gpr;
4208 va_list_fpr_counter_field = f_fpr;
4209
4210 DECL_FIELD_CONTEXT (f_gpr) = record;
4211 DECL_FIELD_CONTEXT (f_fpr) = record;
4212 DECL_FIELD_CONTEXT (f_ovf) = record;
4213 DECL_FIELD_CONTEXT (f_sav) = record;
4214
4215 TYPE_STUB_DECL (record) = type_decl;
4216 TYPE_NAME (record) = type_decl;
4217 TYPE_FIELDS (record) = f_gpr;
4218 DECL_CHAIN (f_gpr) = f_fpr;
4219 DECL_CHAIN (f_fpr) = f_ovf;
4220 DECL_CHAIN (f_ovf) = f_sav;
4221
4222 layout_type (record);
4223
4224 TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
4225 NULL_TREE, TYPE_ATTRIBUTES (record));
4226
4227 /* The correct type is an array type of one element. */
4228 return build_array_type (record, build_index_type (size_zero_node));
4229 }
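
/* For reference, the record built above matches the familiar SysV x86-64
   va_list layout (a C sketch of the same structure):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;        next free GPR slot in reg_save_area
       unsigned int fp_offset;        next free SSE slot (starts at 48)
       void *overflow_arg_area;       arguments passed on the stack
       void *reg_save_area;           register save area from the prologue
     } __builtin_va_list[1];  */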
4230
4231 /* Set up the builtin va_list data type and, for 64-bit, the additional
4232 calling convention specific va_list data types. */
4233
4234 static tree
4235 ix86_build_builtin_va_list (void)
4236 {
4237 if (TARGET_64BIT)
4238 {
4239 /* Initialize ABI specific va_list builtin types.
4240
4241 In lto1, we can encounter two va_list types:
4242 - one as a result of the type-merge across TUs, and
4243 - the one constructed here.
4244 These two types will not have the same TYPE_MAIN_VARIANT, and therefore
4245 a type identity check in canonical_va_list_type based on
4246 TYPE_MAIN_VARIANT (which we used to have) will not work.
4247 Instead, we tag each va_list_type_node with its unique attribute, and
4248 look for the attribute in the type identity check in
4249 canonical_va_list_type.
4250
4251 Tagging sysv_va_list_type_node directly with the attribute is
4252 problematic since it's an array of one record, which will degrade into a
4253 pointer to the record when used as a parameter (see build_va_arg comments
4254 for an example), dropping the attribute in the process. So we tag the
4255 record instead. */
4256
4257 /* For SYSV_ABI we use an array of one record. */
4258 sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
4259
4260 /* For MS_ABI we use plain pointer to argument area. */
4261 tree char_ptr_type = build_pointer_type (char_type_node);
4262 tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
4263 TYPE_ATTRIBUTES (char_ptr_type));
4264 ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
4265
4266 return ((ix86_abi == MS_ABI)
4267 ? ms_va_list_type_node
4268 : sysv_va_list_type_node);
4269 }
4270 else
4271 {
4272 /* For i386 we use plain pointer to argument area. */
4273 return build_pointer_type (char_type_node);
4274 }
4275 }
4276
4277 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4278
4279 static void
4280 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4281 {
4282 rtx save_area, mem;
4283 alias_set_type set;
4284 int i, max;
4285
4286 /* GPR size of varargs save area. */
4287 if (cfun->va_list_gpr_size)
4288 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
4289 else
4290 ix86_varargs_gpr_size = 0;
4291
4292 /* FPR size of varargs save area. We don't need it if we don't pass
4293 anything in SSE registers. */
4294 if (TARGET_SSE && cfun->va_list_fpr_size)
4295 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
4296 else
4297 ix86_varargs_fpr_size = 0;
4298
4299 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
4300 return;
4301
4302 save_area = frame_pointer_rtx;
4303 set = get_varargs_alias_set ();
4304
4305 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4306 if (max > X86_64_REGPARM_MAX)
4307 max = X86_64_REGPARM_MAX;
4308
4309 for (i = cum->regno; i < max; i++)
4310 {
4311 mem = gen_rtx_MEM (word_mode,
4312 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
4313 MEM_NOTRAP_P (mem) = 1;
4314 set_mem_alias_set (mem, set);
4315 emit_move_insn (mem,
4316 gen_rtx_REG (word_mode,
4317 x86_64_int_parameter_registers[i]));
4318 }
4319
4320 if (ix86_varargs_fpr_size)
4321 {
4322 machine_mode smode;
4323 rtx_code_label *label;
4324 rtx test;
4325
4326 /* Now emit code to save SSE registers. The AX parameter contains number
4327 of SSE parameter registers used to call this function, though all we
4328 actually check here is the zero/non-zero status. */
4329
4330 label = gen_label_rtx ();
4331 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
4332 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
4333 label));
4334
4335 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
4336 we used movdqa (i.e. TImode) instead? Perhaps even better would
4337 be if we could determine the real mode of the data, via a hook
4338 into pass_stdarg. Ignore all that for now. */
4339 smode = V4SFmode;
4340 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
4341 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
4342
4343 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
4344 if (max > X86_64_SSE_REGPARM_MAX)
4345 max = X86_64_SSE_REGPARM_MAX;
4346
4347 for (i = cum->sse_regno; i < max; ++i)
4348 {
4349 mem = plus_constant (Pmode, save_area,
4350 i * 16 + ix86_varargs_gpr_size);
4351 mem = gen_rtx_MEM (smode, mem);
4352 MEM_NOTRAP_P (mem) = 1;
4353 set_mem_alias_set (mem, set);
4354 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
4355
4356 emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
4357 }
4358
4359 emit_label (label);
4360 }
4361 }
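
/* Layout sketch of the save area filled above (assuming all 6 integer and
   all 8 SSE argument registers are dumped; offsets are relative to the
   start of the area):

     0 .. 47      %rdi, %rsi, %rdx, %rcx, %r8, %r9    8 bytes each
     48 .. 175    %xmm0 .. %xmm7                      16 bytes each

   The SSE block is skipped at run time when %al (the caller's count of
   SSE argument registers used) is zero.  */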
4362
4363 static void
4364 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4365 {
4366 alias_set_type set = get_varargs_alias_set ();
4367 int i;
4368
4369 /* Reset to zero, as there might be a sysv va_arg used
4370 before. */
4371 ix86_varargs_gpr_size = 0;
4372 ix86_varargs_fpr_size = 0;
4373
4374 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
4375 {
4376 rtx reg, mem;
4377
4378 mem = gen_rtx_MEM (Pmode,
4379 plus_constant (Pmode, virtual_incoming_args_rtx,
4380 i * UNITS_PER_WORD));
4381 MEM_NOTRAP_P (mem) = 1;
4382 set_mem_alias_set (mem, set);
4383
4384 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4385 emit_move_insn (mem, reg);
4386 }
4387 }
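
/* Sketch: under the MS ABI the remaining register parameters are simply
   homed into their caller-allocated shadow slots, at offset i * 8 from the
   incoming argument pointer, so va_arg can then walk the arguments
   linearly on the stack.  */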
4388
4389 static void
4390 ix86_setup_incoming_varargs (cumulative_args_t cum_v,
4391 const function_arg_info &arg,
4392 int *, int no_rtl)
4393 {
4394 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
4395 CUMULATIVE_ARGS next_cum;
4396 tree fntype;
4397
4398 /* This argument doesn't appear to be used anymore. Which is good,
4399 because the old code here didn't suppress rtl generation. */
4400 gcc_assert (!no_rtl);
4401
4402 if (!TARGET_64BIT)
4403 return;
4404
4405 fntype = TREE_TYPE (current_function_decl);
4406
4407 /* For varargs, we do not want to skip the dummy va_dcl argument.
4408 For stdargs, we do want to skip the last named argument. */
4409 next_cum = *cum;
4410 if (stdarg_p (fntype))
4411 ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
4412
4413 if (cum->call_abi == MS_ABI)
4414 setup_incoming_varargs_ms_64 (&next_cum);
4415 else
4416 setup_incoming_varargs_64 (&next_cum);
4417 }
4418
4419 /* Check whether TYPE is a va_list of kind char *. */
4420
4421 static bool
4422 is_va_list_char_pointer (tree type)
4423 {
4424 tree canonic;
4425
4426 /* For 32-bit it is always true. */
4427 if (!TARGET_64BIT)
4428 return true;
4429 canonic = ix86_canonical_va_list_type (type);
4430 return (canonic == ms_va_list_type_node
4431 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
4432 }
4433
4434 /* Implement va_start. */
4435
4436 static void
4437 ix86_va_start (tree valist, rtx nextarg)
4438 {
4439 HOST_WIDE_INT words, n_gpr, n_fpr;
4440 tree f_gpr, f_fpr, f_ovf, f_sav;
4441 tree gpr, fpr, ovf, sav, t;
4442 tree type;
4443 rtx ovf_rtx;
4444
4445 if (flag_split_stack
4446 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4447 {
4448 unsigned int scratch_regno;
4449
4450 /* When we are splitting the stack, we can't refer to the stack
4451 arguments using internal_arg_pointer, because they may be on
4452 the old stack. The split stack prologue will arrange to
4453 leave a pointer to the old stack arguments in a scratch
4454 register, which we here copy to a pseudo-register. The split
4455 stack prologue can't set the pseudo-register directly because
4456 it (the prologue) runs before any registers have been saved. */
4457
4458 scratch_regno = split_stack_prologue_scratch_regno ();
4459 if (scratch_regno != INVALID_REGNUM)
4460 {
4461 rtx reg;
4462 rtx_insn *seq;
4463
4464 reg = gen_reg_rtx (Pmode);
4465 cfun->machine->split_stack_varargs_pointer = reg;
4466
4467 start_sequence ();
4468 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
4469 seq = get_insns ();
4470 end_sequence ();
4471
4472 push_topmost_sequence ();
4473 emit_insn_after (seq, entry_of_function ());
4474 pop_topmost_sequence ();
4475 }
4476 }
4477
4478 /* Only 64bit target needs something special. */
4479 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4480 {
4481 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4482 std_expand_builtin_va_start (valist, nextarg);
4483 else
4484 {
4485 rtx va_r, next;
4486
4487 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
4488 next = expand_binop (ptr_mode, add_optab,
4489 cfun->machine->split_stack_varargs_pointer,
4490 crtl->args.arg_offset_rtx,
4491 NULL_RTX, 0, OPTAB_LIB_WIDEN);
4492 convert_move (va_r, next, 0);
4493 }
4494 return;
4495 }
4496
4497 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4498 f_fpr = DECL_CHAIN (f_gpr);
4499 f_ovf = DECL_CHAIN (f_fpr);
4500 f_sav = DECL_CHAIN (f_ovf);
4501
4502 valist = build_simple_mem_ref (valist);
4503 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
4504 /* The following should be folded into the MEM_REF offset. */
4505 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
4506 f_gpr, NULL_TREE);
4507 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
4508 f_fpr, NULL_TREE);
4509 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
4510 f_ovf, NULL_TREE);
4511 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
4512 f_sav, NULL_TREE);
4513
4514 /* Count number of gp and fp argument registers used. */
4515 words = crtl->args.info.words;
4516 n_gpr = crtl->args.info.regno;
4517 n_fpr = crtl->args.info.sse_regno;
4518
4519 if (cfun->va_list_gpr_size)
4520 {
4521 type = TREE_TYPE (gpr);
4522 t = build2 (MODIFY_EXPR, type,
4523 gpr, build_int_cst (type, n_gpr * 8));
4524 TREE_SIDE_EFFECTS (t) = 1;
4525 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4526 }
4527
4528 if (TARGET_SSE && cfun->va_list_fpr_size)
4529 {
4530 type = TREE_TYPE (fpr);
4531 t = build2 (MODIFY_EXPR, type, fpr,
4532 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
4533 TREE_SIDE_EFFECTS (t) = 1;
4534 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4535 }
4536
4537 /* Find the overflow area. */
4538 type = TREE_TYPE (ovf);
4539 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
4540 ovf_rtx = crtl->args.internal_arg_pointer;
4541 else
4542 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
4543 t = make_tree (type, ovf_rtx);
4544 if (words != 0)
4545 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
4546
4547 t = build2 (MODIFY_EXPR, type, ovf, t);
4548 TREE_SIDE_EFFECTS (t) = 1;
4549 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4550
4551 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
4552 {
4553 /* Find the register save area.
4554 The function prologue saves it right above the stack frame. */
4555 type = TREE_TYPE (sav);
4556 t = make_tree (type, frame_pointer_rtx);
4557 if (!ix86_varargs_gpr_size)
4558 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
4559
4560 t = build2 (MODIFY_EXPR, type, sav, t);
4561 TREE_SIDE_EFFECTS (t) = 1;
4562 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4563 }
4564 }
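
/* Worked example of the values stored above (hedged; offsets follow the
   psABI register save area layout).  For

     void f (int a, ...);

   a is the only named argument, so va_start sets
     gp_offset          = 1 * 8 = 8
     fp_offset          = 0 * 16 + 48 = 48
     overflow_arg_area  = start of the on-stack argument area
     reg_save_area      = base of the area saved by the prologue.  */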
4565
4566 /* Implement va_arg. */
4567
4568 static tree
4569 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
4570 gimple_seq *post_p)
4571 {
4572 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4573 tree f_gpr, f_fpr, f_ovf, f_sav;
4574 tree gpr, fpr, ovf, sav, t;
4575 int size, rsize;
4576 tree lab_false, lab_over = NULL_TREE;
4577 tree addr, t2;
4578 rtx container;
4579 int indirect_p = 0;
4580 tree ptrtype;
4581 machine_mode nat_mode;
4582 unsigned int arg_boundary;
4583 unsigned int type_align;
4584
4585 /* Only 64bit target needs something special. */
4586 if (is_va_list_char_pointer (TREE_TYPE (valist)))
4587 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4588
4589 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
4590 f_fpr = DECL_CHAIN (f_gpr);
4591 f_ovf = DECL_CHAIN (f_fpr);
4592 f_sav = DECL_CHAIN (f_ovf);
4593
4594 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
4595 valist, f_gpr, NULL_TREE);
4596
4597 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4598 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4599 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4600
4601 indirect_p = pass_va_arg_by_reference (type);
4602 if (indirect_p)
4603 type = build_pointer_type (type);
4604 size = arg_int_size_in_bytes (type);
4605 rsize = CEIL (size, UNITS_PER_WORD);
4606
4607 nat_mode = type_natural_mode (type, NULL, false);
4608 switch (nat_mode)
4609 {
4610 case E_V8SFmode:
4611 case E_V8SImode:
4612 case E_V32QImode:
4613 case E_V16HImode:
4614 case E_V4DFmode:
4615 case E_V4DImode:
4616 case E_V16SFmode:
4617 case E_V16SImode:
4618 case E_V64QImode:
4619 case E_V32HImode:
4620 case E_V8DFmode:
4621 case E_V8DImode:
4622 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
4623 if (!TARGET_64BIT_MS_ABI)
4624 {
4625 container = NULL;
4626 break;
4627 }
4628 /* FALLTHRU */
4629
4630 default:
4631 container = construct_container (nat_mode, TYPE_MODE (type),
4632 type, 0, X86_64_REGPARM_MAX,
4633 X86_64_SSE_REGPARM_MAX, intreg,
4634 0);
4635 break;
4636 }
4637
4638 /* Pull the value out of the saved registers. */
4639
4640 addr = create_tmp_var (ptr_type_node, "addr");
4641 type_align = TYPE_ALIGN (type);
4642
4643 if (container)
4644 {
4645 int needed_intregs, needed_sseregs;
4646 bool need_temp;
4647 tree int_addr, sse_addr;
4648
4649 lab_false = create_artificial_label (UNKNOWN_LOCATION);
4650 lab_over = create_artificial_label (UNKNOWN_LOCATION);
4651
4652 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4653
4654 need_temp = (!REG_P (container)
4655 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4656 || TYPE_ALIGN (type) > 128));
4657
4658 /* In case we are passing a structure, verify that it is a consecutive block
4659 in the register save area. If not, we need to do moves. */
4660 if (!need_temp && !REG_P (container))
4661 {
4662 /* Verify that all registers are strictly consecutive. */
4663 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4664 {
4665 int i;
4666
4667 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4668 {
4669 rtx slot = XVECEXP (container, 0, i);
4670 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4671 || INTVAL (XEXP (slot, 1)) != i * 16)
4672 need_temp = true;
4673 }
4674 }
4675 else
4676 {
4677 int i;
4678
4679 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4680 {
4681 rtx slot = XVECEXP (container, 0, i);
4682 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4683 || INTVAL (XEXP (slot, 1)) != i * 8)
4684 need_temp = true;
4685 }
4686 }
4687 }
4688 if (!need_temp)
4689 {
4690 int_addr = addr;
4691 sse_addr = addr;
4692 }
4693 else
4694 {
4695 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4696 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4697 }
4698
4699 /* First ensure that we fit completely in registers. */
4700 if (needed_intregs)
4701 {
4702 t = build_int_cst (TREE_TYPE (gpr),
4703 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
4704 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4705 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4706 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4707 gimplify_and_add (t, pre_p);
4708 }
4709 if (needed_sseregs)
4710 {
4711 t = build_int_cst (TREE_TYPE (fpr),
4712 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4713 + X86_64_REGPARM_MAX * 8);
4714 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4715 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4716 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4717 gimplify_and_add (t, pre_p);
4718 }
4719
4720 /* Compute index to start of area used for integer regs. */
4721 if (needed_intregs)
4722 {
4723 /* int_addr = gpr + sav; */
4724 t = fold_build_pointer_plus (sav, gpr);
4725 gimplify_assign (int_addr, t, pre_p);
4726 }
4727 if (needed_sseregs)
4728 {
4729 /* sse_addr = fpr + sav; */
4730 t = fold_build_pointer_plus (sav, fpr);
4731 gimplify_assign (sse_addr, t, pre_p);
4732 }
4733 if (need_temp)
4734 {
4735 int i, prev_size = 0;
4736 tree temp = create_tmp_var (type, "va_arg_tmp");
4737
4738 /* addr = &temp; */
4739 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4740 gimplify_assign (addr, t, pre_p);
4741
4742 for (i = 0; i < XVECLEN (container, 0); i++)
4743 {
4744 rtx slot = XVECEXP (container, 0, i);
4745 rtx reg = XEXP (slot, 0);
4746 machine_mode mode = GET_MODE (reg);
4747 tree piece_type;
4748 tree addr_type;
4749 tree daddr_type;
4750 tree src_addr, src;
4751 int src_offset;
4752 tree dest_addr, dest;
4753 int cur_size = GET_MODE_SIZE (mode);
4754
4755 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
4756 prev_size = INTVAL (XEXP (slot, 1));
4757 if (prev_size + cur_size > size)
4758 {
4759 cur_size = size - prev_size;
4760 unsigned int nbits = cur_size * BITS_PER_UNIT;
4761 if (!int_mode_for_size (nbits, 1).exists (&mode))
4762 mode = QImode;
4763 }
4764 piece_type = lang_hooks.types.type_for_mode (mode, 1);
4765 if (mode == GET_MODE (reg))
4766 addr_type = build_pointer_type (piece_type);
4767 else
4768 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4769 true);
4770 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
4771 true);
4772
4773 if (SSE_REGNO_P (REGNO (reg)))
4774 {
4775 src_addr = sse_addr;
4776 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4777 }
4778 else
4779 {
4780 src_addr = int_addr;
4781 src_offset = REGNO (reg) * 8;
4782 }
4783 src_addr = fold_convert (addr_type, src_addr);
4784 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
4785
4786 dest_addr = fold_convert (daddr_type, addr);
4787 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
4788 if (cur_size == GET_MODE_SIZE (mode))
4789 {
4790 src = build_va_arg_indirect_ref (src_addr);
4791 dest = build_va_arg_indirect_ref (dest_addr);
4792
4793 gimplify_assign (dest, src, pre_p);
4794 }
4795 else
4796 {
4797 tree copy
4798 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
4799 3, dest_addr, src_addr,
4800 size_int (cur_size));
4801 gimplify_and_add (copy, pre_p);
4802 }
4803 prev_size += cur_size;
4804 }
4805 }
4806
4807 if (needed_intregs)
4808 {
4809 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4810 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4811 gimplify_assign (gpr, t, pre_p);
4812 /* The GPR save area guarantees only 8-byte alignment. */
4813 if (!need_temp)
4814 type_align = MIN (type_align, 64);
4815 }
4816
4817 if (needed_sseregs)
4818 {
4819 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4820 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4821 gimplify_assign (unshare_expr (fpr), t, pre_p);
4822 }
4823
4824 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
4825
4826 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
4827 }
4828
4829 /* ... otherwise out of the overflow area. */
4830
4831 /* When we align a parameter on the stack for the caller, if its
4832 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
4833 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
4834 with the caller. */
4835 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
4836 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
4837 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
4838
4839 /* Care for on-stack alignment if needed. */
4840 if (arg_boundary <= 64 || size == 0)
4841 t = ovf;
4842 else
4843 {
4844 HOST_WIDE_INT align = arg_boundary / 8;
4845 t = fold_build_pointer_plus_hwi (ovf, align - 1);
4846 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4847 build_int_cst (TREE_TYPE (t), -align));
4848 }
4849
4850 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4851 gimplify_assign (addr, t, pre_p);
4852
4853 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
4854 gimplify_assign (unshare_expr (ovf), t, pre_p);
4855
4856 if (container)
4857 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
4858
4859 type = build_aligned_type (type, type_align);
4860 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
4861 addr = fold_convert (ptrtype, addr);
4862
4863 if (indirect_p)
4864 addr = build_va_arg_indirect_ref (addr);
4865 return build_va_arg_indirect_ref (addr);
4866 }
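
/* The register/stack selection above follows the psABI va_arg algorithm;
   a hedged C sketch of the expansion for a single integer argument:

     if (ap->gp_offset <= 5 * 8)
       {
	 val = *(int *) ((char *) ap->reg_save_area + ap->gp_offset);
	 ap->gp_offset += 8;
       }
     else
       {
	 val = *(int *) ap->overflow_arg_area;
	 ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
       }
*/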
4867 \f
4868 /* Return true if OPNUM's MEM should be matched
4869 in movabs* patterns. */
4870
4871 bool
4872 ix86_check_movabs (rtx insn, int opnum)
4873 {
4874 rtx set, mem;
4875
4876 set = PATTERN (insn);
4877 if (GET_CODE (set) == PARALLEL)
4878 set = XVECEXP (set, 0, 0);
4879 gcc_assert (GET_CODE (set) == SET);
4880 mem = XEXP (set, opnum);
4881 while (SUBREG_P (mem))
4882 mem = SUBREG_REG (mem);
4883 gcc_assert (MEM_P (mem));
4884 return volatile_ok || !MEM_VOLATILE_P (mem);
4885 }
4886
4887 /* Return false if INSN contains a MEM with a non-default address space. */
4888 bool
4889 ix86_check_no_addr_space (rtx insn)
4890 {
4891 subrtx_var_iterator::array_type array;
4892 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
4893 {
4894 rtx x = *iter;
4895 if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
4896 return false;
4897 }
4898 return true;
4899 }
4900 \f
4901 /* Initialize the table of extra 80387 mathematical constants. */
4902
4903 static void
4904 init_ext_80387_constants (void)
4905 {
4906 static const char * cst[5] =
4907 {
4908 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4909 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4910 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4911 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4912 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4913 };
4914 int i;
4915
4916 for (i = 0; i < 5; i++)
4917 {
4918 real_from_string (&ext_80387_constants_table[i], cst[i]);
4919 /* Ensure each constant is rounded to XFmode precision. */
4920 real_convert (&ext_80387_constants_table[i],
4921 XFmode, &ext_80387_constants_table[i]);
4922 }
4923
4924 ext_80387_constants_init = 1;
4925 }
4926
4927 /* Return non-zero if the constant is something that
4928 can be loaded with a special instruction. */
4929
4930 int
4931 standard_80387_constant_p (rtx x)
4932 {
4933 machine_mode mode = GET_MODE (x);
4934
4935 const REAL_VALUE_TYPE *r;
4936
4937 if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
4938 return -1;
4939
4940 if (x == CONST0_RTX (mode))
4941 return 1;
4942 if (x == CONST1_RTX (mode))
4943 return 2;
4944
4945 r = CONST_DOUBLE_REAL_VALUE (x);
4946
4947 /* For XFmode constants, try to find a special 80387 instruction when
4948 optimizing for size or on those CPUs that benefit from them. */
4949 if (mode == XFmode
4950 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
4951 {
4952 int i;
4953
4954 if (! ext_80387_constants_init)
4955 init_ext_80387_constants ();
4956
4957 for (i = 0; i < 5; i++)
4958 if (real_identical (r, &ext_80387_constants_table[i]))
4959 return i + 3;
4960 }
4961
4962 /* Load of the constant -0.0 or -1.0 will be split as
4963 fldz;fchs or fld1;fchs sequence. */
4964 if (real_isnegzero (r))
4965 return 8;
4966 if (real_identical (r, &dconstm1))
4967 return 9;
4968
4969 return 0;
4970 }
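
/* Summary of the return values above, for cross-reference with
   standard_80387_constant_opcode:

     -1  not an x87 constant        5  fldl2e
      0  no special instruction     6  fldl2t
      1  fldz                       7  fldpi
      2  fld1                       8  fldz; fchs   (-0.0)
      3  fldlg2                     9  fld1; fchs   (-1.0)
      4  fldln2  */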
4971
4972 /* Return the opcode of the special instruction to be used to load
4973 the constant X. */
4974
4975 const char *
4976 standard_80387_constant_opcode (rtx x)
4977 {
4978 switch (standard_80387_constant_p (x))
4979 {
4980 case 1:
4981 return "fldz";
4982 case 2:
4983 return "fld1";
4984 case 3:
4985 return "fldlg2";
4986 case 4:
4987 return "fldln2";
4988 case 5:
4989 return "fldl2e";
4990 case 6:
4991 return "fldl2t";
4992 case 7:
4993 return "fldpi";
4994 case 8:
4995 case 9:
4996 return "#";
4997 default:
4998 gcc_unreachable ();
4999 }
5000 }
5001
5002 /* Return the CONST_DOUBLE representing the 80387 constant that is
5003 loaded by the specified special instruction. The argument IDX
5004 matches the return value from standard_80387_constant_p. */
5005
5006 rtx
5007 standard_80387_constant_rtx (int idx)
5008 {
5009 int i;
5010
5011 if (! ext_80387_constants_init)
5012 init_ext_80387_constants ();
5013
5014 switch (idx)
5015 {
5016 case 3:
5017 case 4:
5018 case 5:
5019 case 6:
5020 case 7:
5021 i = idx - 3;
5022 break;
5023
5024 default:
5025 gcc_unreachable ();
5026 }
5027
5028 return const_double_from_real_value (ext_80387_constants_table[i],
5029 XFmode);
5030 }
5031
5032 /* Return 1 if X is all zero bits and 2 if X is all one bits
5033 in a supported SSE/AVX vector mode. */
5034
5035 int
5036 standard_sse_constant_p (rtx x, machine_mode pred_mode)
5037 {
5038 machine_mode mode;
5039
5040 if (!TARGET_SSE)
5041 return 0;
5042
5043 mode = GET_MODE (x);
5044
5045 if (x == const0_rtx || const0_operand (x, mode))
5046 return 1;
5047
5048 if (x == constm1_rtx || vector_all_ones_operand (x, mode))
5049 {
5050 /* VOIDmode integer constant, get mode from the predicate. */
5051 if (mode == VOIDmode)
5052 mode = pred_mode;
5053
5054 switch (GET_MODE_SIZE (mode))
5055 {
5056 case 64:
5057 if (TARGET_AVX512F)
5058 return 2;
5059 break;
5060 case 32:
5061 if (TARGET_AVX2)
5062 return 2;
5063 break;
5064 case 16:
5065 if (TARGET_SSE2)
5066 return 2;
5067 break;
5068 case 0:
5069 /* VOIDmode */
5070 gcc_unreachable ();
5071 default:
5072 break;
5073 }
5074 }
5075
5076 return 0;
5077 }
5078
5079 /* Return the opcode of the special instruction to be used to load
5080 the constant operands[1] into operands[0]. */
5081
5082 const char *
5083 standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
5084 {
5085 machine_mode mode;
5086 rtx x = operands[1];
5087
5088 gcc_assert (TARGET_SSE);
5089
5090 mode = GET_MODE (x);
5091
5092 if (x == const0_rtx || const0_operand (x, mode))
5093 {
5094 switch (get_attr_mode (insn))
5095 {
5096 case MODE_TI:
5097 if (!EXT_REX_SSE_REG_P (operands[0]))
5098 return "%vpxor\t%0, %d0";
5099 /* FALLTHRU */
5100 case MODE_XI:
5101 case MODE_OI:
5102 if (EXT_REX_SSE_REG_P (operands[0]))
5103 return (TARGET_AVX512VL
5104 ? "vpxord\t%x0, %x0, %x0"
5105 : "vpxord\t%g0, %g0, %g0");
5106 return "vpxor\t%x0, %x0, %x0";
5107
5108 case MODE_V2DF:
5109 if (!EXT_REX_SSE_REG_P (operands[0]))
5110 return "%vxorpd\t%0, %d0";
5111 /* FALLTHRU */
5112 case MODE_V8DF:
5113 case MODE_V4DF:
5114 if (!EXT_REX_SSE_REG_P (operands[0]))
5115 return "vxorpd\t%x0, %x0, %x0";
5116 else if (TARGET_AVX512DQ)
5117 return (TARGET_AVX512VL
5118 ? "vxorpd\t%x0, %x0, %x0"
5119 : "vxorpd\t%g0, %g0, %g0");
5120 else
5121 return (TARGET_AVX512VL
5122 ? "vpxorq\t%x0, %x0, %x0"
5123 : "vpxorq\t%g0, %g0, %g0");
5124
5125 case MODE_V4SF:
5126 if (!EXT_REX_SSE_REG_P (operands[0]))
5127 return "%vxorps\t%0, %d0";
5128 /* FALLTHRU */
5129 case MODE_V16SF:
5130 case MODE_V8SF:
5131 if (!EXT_REX_SSE_REG_P (operands[0]))
5132 return "vxorps\t%x0, %x0, %x0";
5133 else if (TARGET_AVX512DQ)
5134 return (TARGET_AVX512VL
5135 ? "vxorps\t%x0, %x0, %x0"
5136 : "vxorps\t%g0, %g0, %g0");
5137 else
5138 return (TARGET_AVX512VL
5139 ? "vpxord\t%x0, %x0, %x0"
5140 : "vpxord\t%g0, %g0, %g0");
5141
5142 default:
5143 gcc_unreachable ();
5144 }
5145 }
5146 else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
5147 {
5148 enum attr_mode insn_mode = get_attr_mode (insn);
5149
5150 switch (insn_mode)
5151 {
5152 case MODE_XI:
5153 case MODE_V8DF:
5154 case MODE_V16SF:
5155 gcc_assert (TARGET_AVX512F);
5156 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5157
5158 case MODE_OI:
5159 case MODE_V4DF:
5160 case MODE_V8SF:
5161 gcc_assert (TARGET_AVX2);
5162 /* FALLTHRU */
5163 case MODE_TI:
5164 case MODE_V2DF:
5165 case MODE_V4SF:
5166 gcc_assert (TARGET_SSE2);
5167 if (!EXT_REX_SSE_REG_P (operands[0]))
5168 return (TARGET_AVX
5169 ? "vpcmpeqd\t%0, %0, %0"
5170 : "pcmpeqd\t%0, %0");
5171 else if (TARGET_AVX512VL)
5172 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
5173 else
5174 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
5175
5176 default:
5177 gcc_unreachable ();
5178 }
5179 }
5180
5181 gcc_unreachable ();
5182 }
5183
5184 /* Returns true if INSN can be transformed from a memory load
5185 to a supported FP constant load. */
5186
5187 bool
5188 ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
5189 {
5190 rtx src = find_constant_src (insn);
5191
5192 gcc_assert (REG_P (dst));
5193
5194 if (src == NULL
5195 || (SSE_REGNO_P (REGNO (dst))
5196 && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
5197 || (STACK_REGNO_P (REGNO (dst))
5198 && standard_80387_constant_p (src) < 1))
5199 return false;
5200
5201 return true;
5202 }
5203
5204 /* Predicate for pre-reload splitters with associated instructions,
5205 which can match any time before the split1 pass (usually combine),
5206 then are unconditionally split in that pass and should not be
5207 matched again afterwards. */
5208
5209 bool
5210 ix86_pre_reload_split (void)
5211 {
5212 return (can_create_pseudo_p ()
5213 && !(cfun->curr_properties & PROP_rtl_split_insns));
5214 }
5215
5216 /* Return the opcode of the TYPE_SSEMOV instruction. To move from
5217 or to xmm16-xmm31/ymm16-ymm31 registers, we either require
5218 TARGET_AVX512VL or the move must be a register-to-register move
5219 that can be done with a zmm register move. */
5220
5221 static const char *
5222 ix86_get_ssemov (rtx *operands, unsigned size,
5223 enum attr_mode insn_mode, machine_mode mode)
5224 {
5225 char buf[128];
5226 bool misaligned_p = (misaligned_operand (operands[0], mode)
5227 || misaligned_operand (operands[1], mode));
5228 bool evex_reg_p = (size == 64
5229 || EXT_REX_SSE_REG_P (operands[0])
5230 || EXT_REX_SSE_REG_P (operands[1]));
5231 machine_mode scalar_mode;
5232
5233 const char *opcode = NULL;
5234 enum
5235 {
5236 opcode_int,
5237 opcode_float,
5238 opcode_double
5239 } type = opcode_int;
5240
5241 switch (insn_mode)
5242 {
5243 case MODE_V16SF:
5244 case MODE_V8SF:
5245 case MODE_V4SF:
5246 scalar_mode = E_SFmode;
5247 type = opcode_float;
5248 break;
5249 case MODE_V8DF:
5250 case MODE_V4DF:
5251 case MODE_V2DF:
5252 scalar_mode = E_DFmode;
5253 type = opcode_double;
5254 break;
5255 case MODE_XI:
5256 case MODE_OI:
5257 case MODE_TI:
5258 scalar_mode = GET_MODE_INNER (mode);
5259 break;
5260 default:
5261 gcc_unreachable ();
5262 }
5263
5264 /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
5265 we can only use zmm register move without memory operand. */
5266 if (evex_reg_p
5267 && !TARGET_AVX512VL
5268 && GET_MODE_SIZE (mode) < 64)
5269 {
5270 /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
5271 xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
5272 AVX512VL is disabled, LRA can still generate reg to
5273 reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
5274 modes. */
5275 if (memory_operand (operands[0], mode)
5276 || memory_operand (operands[1], mode))
5277 gcc_unreachable ();
5278 size = 64;
5279 switch (type)
5280 {
5281 case opcode_int:
5282 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5283 break;
5284 case opcode_float:
5285 opcode = misaligned_p ? "vmovups" : "vmovaps";
5286 break;
5287 case opcode_double:
5288 opcode = misaligned_p ? "vmovupd" : "vmovapd";
5289 break;
5290 }
5291 }
5292 else if (SCALAR_FLOAT_MODE_P (scalar_mode))
5293 {
5294 switch (scalar_mode)
5295 {
5296 case E_SFmode:
5297 opcode = misaligned_p ? "%vmovups" : "%vmovaps";
5298 break;
5299 case E_DFmode:
5300 opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
5301 break;
5302 case E_TFmode:
5303 if (evex_reg_p)
5304 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5305 else
5306 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5307 break;
5308 default:
5309 gcc_unreachable ();
5310 }
5311 }
5312 else if (SCALAR_INT_MODE_P (scalar_mode))
5313 {
5314 switch (scalar_mode)
5315 {
5316 case E_QImode:
5317 if (evex_reg_p)
5318 opcode = (misaligned_p
5319 ? (TARGET_AVX512BW
5320 ? "vmovdqu8"
5321 : "vmovdqu64")
5322 : "vmovdqa64");
5323 else
5324 opcode = (misaligned_p
5325 ? (TARGET_AVX512BW
5326 ? "vmovdqu8"
5327 : "%vmovdqu")
5328 : "%vmovdqa");
5329 break;
5330 case E_HImode:
5331 if (evex_reg_p)
5332 opcode = (misaligned_p
5333 ? (TARGET_AVX512BW
5334 ? "vmovdqu16"
5335 : "vmovdqu64")
5336 : "vmovdqa64");
5337 else
5338 opcode = (misaligned_p
5339 ? (TARGET_AVX512BW
5340 ? "vmovdqu16"
5341 : "%vmovdqu")
5342 : "%vmovdqa");
5343 break;
5344 case E_SImode:
5345 if (evex_reg_p)
5346 opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
5347 else
5348 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5349 break;
5350 case E_DImode:
5351 case E_TImode:
5352 case E_OImode:
5353 if (evex_reg_p)
5354 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5355 else
5356 opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
5357 break;
5358 case E_XImode:
5359 opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
5360 break;
5361 default:
5362 gcc_unreachable ();
5363 }
5364 }
5365 else
5366 gcc_unreachable ();
5367
5368 switch (size)
5369 {
5370 case 64:
5371 snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
5372 opcode);
5373 break;
5374 case 32:
5375 snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
5376 opcode);
5377 break;
5378 case 16:
5379 snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
5380 opcode);
5381 break;
5382 default:
5383 gcc_unreachable ();
5384 }
5385 output_asm_insn (buf, operands);
5386 return "";
5387 }
5388
5389 /* Return the template of the TYPE_SSEMOV instruction to move
5390 operands[1] into operands[0]. */
5391
5392 const char *
5393 ix86_output_ssemov (rtx_insn *insn, rtx *operands)
5394 {
5395 machine_mode mode = GET_MODE (operands[0]);
5396 if (get_attr_type (insn) != TYPE_SSEMOV
5397 || mode != GET_MODE (operands[1]))
5398 gcc_unreachable ();
5399
5400 enum attr_mode insn_mode = get_attr_mode (insn);
5401
5402 switch (insn_mode)
5403 {
5404 case MODE_XI:
5405 case MODE_V8DF:
5406 case MODE_V16SF:
5407 return ix86_get_ssemov (operands, 64, insn_mode, mode);
5408
5409 case MODE_OI:
5410 case MODE_V4DF:
5411 case MODE_V8SF:
5412 return ix86_get_ssemov (operands, 32, insn_mode, mode);
5413
5414 case MODE_TI:
5415 case MODE_V2DF:
5416 case MODE_V4SF:
5417 return ix86_get_ssemov (operands, 16, insn_mode, mode);
5418
5419 case MODE_DI:
5420 /* Handle broken assemblers that require movd instead of movq. */
5421 if (!HAVE_AS_IX86_INTERUNIT_MOVQ
5422 && (GENERAL_REG_P (operands[0])
5423 || GENERAL_REG_P (operands[1])))
5424 return "%vmovd\t{%1, %0|%0, %1}";
5425 else
5426 return "%vmovq\t{%1, %0|%0, %1}";
5427
5428 case MODE_SI:
5429 return "%vmovd\t{%1, %0|%0, %1}";
5430
5431 case MODE_DF:
5432 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5433 return "vmovsd\t{%d1, %0|%0, %d1}";
5434 else
5435 return "%vmovsd\t{%1, %0|%0, %1}";
5436
5437 case MODE_SF:
5438 if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
5439 return "vmovss\t{%d1, %0|%0, %d1}";
5440 else
5441 return "%vmovss\t{%1, %0|%0, %1}";
5442
5443 case MODE_V1DF:
5444 gcc_assert (!TARGET_AVX);
5445 return "movlpd\t{%1, %0|%0, %1}";
5446
5447 case MODE_V2SF:
5448 if (TARGET_AVX && REG_P (operands[0]))
5449 return "vmovlps\t{%1, %d0|%d0, %1}";
5450 else
5451 return "%vmovlps\t{%1, %0|%0, %1}";
5452
5453 default:
5454 gcc_unreachable ();
5455 }
5456 }
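
/* A few examples of the templates selected above (hedged; the exact
   mnemonic also depends on operand alignment, EVEX registers and
   AVX512VL):

     V4SF reg-reg, AVX              vmovaps %xmm1, %xmm0
     V8SI unaligned load, AVX2      vmovdqu mem, %ymm0
     V16SF (zmm operands)           vmovaps/vmovups with %zmm registers
     DF   reg-reg, AVX              vmovsd  %xmm1, %xmm1, %xmm0  */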
5457
5458 /* Returns true if OP contains a symbol reference. */
5459
5460 bool
5461 symbolic_reference_mentioned_p (rtx op)
5462 {
5463 const char *fmt;
5464 int i;
5465
5466 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5467 return true;
5468
5469 fmt = GET_RTX_FORMAT (GET_CODE (op));
5470 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5471 {
5472 if (fmt[i] == 'E')
5473 {
5474 int j;
5475
5476 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5477 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5478 return true;
5479 }
5480
5481 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5482 return true;
5483 }
5484
5485 return false;
5486 }
5487
5488 /* Return true if it is appropriate to emit `ret' instructions in the
5489 body of a function. Do this only if the epilogue is simple, needing a
5490 couple of insns. Prior to reloading, we can't tell how many registers
5491 must be saved, so return false then. Return false if there is no frame
5492 marker to de-allocate. */
5493
5494 bool
5495 ix86_can_use_return_insn_p (void)
5496 {
5497 if (ix86_function_ms_hook_prologue (current_function_decl))
5498 return false;
5499
5500 if (ix86_function_naked (current_function_decl))
5501 return false;
5502
5503 /* Don't use `ret' instruction in interrupt handler. */
5504 if (! reload_completed
5505 || frame_pointer_needed
5506 || cfun->machine->func_type != TYPE_NORMAL)
5507 return 0;
5508
5509 /* Don't allow more than 32k pop, since that's all we can do
5510 with one instruction. */
5511 if (crtl->args.pops_args && crtl->args.size >= 32768)
5512 return 0;
5513
5514 struct ix86_frame &frame = cfun->machine->frame;
5515 return (frame.stack_pointer_offset == UNITS_PER_WORD
5516 && (frame.nregs + frame.nsseregs) == 0);
5517 }
5518 \f
5519 /* Return the stack frame size. get_frame_size () returns the stack slots
5520 used during compilation, which may later be optimized out. If a stack
5521 frame is needed, stack_frame_required should be true. */
5522
5523 static HOST_WIDE_INT
5524 ix86_get_frame_size (void)
5525 {
5526 if (cfun->machine->stack_frame_required)
5527 return get_frame_size ();
5528 else
5529 return 0;
5530 }
5531
5532 /* Value should be nonzero if functions must have frame pointers.
5533 Zero means the frame pointer need not be set up (and parms may
5534 be accessed via the stack pointer) in functions that seem suitable. */
5535
5536 static bool
5537 ix86_frame_pointer_required (void)
5538 {
5539 /* If we accessed previous frames, then the generated code expects
5540 to be able to access the saved ebp value in our frame. */
5541 if (cfun->machine->accesses_prev_frame)
5542 return true;
5543
5544 /* Several x86 OSes need a frame pointer for other reasons,
5545 usually pertaining to setjmp. */
5546 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5547 return true;
5548
5549 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
5550 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
5551 return true;
5552
5553 /* Under Win64 SEH, very large frames need a frame pointer since the
5554 maximum stack allocation is 4GB. */
5555 if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
5556 return true;
5557
5558 /* SSE saves require a frame pointer when the stack is misaligned. */
5559 if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
5560 return true;
5561
5562 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
5563 turns off the frame pointer by default. Turn it back on now if
5564 we've not got a leaf function. */
5565 if (TARGET_OMIT_LEAF_FRAME_POINTER
5566 && (!crtl->is_leaf
5567 || ix86_current_function_calls_tls_descriptor))
5568 return true;
5569
5570 /* Several versions of mcount for the x86 assume that there is a
5571 frame, so we cannot allow profiling without a frame pointer. */
5572 if (crtl->profile && !flag_fentry)
5573 return true;
5574
5575 return false;
5576 }
5577
5578 /* Record that the current function accesses previous call frames. */
5579
5580 void
5581 ix86_setup_frame_addresses (void)
5582 {
5583 cfun->machine->accesses_prev_frame = 1;
5584 }
5585 \f
5586 #ifndef USE_HIDDEN_LINKONCE
5587 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
5588 # define USE_HIDDEN_LINKONCE 1
5589 # else
5590 # define USE_HIDDEN_LINKONCE 0
5591 # endif
5592 #endif
5593
5594 /* Label count for call and return thunks. It is used to make unique
5595 labels in call and return thunks. */
5596 static int indirectlabelno;
5597
5598 /* True if call thunk function is needed. */
5599 static bool indirect_thunk_needed = false;
5600
5601 /* Bit mask of the integer registers that contain the branch target,
5602 used by call thunk functions. */
5603 static int indirect_thunks_used;
5604
5605 /* True if return thunk function is needed. */
5606 static bool indirect_return_needed = false;
5607
5608 /* True if return thunk function via CX is needed. */
5609 static bool indirect_return_via_cx;
5610
5611 #ifndef INDIRECT_LABEL
5612 # define INDIRECT_LABEL "LIND"
5613 #endif
5614
5615 /* Indicate what prefix is needed for an indirect branch. */
5616 enum indirect_thunk_prefix
5617 {
5618 indirect_thunk_prefix_none,
5619 indirect_thunk_prefix_nt
5620 };
5621
5622 /* Return the prefix needed for an indirect branch INSN. */
5623
5624 enum indirect_thunk_prefix
5625 indirect_thunk_need_prefix (rtx_insn *insn)
5626 {
5627 enum indirect_thunk_prefix need_prefix;
5628 if ((cfun->machine->indirect_branch_type
5629 == indirect_branch_thunk_extern)
5630 && ix86_notrack_prefixed_insn_p (insn))
5631 {
5632 /* NOTRACK prefix is only used with external thunk so that it
5633 can be properly updated to support CET at run-time. */
5634 need_prefix = indirect_thunk_prefix_nt;
5635 }
5636 else
5637 need_prefix = indirect_thunk_prefix_none;
5638 return need_prefix;
5639 }
5640
5641 /* Fills in the label name that should be used for the indirect thunk. */
5642
5643 static void
5644 indirect_thunk_name (char name[32], unsigned int regno,
5645 enum indirect_thunk_prefix need_prefix,
5646 bool ret_p)
5647 {
5648 if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
5649 gcc_unreachable ();
5650
5651 if (USE_HIDDEN_LINKONCE)
5652 {
5653 const char *prefix;
5654
5655 if (need_prefix == indirect_thunk_prefix_nt
5656 && regno != INVALID_REGNUM)
5657 {
5658 /* NOTRACK prefix is only used with external thunk via
5659 register so that NOTRACK prefix can be added to indirect
5660 branch via register to support CET at run-time. */
5661 prefix = "_nt";
5662 }
5663 else
5664 prefix = "";
5665
5666 const char *ret = ret_p ? "return" : "indirect";
5667
5668 if (regno != INVALID_REGNUM)
5669 {
5670 const char *reg_prefix;
5671 if (LEGACY_INT_REGNO_P (regno))
5672 reg_prefix = TARGET_64BIT ? "r" : "e";
5673 else
5674 reg_prefix = "";
5675 sprintf (name, "__x86_%s_thunk%s_%s%s",
5676 ret, prefix, reg_prefix, reg_names[regno]);
5677 }
5678 else
5679 sprintf (name, "__x86_%s_thunk%s", ret, prefix);
5680 }
5681 else
5682 {
5683 if (regno != INVALID_REGNUM)
5684 ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
5685 else
5686 {
5687 if (ret_p)
5688 ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
5689 else
5690 ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
5691 }
5692 }
5693 }
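
/* Example names produced above when USE_HIDDEN_LINKONCE (a sketch; the
   register suffix comes from reg_names[]):

     __x86_indirect_thunk           target on the stack
     __x86_indirect_thunk_rax       target in %rax (64-bit)
     __x86_indirect_thunk_nt_rax    as above, NOTRACK variant
     __x86_return_thunk             return thunk
     __x86_return_thunk_rcx         return thunk via %rcx  */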
5694
5695 /* Output a call and return thunk for indirect branch. If REGNO != -1,
5696 the function address is in REGNO and the call and return thunk looks like:
5697
5698 call L2
5699 L1:
5700 pause
5701 lfence
5702 jmp L1
5703 L2:
5704 mov %REG, (%sp)
5705 ret
5706
5707 Otherwise, the function address is on the top of stack and the
5708 call and return thunk looks like:
5709
5710 call L2
5711 L1:
5712 pause
5713 lfence
5714 jmp L1
5715 L2:
5716 lea WORD_SIZE(%sp), %sp
5717 ret
5718 */
5719
5720 static void
5721 output_indirect_thunk (unsigned int regno)
5722 {
5723 char indirectlabel1[32];
5724 char indirectlabel2[32];
5725
5726 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
5727 indirectlabelno++);
5728 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
5729 indirectlabelno++);
5730
5731 /* Call */
5732 fputs ("\tcall\t", asm_out_file);
5733 assemble_name_raw (asm_out_file, indirectlabel2);
5734 fputc ('\n', asm_out_file);
5735
5736 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
5737
5738 /* AMD and Intel CPUs each prefer a different instruction as the loop filler.
5739 Using both pause + lfence is a compromise solution. */
5740 fprintf (asm_out_file, "\tpause\n\tlfence\n");
5741
5742 /* Jump. */
5743 fputs ("\tjmp\t", asm_out_file);
5744 assemble_name_raw (asm_out_file, indirectlabel1);
5745 fputc ('\n', asm_out_file);
5746
5747 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
5748
5749 /* The above call insn pushed a word to stack. Adjust CFI info. */
5750 if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
5751 {
5752 if (! dwarf2out_do_cfi_asm ())
5753 {
5754 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5755 xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
5756 xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
5757 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5758 }
5759 dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
5760 xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
5761 xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
5762 vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
5763 dwarf2out_emit_cfi (xcfi);
5764 }
5765
5766 if (regno != INVALID_REGNUM)
5767 {
5768 /* MOV. */
5769 rtx xops[2];
5770 xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
5771 xops[1] = gen_rtx_REG (word_mode, regno);
5772 output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
5773 }
5774 else
5775 {
5776 /* LEA. */
5777 rtx xops[2];
5778 xops[0] = stack_pointer_rtx;
5779 xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
5780 output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
5781 }
5782
5783 fputs ("\tret\n", asm_out_file);
5784 }
5785
5786 /* Output a function with a call and return thunk for indirect branch.
5787 If REGNO != INVALID_REGNUM, the function address is in REGNO.
5788 Otherwise, the function address is on the top of stack. Thunk is
5789 used for function return if RET_P is true. */
5790
5791 static void
5792 output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
5793 unsigned int regno, bool ret_p)
5794 {
5795 char name[32];
5796 tree decl;
5797
5798 /* Create __x86_indirect_thunk. */
5799 indirect_thunk_name (name, regno, need_prefix, ret_p);
5800 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5801 get_identifier (name),
5802 build_function_type_list (void_type_node, NULL_TREE));
5803 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5804 NULL_TREE, void_type_node);
5805 TREE_PUBLIC (decl) = 1;
5806 TREE_STATIC (decl) = 1;
5807 DECL_IGNORED_P (decl) = 1;
5808
5809 #if TARGET_MACHO
5810 if (TARGET_MACHO)
5811 {
5812 switch_to_section (darwin_sections[picbase_thunk_section]);
5813 fputs ("\t.weak_definition\t", asm_out_file);
5814 assemble_name (asm_out_file, name);
5815 fputs ("\n\t.private_extern\t", asm_out_file);
5816 assemble_name (asm_out_file, name);
5817 putc ('\n', asm_out_file);
5818 ASM_OUTPUT_LABEL (asm_out_file, name);
5819 DECL_WEAK (decl) = 1;
5820 }
5821 else
5822 #endif
5823 if (USE_HIDDEN_LINKONCE)
5824 {
5825 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5826
5827 targetm.asm_out.unique_section (decl, 0);
5828 switch_to_section (get_named_section (decl, NULL, 0));
5829
5830 targetm.asm_out.globalize_label (asm_out_file, name);
5831 fputs ("\t.hidden\t", asm_out_file);
5832 assemble_name (asm_out_file, name);
5833 putc ('\n', asm_out_file);
5834 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5835 }
5836 else
5837 {
5838 switch_to_section (text_section);
5839 ASM_OUTPUT_LABEL (asm_out_file, name);
5840 }
5841
5842 DECL_INITIAL (decl) = make_node (BLOCK);
5843 current_function_decl = decl;
5844 allocate_struct_function (decl, false);
5845 init_function_start (decl);
5846 /* We're about to hide the function body from callees of final_* by
5847 emitting it directly; tell them we're a thunk, if they care. */
5848 cfun->is_thunk = true;
5849 first_function_block_is_cold = false;
5850 /* Make sure unwind info is emitted for the thunk if needed. */
5851 final_start_function (emit_barrier (), asm_out_file, 1);
5852
5853 output_indirect_thunk (regno);
5854
5855 final_end_function ();
5856 init_insn_lengths ();
5857 free_after_compilation (cfun);
5858 set_cfun (NULL);
5859 current_function_decl = NULL;
5860 }
5861
5862 static int pic_labels_used;
5863
5864 /* Fills in the label name that should be used for a pc thunk for
5865 the given register. */
5866
5867 static void
5868 get_pc_thunk_name (char name[32], unsigned int regno)
5869 {
5870 gcc_assert (!TARGET_64BIT);
5871
5872 if (USE_HIDDEN_LINKONCE)
5873 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
5874 else
5875 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5876 }
5877
5878
5879 /* Output the indirect branch and return thunks, and the -fpic thunks that
5880 load a register with the return address of the caller and then return. */
5881
5882 static void
5883 ix86_code_end (void)
5884 {
5885 rtx xops[2];
5886 unsigned int regno;
5887
5888 if (indirect_return_needed)
5889 output_indirect_thunk_function (indirect_thunk_prefix_none,
5890 INVALID_REGNUM, true);
5891 if (indirect_return_via_cx)
5892 output_indirect_thunk_function (indirect_thunk_prefix_none,
5893 CX_REG, true);
5894 if (indirect_thunk_needed)
5895 output_indirect_thunk_function (indirect_thunk_prefix_none,
5896 INVALID_REGNUM, false);
5897
5898 for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
5899 {
5900 unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1;
5901 if ((indirect_thunks_used & (1 << i)))
5902 output_indirect_thunk_function (indirect_thunk_prefix_none,
5903 regno, false);
5904 }
5905
5906 for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
5907 {
5908 char name[32];
5909 tree decl;
5910
5911 if ((indirect_thunks_used & (1 << regno)))
5912 output_indirect_thunk_function (indirect_thunk_prefix_none,
5913 regno, false);
5914
5915 if (!(pic_labels_used & (1 << regno)))
5916 continue;
5917
5918 get_pc_thunk_name (name, regno);
5919
5920 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
5921 get_identifier (name),
5922 build_function_type_list (void_type_node, NULL_TREE));
5923 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
5924 NULL_TREE, void_type_node);
5925 TREE_PUBLIC (decl) = 1;
5926 TREE_STATIC (decl) = 1;
5927 DECL_IGNORED_P (decl) = 1;
5928
5929 #if TARGET_MACHO
5930 if (TARGET_MACHO)
5931 {
5932 switch_to_section (darwin_sections[picbase_thunk_section]);
5933 fputs ("\t.weak_definition\t", asm_out_file);
5934 assemble_name (asm_out_file, name);
5935 fputs ("\n\t.private_extern\t", asm_out_file);
5936 assemble_name (asm_out_file, name);
5937 putc ('\n', asm_out_file);
5938 ASM_OUTPUT_LABEL (asm_out_file, name);
5939 DECL_WEAK (decl) = 1;
5940 }
5941 else
5942 #endif
5943 if (USE_HIDDEN_LINKONCE)
5944 {
5945 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
5946
5947 targetm.asm_out.unique_section (decl, 0);
5948 switch_to_section (get_named_section (decl, NULL, 0));
5949
5950 targetm.asm_out.globalize_label (asm_out_file, name);
5951 fputs ("\t.hidden\t", asm_out_file);
5952 assemble_name (asm_out_file, name);
5953 putc ('\n', asm_out_file);
5954 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5955 }
5956 else
5957 {
5958 switch_to_section (text_section);
5959 ASM_OUTPUT_LABEL (asm_out_file, name);
5960 }
5961
5962 DECL_INITIAL (decl) = make_node (BLOCK);
5963 current_function_decl = decl;
5964 allocate_struct_function (decl, false);
5965 init_function_start (decl);
5966 /* We're about to hide the function body from callees of final_* by
5967 emitting it directly; tell them we're a thunk, if they care. */
5968 cfun->is_thunk = true;
5969 first_function_block_is_cold = false;
5970 /* Make sure unwind info is emitted for the thunk if needed. */
5971 final_start_function (emit_barrier (), asm_out_file, 1);
5972
5973 /* Pad stack IP move with 4 instructions (two NOPs count
5974 as one instruction). */
5975 if (TARGET_PAD_SHORT_FUNCTION)
5976 {
5977 int i = 8;
5978
5979 while (i--)
5980 fputs ("\tnop\n", asm_out_file);
5981 }
5982
5983 xops[0] = gen_rtx_REG (Pmode, regno);
5984 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5985 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
5986 output_asm_insn ("%!ret", NULL);
5987 final_end_function ();
5988 init_insn_lengths ();
5989 free_after_compilation (cfun);
5990 set_cfun (NULL);
5991 current_function_decl = NULL;
5992 }
5993
5994 if (flag_split_stack)
5995 file_end_indicate_split_stack ();
5996 }
5997
5998 /* Emit code for the SET_GOT patterns. */
5999
6000 const char *
6001 output_set_got (rtx dest, rtx label)
6002 {
6003 rtx xops[3];
6004
6005 xops[0] = dest;
6006
6007 if (TARGET_VXWORKS_RTP && flag_pic)
6008 {
6009 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6010 xops[2] = gen_rtx_MEM (Pmode,
6011 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6012 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6013
6014 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6015 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6016 an unadorned address. */
6017 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6018 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6019 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6020 return "";
6021 }
6022
6023 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6024
6025 if (flag_pic)
6026 {
6027 char name[32];
6028 get_pc_thunk_name (name, REGNO (dest));
6029 pic_labels_used |= 1 << REGNO (dest);
6030
6031 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6032 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6033 output_asm_insn ("%!call\t%X2", xops);
6034
6035 #if TARGET_MACHO
6036 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
6037 This is what will be referenced by the Mach-O PIC subsystem. */
6038 if (machopic_should_output_picbase_label () || !label)
6039 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
6040
6041 /* When we are restoring the pic base at the site of a nonlocal label,
6042 and we decided to emit the pic base above, we will still output a
6043 local label used for calculating the correction offset (even though
6044 the offset will be 0 in that case). */
6045 if (label)
6046 targetm.asm_out.internal_label (asm_out_file, "L",
6047 CODE_LABEL_NUMBER (label));
6048 #endif
6049 }
6050 else
6051 {
6052 if (TARGET_MACHO)
6053 /* We don't need a pic base, we're not producing pic. */
6054 gcc_unreachable ();
6055
6056 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6057 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6058 targetm.asm_out.internal_label (asm_out_file, "L",
6059 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6060 }
6061
6062 if (!TARGET_MACHO)
6063 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6064
6065 return "";
6066 }
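
/* For illustration: in the common ELF -fpic case with DEST being %ebx the
   sequence emitted above is roughly

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the pc thunk (emitted later by ix86_code_end) loads %ebx with the
   return address, so %ebx ends up pointing at the GOT.  */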
6067
6068 /* Generate a "push" pattern for input ARG. */
6069
6070 rtx
6071 gen_push (rtx arg)
6072 {
6073 struct machine_function *m = cfun->machine;
6074
6075 if (m->fs.cfa_reg == stack_pointer_rtx)
6076 m->fs.cfa_offset += UNITS_PER_WORD;
6077 m->fs.sp_offset += UNITS_PER_WORD;
6078
6079 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6080 arg = gen_rtx_REG (word_mode, REGNO (arg));
6081
6082 return gen_rtx_SET (gen_rtx_MEM (word_mode,
6083 gen_rtx_PRE_DEC (Pmode,
6084 stack_pointer_rtx)),
6085 arg);
6086 }
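
/* For illustration: on a 64-bit target the pattern built above is

     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI <arg>))

   i.e. a plain push whose stack-pointer update is expressed through the
   PRE_DEC address, while the bookkeeping in cfun->machine->fs above tracks
   the resulting offsets.  */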
6087
6088 /* Generate a "pop" pattern for input ARG. */
6089
6090 rtx
6091 gen_pop (rtx arg)
6092 {
6093 if (REG_P (arg) && GET_MODE (arg) != word_mode)
6094 arg = gen_rtx_REG (word_mode, REGNO (arg));
6095
6096 return gen_rtx_SET (arg,
6097 gen_rtx_MEM (word_mode,
6098 gen_rtx_POST_INC (Pmode,
6099 stack_pointer_rtx)));
6100 }
6101
6102 /* Return the number of an unused call-clobbered register if one is
6103 available for the entire function, or INVALID_REGNUM otherwise. */
6104
6105 static unsigned int
6106 ix86_select_alt_pic_regnum (void)
6107 {
6108 if (ix86_use_pseudo_pic_reg ())
6109 return INVALID_REGNUM;
6110
6111 if (crtl->is_leaf
6112 && !crtl->profile
6113 && !ix86_current_function_calls_tls_descriptor)
6114 {
6115 int i, drap;
6116 /* Can't use the same register for both PIC and DRAP. */
6117 if (crtl->drap_reg)
6118 drap = REGNO (crtl->drap_reg);
6119 else
6120 drap = -1;
6121 for (i = 2; i >= 0; --i)
6122 if (i != drap && !df_regs_ever_live_p (i))
6123 return i;
6124 }
6125
6126 return INVALID_REGNUM;
6127 }
6128
6129 /* Return true if REGNO is used by the epilogue. */
6130
6131 bool
6132 ix86_epilogue_uses (int regno)
6133 {
6134 /* If there are no caller-saved registers, we preserve all registers,
6135 except for MMX and x87 registers which aren't supported when saving
6136 and restoring registers. Don't explicitly save SP register since
6137 it is always preserved. */
6138 return (epilogue_completed
6139 && cfun->machine->no_caller_saved_registers
6140 && !fixed_regs[regno]
6141 && !STACK_REGNO_P (regno)
6142 && !MMX_REGNO_P (regno));
6143 }
6144
6145 /* Return nonzero if register REGNO can be used as a scratch register
6146 in peephole2. */
6147
6148 static bool
6149 ix86_hard_regno_scratch_ok (unsigned int regno)
6150 {
6151 /* If there are no caller-saved registers, we can't use any register
6152 as a scratch register after epilogue and use REGNO as scratch
6153 register only if it has been used before to avoid saving and
6154 restoring it. */
6155 return (!cfun->machine->no_caller_saved_registers
6156 || (!epilogue_completed
6157 && df_regs_ever_live_p (regno)));
6158 }
6159
6160 /* Return TRUE if we need to save REGNO. */
6161
6162 bool
6163 ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
6164 {
6165 /* If there are no caller-saved registers, we preserve all registers,
6166 except for MMX and x87 registers which aren't supported when saving
6167 and restoring registers. Don't explicitly save SP register since
6168 it is always preserved. */
6169 if (cfun->machine->no_caller_saved_registers)
6170 {
6171 /* Don't preserve registers used for function return value. */
6172 rtx reg = crtl->return_rtx;
6173 if (reg)
6174 {
6175 unsigned int i = REGNO (reg);
6176 unsigned int nregs = REG_NREGS (reg);
6177 while (nregs-- > 0)
6178 if ((i + nregs) == regno)
6179 return false;
6180 }
6181
6182 return (df_regs_ever_live_p (regno)
6183 && !fixed_regs[regno]
6184 && !STACK_REGNO_P (regno)
6185 && !MMX_REGNO_P (regno)
6186 && (regno != HARD_FRAME_POINTER_REGNUM
6187 || !frame_pointer_needed));
6188 }
6189
6190 if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
6191 && pic_offset_table_rtx)
6192 {
6193 if (ix86_use_pseudo_pic_reg ())
6194 {
6195 /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
6196 _mcount in prologue. */
6197 if (!TARGET_64BIT && flag_pic && crtl->profile)
6198 return true;
6199 }
6200 else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6201 || crtl->profile
6202 || crtl->calls_eh_return
6203 || crtl->uses_const_pool
6204 || cfun->has_nonlocal_label)
6205 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
6206 }
6207
6208 if (crtl->calls_eh_return && maybe_eh_return)
6209 {
6210 unsigned i;
6211 for (i = 0; ; i++)
6212 {
6213 unsigned test = EH_RETURN_DATA_REGNO (i);
6214 if (test == INVALID_REGNUM)
6215 break;
6216 if (test == regno)
6217 return true;
6218 }
6219 }
6220
6221 if (ignore_outlined && cfun->machine->call_ms2sysv)
6222 {
6223 unsigned count = cfun->machine->call_ms2sysv_extra_regs
6224 + xlogue_layout::MIN_REGS;
6225 if (xlogue_layout::is_stub_managed_reg (regno, count))
6226 return false;
6227 }
6228
6229 if (crtl->drap_reg
6230 && regno == REGNO (crtl->drap_reg)
6231 && !cfun->machine->no_drap_save_restore)
6232 return true;
6233
6234 return (df_regs_ever_live_p (regno)
6235 && !call_used_or_fixed_reg_p (regno)
6236 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6237 }
6238
6239 /* Return the number of saved general purpose registers. */
6240
6241 static int
6242 ix86_nsaved_regs (void)
6243 {
6244 int nregs = 0;
6245 int regno;
6246
6247 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6248 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6249 nregs ++;
6250 return nregs;
6251 }
6252
6253 /* Return number of saved SSE registers. */
6254
6255 static int
6256 ix86_nsaved_sseregs (void)
6257 {
6258 int nregs = 0;
6259 int regno;
6260
6261 if (!TARGET_64BIT_MS_ABI)
6262 return 0;
6263 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6264 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6265 nregs ++;
6266 return nregs;
6267 }
6268
6269 /* Given FROM and TO register numbers, say whether this elimination is
6270 allowed. If stack alignment is needed, we can only replace argument
6271 pointer with hard frame pointer, or replace frame pointer with stack
6272 pointer. Otherwise, frame pointer elimination is automatically
6273 handled and all other eliminations are valid. */
6274
6275 static bool
6276 ix86_can_eliminate (const int from, const int to)
6277 {
6278 if (stack_realign_fp)
6279 return ((from == ARG_POINTER_REGNUM
6280 && to == HARD_FRAME_POINTER_REGNUM)
6281 || (from == FRAME_POINTER_REGNUM
6282 && to == STACK_POINTER_REGNUM));
6283 else
6284 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
6285 }
6286
6287 /* Return the offset between two registers, one to be eliminated, and the other
6288 its replacement, at the start of a routine. */
6289
6290 HOST_WIDE_INT
6291 ix86_initial_elimination_offset (int from, int to)
6292 {
6293 struct ix86_frame &frame = cfun->machine->frame;
6294
6295 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6296 return frame.hard_frame_pointer_offset;
6297 else if (from == FRAME_POINTER_REGNUM
6298 && to == HARD_FRAME_POINTER_REGNUM)
6299 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6300 else
6301 {
6302 gcc_assert (to == STACK_POINTER_REGNUM);
6303
6304 if (from == ARG_POINTER_REGNUM)
6305 return frame.stack_pointer_offset;
6306
6307 gcc_assert (from == FRAME_POINTER_REGNUM);
6308 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6309 }
6310 }
6311
6312 /* Emits a warning for unsupported msabi to sysv pro/epilogues. */
6313 void warn_once_call_ms2sysv_xlogues (const char *feature)
6314 {
6315 static bool warned_once = false;
6316 if (!warned_once)
6317 {
6318 warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
6319 feature);
6320 warned_once = true;
6321 }
6322 }
6323
6324 /* Return the probing interval for -fstack-clash-protection. */
6325
6326 static HOST_WIDE_INT
6327 get_probe_interval (void)
6328 {
6329 if (flag_stack_clash_protection)
6330 return (HOST_WIDE_INT_1U
6331 << param_stack_clash_protection_probe_interval);
6332 else
6333 return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
6334 }
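
/* For illustration: with the default parameters both branches above give a
   4 KiB interval, since STACK_CHECK_PROBE_INTERVAL_EXP and the
   stack-clash-protection-probe-interval param are normally 12,
   i.e. 1 << 12 bytes.  */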
6335
6336 /* When using -fsplit-stack, the allocation routines set a field in
6337 the TCB to the bottom of the stack plus this much space, measured
6338 in bytes. */
6339
6340 #define SPLIT_STACK_AVAILABLE 256
6341
6342 /* Fill in the ix86_frame structure describing the frame of the current function. */
6343
6344 static void
6345 ix86_compute_frame_layout (void)
6346 {
6347 struct ix86_frame *frame = &cfun->machine->frame;
6348 struct machine_function *m = cfun->machine;
6349 unsigned HOST_WIDE_INT stack_alignment_needed;
6350 HOST_WIDE_INT offset;
6351 unsigned HOST_WIDE_INT preferred_alignment;
6352 HOST_WIDE_INT size = ix86_get_frame_size ();
6353 HOST_WIDE_INT to_allocate;
6354
6355 /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
6356 * ms_abi functions that call a sysv function. We now need to prune away
6357 * cases where it should be disabled. */
6358 if (TARGET_64BIT && m->call_ms2sysv)
6359 {
6360 gcc_assert (TARGET_64BIT_MS_ABI);
6361 gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
6362 gcc_assert (!TARGET_SEH);
6363 gcc_assert (TARGET_SSE);
6364 gcc_assert (!ix86_using_red_zone ());
6365
6366 if (crtl->calls_eh_return)
6367 {
6368 gcc_assert (!reload_completed);
6369 m->call_ms2sysv = false;
6370 warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
6371 }
6372
6373 else if (ix86_static_chain_on_stack)
6374 {
6375 gcc_assert (!reload_completed);
6376 m->call_ms2sysv = false;
6377 warn_once_call_ms2sysv_xlogues ("static call chains");
6378 }
6379
6380 /* Finally, compute which registers the stub will manage. */
6381 else
6382 {
6383 unsigned count = xlogue_layout::count_stub_managed_regs ();
6384 m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
6385 m->call_ms2sysv_pad_in = 0;
6386 }
6387 }
6388
6389 frame->nregs = ix86_nsaved_regs ();
6390 frame->nsseregs = ix86_nsaved_sseregs ();
6391
6392 /* The 64-bit MS ABI seems to require stack alignment to always be 16,
6393 except for function prologues, leaf functions and when the default
6394 incoming stack boundary is overridden at the command line or via the
6395 force_align_arg_pointer attribute.
6396
6397 Darwin's ABI specifies 128-bit alignment for both 32- and 64-bit variants
6398 at call sites, including profile function calls.
6399 */
6400 if (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
6401 && crtl->preferred_stack_boundary < 128)
6402 && (!crtl->is_leaf || cfun->calls_alloca != 0
6403 || ix86_current_function_calls_tls_descriptor
6404 || (TARGET_MACHO && crtl->profile)
6405 || ix86_incoming_stack_boundary < 128))
6406 {
6407 crtl->preferred_stack_boundary = 128;
6408 crtl->stack_alignment_needed = 128;
6409 }
6410
6411 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6412 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6413
6414 gcc_assert (!size || stack_alignment_needed);
6415 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6416 gcc_assert (preferred_alignment <= stack_alignment_needed);
6417
6418 /* The only ABI saving SSE regs should be 64-bit ms_abi. */
6419 gcc_assert (TARGET_64BIT || !frame->nsseregs);
6420 if (TARGET_64BIT && m->call_ms2sysv)
6421 {
6422 gcc_assert (stack_alignment_needed >= 16);
6423 gcc_assert (!frame->nsseregs);
6424 }
6425
6426 /* For SEH we have to limit the amount of code movement into the prologue.
6427 At present we do this via a BLOCKAGE, at which point there's very little
6428 scheduling that can be done, which means that there's very little point
6429 in doing anything except PUSHs. */
6430 if (TARGET_SEH)
6431 m->use_fast_prologue_epilogue = false;
6432 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
6433 {
6434 int count = frame->nregs;
6435 struct cgraph_node *node = cgraph_node::get (current_function_decl);
6436
6437 /* The fast prologue uses move instead of push to save registers. This
6438 is significantly longer, but also executes faster as modern hardware
6439 can execute the moves in parallel, but can't do that for push/pop.
6440
6441 Be careful about choosing which prologue to emit: when the function takes
6442 many instructions to execute we may use the slow version, as well as when
6443 the function is known to be outside a hot spot (this is known with
6444 feedback only). Weight the size of the function by the number of registers
6445 to save, as it is cheap to use one or two push instructions but very
6446 slow to use many of them.
6447
6448 Calling this hook multiple times with the same frame requirements
6449 must produce the same layout, since the RA might otherwise be
6450 unable to reach a fixed point or might fail its final sanity checks.
6451 This means that once we've assumed that a function does or doesn't
6452 have a particular size, we have to stick to that assumption
6453 regardless of how the function has changed since. */
6454 if (count)
6455 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6456 if (node->frequency < NODE_FREQUENCY_NORMAL
6457 || (flag_branch_probabilities
6458 && node->frequency < NODE_FREQUENCY_HOT))
6459 m->use_fast_prologue_epilogue = false;
6460 else
6461 {
6462 if (count != frame->expensive_count)
6463 {
6464 frame->expensive_count = count;
6465 frame->expensive_p = expensive_function_p (count);
6466 }
6467 m->use_fast_prologue_epilogue = !frame->expensive_p;
6468 }
6469 }
6470
6471 frame->save_regs_using_mov
6472 = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
6473
6474 /* Skip return address and error code in exception handler. */
6475 offset = INCOMING_FRAME_SP_OFFSET;
6476
6477 /* Skip pushed static chain. */
6478 if (ix86_static_chain_on_stack)
6479 offset += UNITS_PER_WORD;
6480
6481 /* Skip saved base pointer. */
6482 if (frame_pointer_needed)
6483 offset += UNITS_PER_WORD;
6484 frame->hfp_save_offset = offset;
6485
6486 /* The traditional frame pointer location is at the top of the frame. */
6487 frame->hard_frame_pointer_offset = offset;
6488
6489 /* Register save area */
6490 offset += frame->nregs * UNITS_PER_WORD;
6491 frame->reg_save_offset = offset;
6492
6493 /* On SEH target, registers are pushed just before the frame pointer
6494 location. */
6495 if (TARGET_SEH)
6496 frame->hard_frame_pointer_offset = offset;
6497
6498 /* Calculate the size of the va-arg area (not including padding, if any). */
6499 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
6500
6501 /* Also adjust stack_realign_offset for the largest alignment of
6502 stack slot actually used. */
6503 if (stack_realign_fp
6504 || (cfun->machine->max_used_stack_alignment != 0
6505 && (offset % cfun->machine->max_used_stack_alignment) != 0))
6506 {
6507 /* We may need a 16-byte aligned stack for the remainder of the
6508 register save area, but the stack frame for the local function
6509 may require a greater alignment if using AVX/2/512. In order
6510 to avoid wasting space, we first calculate the space needed for
6511 the rest of the register saves, add that to the stack pointer,
6512 and then realign the stack to the boundary of the start of the
6513 frame for the local function. */
6514 HOST_WIDE_INT space_needed = 0;
6515 HOST_WIDE_INT sse_reg_space_needed = 0;
6516
6517 if (TARGET_64BIT)
6518 {
6519 if (m->call_ms2sysv)
6520 {
6521 m->call_ms2sysv_pad_in = 0;
6522 space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
6523 }
6524
6525 else if (frame->nsseregs)
6526 /* The only ABI that has saved SSE registers (Win64) also has a
6527 16-byte aligned default stack. However, many programs violate
6528 the ABI, and Wine64 forces stack realignment to compensate. */
6529 space_needed = frame->nsseregs * 16;
6530
6531 sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
6532
6533 /* 64-bit frame->va_arg_size should always be a multiple of 16, but
6534 round anyway to be pedantic. */
6535 space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
6536 }
6537 else
6538 space_needed = frame->va_arg_size;
6539
6540 /* Record the allocation size required prior to the realignment AND. */
6541 frame->stack_realign_allocate = space_needed;
6542
6543 /* The re-aligned stack starts at frame->stack_realign_offset. Values
6544 before this point are not directly comparable with values below
6545 this point. Use sp_valid_at to determine if the stack pointer is
6546 valid for a given offset, fp_valid_at for the frame pointer, or
6547 choose_baseaddr to have a base register chosen for you.
6548
6549 Note that the result of (frame->stack_realign_offset
6550 & (stack_alignment_needed - 1)) may not equal zero. */
6551 offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
6552 frame->stack_realign_offset = offset - space_needed;
6553 frame->sse_reg_save_offset = frame->stack_realign_offset
6554 + sse_reg_space_needed;
6555 }
6556 else
6557 {
6558 frame->stack_realign_offset = offset;
6559
6560 if (TARGET_64BIT && m->call_ms2sysv)
6561 {
6562 m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
6563 offset += xlogue_layout::get_instance ().get_stack_space_used ();
6564 }
6565
6566 /* Align and set SSE register save area. */
6567 else if (frame->nsseregs)
6568 {
6569 /* If the incoming stack boundary is at least 16 bytes, or DRAP is
6570 required and the DRAP re-alignment boundary is at least 16 bytes,
6571 then we want the SSE register save area properly aligned. */
6572 if (ix86_incoming_stack_boundary >= 128
6573 || (stack_realign_drap && stack_alignment_needed >= 16))
6574 offset = ROUND_UP (offset, 16);
6575 offset += frame->nsseregs * 16;
6576 }
6577 frame->sse_reg_save_offset = offset;
6578 offset += frame->va_arg_size;
6579 }
6580
6581 /* Align the start of the frame for the local function. When a function call
6582 is removed, the function may become a leaf function. But if arguments may
6583 be passed on the stack, we need to align the stack when there is no
6584 tail call. */
6585 if (m->call_ms2sysv
6586 || frame->va_arg_size != 0
6587 || size != 0
6588 || !crtl->is_leaf
6589 || (!crtl->tail_call_emit
6590 && cfun->machine->outgoing_args_on_stack)
6591 || cfun->calls_alloca
6592 || ix86_current_function_calls_tls_descriptor)
6593 offset = ROUND_UP (offset, stack_alignment_needed);
6594
6595 /* Frame pointer points here. */
6596 frame->frame_pointer_offset = offset;
6597
6598 offset += size;
6599
6600 /* Add the outgoing arguments area. It can be skipped if we eliminated
6601 all the function calls as dead code.
6602 Skipping is, however, impossible when the function calls alloca. The alloca
6603 expander assumes that the last crtl->outgoing_args_size bytes
6604 of the stack frame are unused. */
6605 if (ACCUMULATE_OUTGOING_ARGS
6606 && (!crtl->is_leaf || cfun->calls_alloca
6607 || ix86_current_function_calls_tls_descriptor))
6608 {
6609 offset += crtl->outgoing_args_size;
6610 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6611 }
6612 else
6613 frame->outgoing_arguments_size = 0;
6614
6615 /* Align stack boundary. Only needed if we're calling another function
6616 or using alloca. */
6617 if (!crtl->is_leaf || cfun->calls_alloca
6618 || ix86_current_function_calls_tls_descriptor)
6619 offset = ROUND_UP (offset, preferred_alignment);
6620
6621 /* We've reached end of stack frame. */
6622 frame->stack_pointer_offset = offset;
6623
6624 /* Size prologue needs to allocate. */
6625 to_allocate = offset - frame->sse_reg_save_offset;
6626
6627 if ((!to_allocate && frame->nregs <= 1)
6628 || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
6629 /* If static stack checking is enabled and done with probes,
6630 the registers need to be saved before allocating the frame. */
6631 || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6632 /* If stack clash probing needs a loop, then it needs a
6633 scratch register. But the returned register is only guaranteed
6634 to be safe to use after register saves are complete. So if
6635 stack clash protections are enabled and the allocated frame is
6636 larger than the probe interval, then use pushes to save
6637 callee saved registers. */
6638 || (flag_stack_clash_protection && to_allocate > get_probe_interval ()))
6639 frame->save_regs_using_mov = false;
6640
6641 if (ix86_using_red_zone ()
6642 && crtl->sp_is_unchanging
6643 && crtl->is_leaf
6644 && !ix86_pc_thunk_call_expanded
6645 && !ix86_current_function_calls_tls_descriptor)
6646 {
6647 frame->red_zone_size = to_allocate;
6648 if (frame->save_regs_using_mov)
6649 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6650 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6651 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6652 }
6653 else
6654 frame->red_zone_size = 0;
6655 frame->stack_pointer_offset -= frame->red_zone_size;
6656
6657 /* The SEH frame pointer location is near the bottom of the frame.
6658 This is enforced by the fact that the difference between the
6659 stack pointer and the frame pointer is limited to 240 bytes in
6660 the unwind data structure. */
6661 if (TARGET_SEH)
6662 {
6663 HOST_WIDE_INT diff;
6664
6665 /* If we can leave the frame pointer where it is, do so. Also, returns
6666 the establisher frame for __builtin_frame_address (0). */
6667 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
6668 if (diff <= SEH_MAX_FRAME_SIZE
6669 && (diff > 240 || (diff & 15) != 0)
6670 && !crtl->accesses_prior_frames)
6671 {
6672 /* Ideally we'd determine what portion of the local stack frame
6673 (within the constraint of the lowest 240) is most heavily used.
6674 But without that complication, simply bias the frame pointer
6675 by 128 bytes so as to maximize the amount of the local stack
6676 frame that is addressable with 8-bit offsets. */
6677 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
6678 }
6679 }
6680 }
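
/* For illustration, and ignoring stack realignment and SEH adjustments,
   the offsets computed above describe a frame laid out roughly as
   (higher addresses first):

       return address		<- INCOMING_FRAME_SP_OFFSET
       [pushed static chain]
       [saved frame pointer]	<- hfp_save_offset, hard_frame_pointer_offset
       saved GP registers	<- reg_save_offset
       [saved SSE registers]	<- sse_reg_save_offset
       [va_arg save area]
       local variables		<- frame_pointer_offset
       [outgoing arguments]
       end of frame		<- stack_pointer_offset  */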
6681
6682 /* This is semi-inlined memory_address_length, but simplified
6683 since we know that we're always dealing with reg+offset, and
6684 to avoid having to create and discard all that rtl. */
6685
6686 static inline int
6687 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
6688 {
6689 int len = 4;
6690
6691 if (offset == 0)
6692 {
6693 /* EBP and R13 cannot be encoded without an offset. */
6694 len = (regno == BP_REG || regno == R13_REG);
6695 }
6696 else if (IN_RANGE (offset, -128, 127))
6697 len = 1;
6698
6699 /* ESP and R12 must be encoded with a SIB byte. */
6700 if (regno == SP_REG || regno == R12_REG)
6701 len++;
6702
6703 return len;
6704 }
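
/* For illustration, a few sample lengths as computed above (64-bit register
   names used only as examples):
     0(%rbx)   -> 0 bytes (no displacement needed)
     0(%rbp)   -> 1 byte  (EBP/R13 always need a displacement)
     8(%rsp)   -> 2 bytes (disp8 plus the mandatory SIB byte)
     256(%rax) -> 4 bytes (disp32)  */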
6705
6706 /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
6707 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6708
6709 static bool
6710 sp_valid_at (HOST_WIDE_INT cfa_offset)
6711 {
6712 const struct machine_frame_state &fs = cfun->machine->fs;
6713 if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
6714 {
6715 /* Validate that the cfa_offset isn't in a "no-man's land". */
6716 gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
6717 return false;
6718 }
6719 return fs.sp_valid;
6720 }
6721
6722 /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
6723 the frame save area. The register is saved at CFA - CFA_OFFSET. */
6724
6725 static inline bool
6726 fp_valid_at (HOST_WIDE_INT cfa_offset)
6727 {
6728 const struct machine_frame_state &fs = cfun->machine->fs;
6729 if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
6730 {
6731 /* Validate that the cfa_offset isn't in a "no-man's land". */
6732 gcc_assert (cfa_offset >= fs.sp_realigned_offset);
6733 return false;
6734 }
6735 return fs.fp_valid;
6736 }
6737
6738 /* Choose a base register based upon alignment requested, speed and/or
6739 size. */
6740
6741 static void
6742 choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
6743 HOST_WIDE_INT &base_offset,
6744 unsigned int align_reqested, unsigned int *align)
6745 {
6746 const struct machine_function *m = cfun->machine;
6747 unsigned int hfp_align;
6748 unsigned int drap_align;
6749 unsigned int sp_align;
6750 bool hfp_ok = fp_valid_at (cfa_offset);
6751 bool drap_ok = m->fs.drap_valid;
6752 bool sp_ok = sp_valid_at (cfa_offset);
6753
6754 hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
6755
6756 /* Filter out any registers that don't meet the requested alignment
6757 criteria. */
6758 if (align_reqested)
6759 {
6760 if (m->fs.realigned)
6761 hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
6762 /* SEH unwind code does not currently support REG_CFA_EXPRESSION
6763 notes (which we would need to use a realigned stack pointer),
6764 so disable on SEH targets. */
6765 else if (m->fs.sp_realigned)
6766 sp_align = crtl->stack_alignment_needed;
6767
6768 hfp_ok = hfp_ok && hfp_align >= align_reqested;
6769 drap_ok = drap_ok && drap_align >= align_reqested;
6770 sp_ok = sp_ok && sp_align >= align_reqested;
6771 }
6772
6773 if (m->use_fast_prologue_epilogue)
6774 {
6775 /* Choose the base register most likely to allow the most scheduling
6776 opportunities. Generally FP is valid throughout the function,
6777 while DRAP must be reloaded within the epilogue. But choose either
6778 over the SP due to increased encoding size. */
6779
6780 if (hfp_ok)
6781 {
6782 base_reg = hard_frame_pointer_rtx;
6783 base_offset = m->fs.fp_offset - cfa_offset;
6784 }
6785 else if (drap_ok)
6786 {
6787 base_reg = crtl->drap_reg;
6788 base_offset = 0 - cfa_offset;
6789 }
6790 else if (sp_ok)
6791 {
6792 base_reg = stack_pointer_rtx;
6793 base_offset = m->fs.sp_offset - cfa_offset;
6794 }
6795 }
6796 else
6797 {
6798 HOST_WIDE_INT toffset;
6799 int len = 16, tlen;
6800
6801 /* Choose the base register with the smallest address encoding.
6802 With a tie, choose FP > DRAP > SP. */
6803 if (sp_ok)
6804 {
6805 base_reg = stack_pointer_rtx;
6806 base_offset = m->fs.sp_offset - cfa_offset;
6807 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
6808 }
6809 if (drap_ok)
6810 {
6811 toffset = 0 - cfa_offset;
6812 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
6813 if (tlen <= len)
6814 {
6815 base_reg = crtl->drap_reg;
6816 base_offset = toffset;
6817 len = tlen;
6818 }
6819 }
6820 if (hfp_ok)
6821 {
6822 toffset = m->fs.fp_offset - cfa_offset;
6823 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
6824 if (tlen <= len)
6825 {
6826 base_reg = hard_frame_pointer_rtx;
6827 base_offset = toffset;
6828 }
6829 }
6830 }
6831
6832 /* Set the align return value. */
6833 if (align)
6834 {
6835 if (base_reg == stack_pointer_rtx)
6836 *align = sp_align;
6837 else if (base_reg == crtl->drap_reg)
6838 *align = drap_align;
6839 else if (base_reg == hard_frame_pointer_rtx)
6840 *align = hfp_align;
6841 }
6842 }
6843
6844 /* Return an RTX that points to CFA_OFFSET within the stack frame and
6845 the alignment of address. If ALIGN is non-null, it should point to
6846 an alignment value (in bits) that is preferred or zero and will
6847 receive the alignment of the base register that was selected,
6848 irrespective of whether or not CFA_OFFSET is a multiple of that
6849 alignment value. If it is possible for the base register offset to be
6850 non-immediate then SCRATCH_REGNO should specify a scratch register to
6851 use.
6852
6853 The valid base registers are taken from CFUN->MACHINE->FS. */
6854
6855 static rtx
6856 choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
6857 unsigned int scratch_regno = INVALID_REGNUM)
6858 {
6859 rtx base_reg = NULL;
6860 HOST_WIDE_INT base_offset = 0;
6861
6862 /* If a specific alignment is requested, try to get a base register
6863 with that alignment first. */
6864 if (align && *align)
6865 choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
6866
6867 if (!base_reg)
6868 choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
6869
6870 gcc_assert (base_reg != NULL);
6871
6872 rtx base_offset_rtx = GEN_INT (base_offset);
6873
6874 if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
6875 {
6876 gcc_assert (scratch_regno != INVALID_REGNUM);
6877
6878 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
6879 emit_move_insn (scratch_reg, base_offset_rtx);
6880
6881 return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
6882 }
6883
6884 return plus_constant (Pmode, base_reg, base_offset);
6885 }
6886
6887 /* Emit code to save registers in the prologue. */
6888
6889 static void
6890 ix86_emit_save_regs (void)
6891 {
6892 unsigned int regno;
6893 rtx_insn *insn;
6894
6895 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
6896 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6897 {
6898 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
6899 RTX_FRAME_RELATED_P (insn) = 1;
6900 }
6901 }
6902
6903 /* Emit a single register save at CFA - CFA_OFFSET. */
6904
6905 static void
6906 ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
6907 HOST_WIDE_INT cfa_offset)
6908 {
6909 struct machine_function *m = cfun->machine;
6910 rtx reg = gen_rtx_REG (mode, regno);
6911 rtx mem, addr, base, insn;
6912 unsigned int align = GET_MODE_ALIGNMENT (mode);
6913
6914 addr = choose_baseaddr (cfa_offset, &align);
6915 mem = gen_frame_mem (mode, addr);
6916
6917 /* The location alignment depends upon the base register. */
6918 align = MIN (GET_MODE_ALIGNMENT (mode), align);
6919 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
6920 set_mem_align (mem, align);
6921
6922 insn = emit_insn (gen_rtx_SET (mem, reg));
6923 RTX_FRAME_RELATED_P (insn) = 1;
6924
6925 base = addr;
6926 if (GET_CODE (base) == PLUS)
6927 base = XEXP (base, 0);
6928 gcc_checking_assert (REG_P (base));
6929
6930 /* When saving registers into a re-aligned local stack frame, avoid
6931 any tricky guessing by dwarf2out. */
6932 if (m->fs.realigned)
6933 {
6934 gcc_checking_assert (stack_realign_drap);
6935
6936 if (regno == REGNO (crtl->drap_reg))
6937 {
6938 /* A bit of a hack. We force the DRAP register to be saved in
6939 the re-aligned stack frame, which provides us with a copy
6940 of the CFA that will last past the prologue. Install it. */
6941 gcc_checking_assert (cfun->machine->fs.fp_valid);
6942 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6943 cfun->machine->fs.fp_offset - cfa_offset);
6944 mem = gen_rtx_MEM (mode, addr);
6945 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
6946 }
6947 else
6948 {
6949 /* The frame pointer is a stable reference within the
6950 aligned frame. Use it. */
6951 gcc_checking_assert (cfun->machine->fs.fp_valid);
6952 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
6953 cfun->machine->fs.fp_offset - cfa_offset);
6954 mem = gen_rtx_MEM (mode, addr);
6955 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6956 }
6957 }
6958
6959 else if (base == stack_pointer_rtx && m->fs.sp_realigned
6960 && cfa_offset >= m->fs.sp_realigned_offset)
6961 {
6962 gcc_checking_assert (stack_realign_fp);
6963 add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
6964 }
6965
6966 /* The memory may not be relative to the current CFA register,
6967 which means that we may need to generate a new pattern for
6968 use by the unwind info. */
6969 else if (base != m->fs.cfa_reg)
6970 {
6971 addr = plus_constant (Pmode, m->fs.cfa_reg,
6972 m->fs.cfa_offset - cfa_offset);
6973 mem = gen_rtx_MEM (mode, addr);
6974 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6975 }
6976 }
6977
6978 /* Emit code to save registers using MOV insns.
6979 First register is stored at CFA - CFA_OFFSET. */
6980 static void
6981 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
6982 {
6983 unsigned int regno;
6984
6985 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6986 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
6987 {
6988 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
6989 cfa_offset -= UNITS_PER_WORD;
6990 }
6991 }
6992
6993 /* Emit code to save SSE registers using MOV insns.
6994 First register is stored at CFA - CFA_OFFSET. */
6995 static void
6996 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
6997 {
6998 unsigned int regno;
6999
7000 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7001 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
7002 {
7003 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
7004 cfa_offset -= GET_MODE_SIZE (V4SFmode);
7005 }
7006 }
7007
7008 static GTY(()) rtx queued_cfa_restores;
7009
7010 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
7011 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
7012 Don't add the note if the previously saved value will be left untouched
7013 within the stack red zone until return, as unwinders can find the same value
7014 in the register and on the stack. */
7015
7016 static void
7017 ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
7018 {
7019 if (!crtl->shrink_wrapped
7020 && cfa_offset <= cfun->machine->fs.red_zone_offset)
7021 return;
7022
7023 if (insn)
7024 {
7025 add_reg_note (insn, REG_CFA_RESTORE, reg);
7026 RTX_FRAME_RELATED_P (insn) = 1;
7027 }
7028 else
7029 queued_cfa_restores
7030 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
7031 }
7032
7033 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
7034
7035 static void
7036 ix86_add_queued_cfa_restore_notes (rtx insn)
7037 {
7038 rtx last;
7039 if (!queued_cfa_restores)
7040 return;
7041 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
7042 ;
7043 XEXP (last, 1) = REG_NOTES (insn);
7044 REG_NOTES (insn) = queued_cfa_restores;
7045 queued_cfa_restores = NULL_RTX;
7046 RTX_FRAME_RELATED_P (insn) = 1;
7047 }
7048
7049 /* Expand prologue or epilogue stack adjustment.
7050 The pattern exists to put a dependency on all ebp-based memory accesses.
7051 STYLE should be negative if instructions should be marked as frame related,
7052 zero if the %r11 register is live and cannot be freely used, and positive
7053 otherwise. */
7054
7055 static rtx
7056 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
7057 int style, bool set_cfa)
7058 {
7059 struct machine_function *m = cfun->machine;
7060 rtx addend = offset;
7061 rtx insn;
7062 bool add_frame_related_expr = false;
7063
7064 if (!x86_64_immediate_operand (offset, Pmode))
7065 {
7066 /* r11 is used by indirect sibcall return as well, set before the
7067 epilogue and used after the epilogue. */
7068 if (style)
7069 addend = gen_rtx_REG (Pmode, R11_REG);
7070 else
7071 {
7072 gcc_assert (src != hard_frame_pointer_rtx
7073 && dest != hard_frame_pointer_rtx);
7074 addend = hard_frame_pointer_rtx;
7075 }
7076 emit_insn (gen_rtx_SET (addend, offset));
7077 if (style < 0)
7078 add_frame_related_expr = true;
7079 }
7080
7081 insn = emit_insn (gen_pro_epilogue_adjust_stack_add
7082 (Pmode, dest, src, addend));
7083 if (style >= 0)
7084 ix86_add_queued_cfa_restore_notes (insn);
7085
7086 if (set_cfa)
7087 {
7088 rtx r;
7089
7090 gcc_assert (m->fs.cfa_reg == src);
7091 m->fs.cfa_offset += INTVAL (offset);
7092 m->fs.cfa_reg = dest;
7093
7094 r = gen_rtx_PLUS (Pmode, src, offset);
7095 r = gen_rtx_SET (dest, r);
7096 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
7097 RTX_FRAME_RELATED_P (insn) = 1;
7098 }
7099 else if (style < 0)
7100 {
7101 RTX_FRAME_RELATED_P (insn) = 1;
7102 if (add_frame_related_expr)
7103 {
7104 rtx r = gen_rtx_PLUS (Pmode, src, offset);
7105 r = gen_rtx_SET (dest, r);
7106 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
7107 }
7108 }
7109
7110 if (dest == stack_pointer_rtx)
7111 {
7112 HOST_WIDE_INT ooffset = m->fs.sp_offset;
7113 bool valid = m->fs.sp_valid;
7114 bool realigned = m->fs.sp_realigned;
7115
7116 if (src == hard_frame_pointer_rtx)
7117 {
7118 valid = m->fs.fp_valid;
7119 realigned = false;
7120 ooffset = m->fs.fp_offset;
7121 }
7122 else if (src == crtl->drap_reg)
7123 {
7124 valid = m->fs.drap_valid;
7125 realigned = false;
7126 ooffset = 0;
7127 }
7128 else
7129 {
7130 /* Otherwise there are two possibilities: SP itself, which we set
7131 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
7132 taken care of by hand along the eh_return path. */
7133 gcc_checking_assert (src == stack_pointer_rtx
7134 || offset == const0_rtx);
7135 }
7136
7137 m->fs.sp_offset = ooffset - INTVAL (offset);
7138 m->fs.sp_valid = valid;
7139 m->fs.sp_realigned = realigned;
7140 }
7141 return insn;
7142 }
7143
7144 /* Find an available register to be used as the dynamic realign argument
7145 pointer register. Such a register will be written in the prologue and
7146 used at the beginning of the body, so it must not be
7147 1. a parameter passing register.
7148 2. the GOT pointer.
7149 We reuse the static-chain register if it is available. Otherwise, we
7150 use DI for i386 and R13 for x86-64. We chose R13 since it has a
7151 shorter encoding.
7152
7153 Return: the regno of the chosen register. */
7154
7155 static unsigned int
7156 find_drap_reg (void)
7157 {
7158 tree decl = cfun->decl;
7159
7160 /* Always use callee-saved register if there are no caller-saved
7161 registers. */
7162 if (TARGET_64BIT)
7163 {
7164 /* Use R13 for a nested function or a function that needs a static chain.
7165 Since a function with a tail call may use any caller-saved
7166 register in the epilogue, DRAP must not use a caller-saved
7167 register in such a case. */
7168 if (DECL_STATIC_CHAIN (decl)
7169 || cfun->machine->no_caller_saved_registers
7170 || crtl->tail_call_emit)
7171 return R13_REG;
7172
7173 return R10_REG;
7174 }
7175 else
7176 {
7177 /* Use DI for a nested function or a function that needs a static chain.
7178 Since a function with a tail call may use any caller-saved
7179 register in the epilogue, DRAP must not use a caller-saved
7180 register in such a case. */
7181 if (DECL_STATIC_CHAIN (decl)
7182 || cfun->machine->no_caller_saved_registers
7183 || crtl->tail_call_emit)
7184 return DI_REG;
7185
7186 /* Reuse static chain register if it isn't used for parameter
7187 passing. */
7188 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
7189 {
7190 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
7191 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
7192 return CX_REG;
7193 }
7194 return DI_REG;
7195 }
7196 }
7197
7198 /* Return minimum incoming stack alignment. */
7199
7200 static unsigned int
7201 ix86_minimum_incoming_stack_boundary (bool sibcall)
7202 {
7203 unsigned int incoming_stack_boundary;
7204
7205 /* The stack of an interrupt handler is aligned to 128 bits in 64-bit mode. */
7206 if (cfun->machine->func_type != TYPE_NORMAL)
7207 incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
7208 /* Prefer the one specified at command line. */
7209 else if (ix86_user_incoming_stack_boundary)
7210 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
7211 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
7212 if -mstackrealign is used, this isn't a sibcall check, and the
7213 estimated stack alignment is 128 bits. */
7214 else if (!sibcall
7215 && ix86_force_align_arg_pointer
7216 && crtl->stack_alignment_estimated == 128)
7217 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7218 else
7219 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
7220
7221 /* Incoming stack alignment can be changed on individual functions
7222 via force_align_arg_pointer attribute. We use the smallest
7223 incoming stack boundary. */
7224 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
7225 && lookup_attribute ("force_align_arg_pointer",
7226 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
7227 incoming_stack_boundary = MIN_STACK_BOUNDARY;
7228
7229 /* The incoming stack frame has to be aligned at least at
7230 parm_stack_boundary. */
7231 if (incoming_stack_boundary < crtl->parm_stack_boundary)
7232 incoming_stack_boundary = crtl->parm_stack_boundary;
7233
7234 /* The stack at the entry of main is aligned by the runtime. We use the
7235 smallest incoming stack boundary. */
7236 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
7237 && DECL_NAME (current_function_decl)
7238 && MAIN_NAME_P (DECL_NAME (current_function_decl))
7239 && DECL_FILE_SCOPE_P (current_function_decl))
7240 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
7241
7242 return incoming_stack_boundary;
7243 }
7244
7245 /* Update incoming stack boundary and estimated stack alignment. */
7246
7247 static void
7248 ix86_update_stack_boundary (void)
7249 {
7250 ix86_incoming_stack_boundary
7251 = ix86_minimum_incoming_stack_boundary (false);
7252
7253 /* x86_64 vararg needs 16byte stack alignment for register save area. */
7254 if (TARGET_64BIT
7255 && cfun->stdarg
7256 && crtl->stack_alignment_estimated < 128)
7257 crtl->stack_alignment_estimated = 128;
7258
7259 /* __tls_get_addr needs to be called with 16-byte aligned stack. */
7260 if (ix86_tls_descriptor_calls_expanded_in_cfun
7261 && crtl->preferred_stack_boundary < 128)
7262 crtl->preferred_stack_boundary = 128;
7263 }
7264
7265 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
7266 needed or an rtx for DRAP otherwise. */
7267
7268 static rtx
7269 ix86_get_drap_rtx (void)
7270 {
7271 /* We must use DRAP if there are outgoing arguments on the stack or
7272 the stack pointer register is clobbered by an asm statement, and
7273 ACCUMULATE_OUTGOING_ARGS is false. */
7274 if (ix86_force_drap
7275 || ((cfun->machine->outgoing_args_on_stack
7276 || crtl->sp_is_clobbered_by_asm)
7277 && !ACCUMULATE_OUTGOING_ARGS))
7278 crtl->need_drap = true;
7279
7280 if (stack_realign_drap)
7281 {
7282 /* Assign DRAP to vDRAP and return vDRAP. */
7283 unsigned int regno = find_drap_reg ();
7284 rtx drap_vreg;
7285 rtx arg_ptr;
7286 rtx_insn *seq, *insn;
7287
7288 arg_ptr = gen_rtx_REG (Pmode, regno);
7289 crtl->drap_reg = arg_ptr;
7290
7291 start_sequence ();
7292 drap_vreg = copy_to_reg (arg_ptr);
7293 seq = get_insns ();
7294 end_sequence ();
7295
7296 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
7297 if (!optimize)
7298 {
7299 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
7300 RTX_FRAME_RELATED_P (insn) = 1;
7301 }
7302 return drap_vreg;
7303 }
7304 else
7305 return NULL;
7306 }
7307
7308 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
7309
7310 static rtx
7311 ix86_internal_arg_pointer (void)
7312 {
7313 return virtual_incoming_args_rtx;
7314 }
7315
7316 struct scratch_reg {
7317 rtx reg;
7318 bool saved;
7319 };
7320
7321 /* Return a short-lived scratch register for use on function entry.
7322 In 32-bit mode, it is valid only after the registers are saved
7323 in the prologue. This register must be released by means of
7324 release_scratch_register_on_entry once it is dead. */
7325
7326 static void
7327 get_scratch_register_on_entry (struct scratch_reg *sr)
7328 {
7329 int regno;
7330
7331 sr->saved = false;
7332
7333 if (TARGET_64BIT)
7334 {
7335 /* We always use R11 in 64-bit mode. */
7336 regno = R11_REG;
7337 }
7338 else
7339 {
7340 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
7341 bool fastcall_p
7342 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7343 bool thiscall_p
7344 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
7345 bool static_chain_p = DECL_STATIC_CHAIN (decl);
7346 int regparm = ix86_function_regparm (fntype, decl);
7347 int drap_regno
7348 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
7349
7350 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
7351 for the static chain register. */
7352 if ((regparm < 1 || (fastcall_p && !static_chain_p))
7353 && drap_regno != AX_REG)
7354 regno = AX_REG;
7355 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
7356 for the static chain register. */
7357 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
7358 regno = AX_REG;
7359 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
7360 regno = DX_REG;
7361 /* ecx is the static chain register. */
7362 else if (regparm < 3 && !fastcall_p && !thiscall_p
7363 && !static_chain_p
7364 && drap_regno != CX_REG)
7365 regno = CX_REG;
7366 else if (ix86_save_reg (BX_REG, true, false))
7367 regno = BX_REG;
7368 /* esi is the static chain register. */
7369 else if (!(regparm == 3 && static_chain_p)
7370 && ix86_save_reg (SI_REG, true, false))
7371 regno = SI_REG;
7372 else if (ix86_save_reg (DI_REG, true, false))
7373 regno = DI_REG;
7374 else
7375 {
7376 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
7377 sr->saved = true;
7378 }
7379 }
7380
7381 sr->reg = gen_rtx_REG (Pmode, regno);
7382 if (sr->saved)
7383 {
7384 rtx_insn *insn = emit_insn (gen_push (sr->reg));
7385 RTX_FRAME_RELATED_P (insn) = 1;
7386 }
7387 }
7388
7389 /* Release a scratch register obtained from the preceding function.
7390
7391 If RELEASE_VIA_POP is true, we just pop the register off the stack
7392 to release it. This is what non-Linux systems use with -fstack-check.
7393
7394 Otherwise we use OFFSET to locate the saved register and the
7395 allocated stack space becomes part of the local frame and is
7396 deallocated by the epilogue. */
7397
7398 static void
7399 release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
7400 bool release_via_pop)
7401 {
7402 if (sr->saved)
7403 {
7404 if (release_via_pop)
7405 {
7406 struct machine_function *m = cfun->machine;
7407 rtx x, insn = emit_insn (gen_pop (sr->reg));
7408
7409 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
7410 RTX_FRAME_RELATED_P (insn) = 1;
7411 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7412 x = gen_rtx_SET (stack_pointer_rtx, x);
7413 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
7414 m->fs.sp_offset -= UNITS_PER_WORD;
7415 }
7416 else
7417 {
7418 rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
7419 x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
7420 emit_insn (x);
7421 }
7422 }
7423 }
7424
7425 /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
7426
7427 If INT_REGISTERS_SAVED is true, then integer registers have already been
7428 pushed on the stack.
7429
7430 If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
7431 beyond SIZE bytes.
7432
7433 This assumes no knowledge of the current probing state, i.e. it is never
7434 allowed to allocate more than PROBE_INTERVAL bytes of stack space without
7435 a suitable probe. */
7436
7437 static void
7438 ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
7439 const bool int_registers_saved,
7440 const bool protection_area)
7441 {
7442 struct machine_function *m = cfun->machine;
7443
7444 /* If this function does not statically allocate stack space, then
7445 no probes are needed. */
7446 if (!size)
7447 {
7448 /* However, the allocation of space via pushes for register
7449 saves could be viewed as allocating space, but without the
7450 need to probe. */
7451 if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
7452 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7453 else
7454 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
7455 return;
7456 }
7457
7458 /* If we are a noreturn function, then we have to consider the
7459 possibility that we're called via a jump rather than a call.
7460
7461 Thus we don't have the implicit probe generated by saving the
7462 return address into the stack at the call. Thus, the stack
7463 pointer could be anywhere in the guard page. The safe thing
7464 to do is emit a probe now.
7465
7466 The probe can be avoided if we have already emitted any callee
7467 register saves into the stack or have a frame pointer (which will
7468 have been saved as well). Those saves will function as implicit
7469 probes.
7470
7471 ?!? This should be revamped to work like aarch64 and s390 where
7472 we track the offset from the most recent probe. Normally that
7473 offset would be zero. For a noreturn function we would reset
7474 it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then
7475 we just probe when we cross PROBE_INTERVAL. */
7476 if (TREE_THIS_VOLATILE (cfun->decl)
7477 && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
7478 {
7479 /* We can safely use any register here since we're just going to push
7480 its value and immediately pop it back. But we do try and avoid
7481 argument passing registers so as not to introduce dependencies in
7482 the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */
7483 rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
7484 rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
7485 rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
7486 m->fs.sp_offset -= UNITS_PER_WORD;
7487 if (m->fs.cfa_reg == stack_pointer_rtx)
7488 {
7489 m->fs.cfa_offset -= UNITS_PER_WORD;
7490 rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
7491 x = gen_rtx_SET (stack_pointer_rtx, x);
7492 add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
7493 RTX_FRAME_RELATED_P (insn_push) = 1;
7494 x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
7495 x = gen_rtx_SET (stack_pointer_rtx, x);
7496 add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
7497 RTX_FRAME_RELATED_P (insn_pop) = 1;
7498 }
7499 emit_insn (gen_blockage ());
7500 }
7501
7502 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7503 const int dope = 4 * UNITS_PER_WORD;
7504
7505 /* If there is protection area, take it into account in the size. */
7506 if (protection_area)
7507 size += probe_interval + dope;
7508
7509 /* If we allocate less than the size of the guard statically,
7510 then no probing is necessary, but we do need to allocate
7511 the stack. */
7512 else if (size < (1 << param_stack_clash_protection_guard_size))
7513 {
7514 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7515 GEN_INT (-size), -1,
7516 m->fs.cfa_reg == stack_pointer_rtx);
7517 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
7518 return;
7519 }
7520
7521 /* We're allocating a large enough stack frame that we need to
7522 emit probes. Either emit them inline or in a loop depending
7523 on the size. */
7524 if (size <= 4 * probe_interval)
7525 {
7526 HOST_WIDE_INT i;
7527 for (i = probe_interval; i <= size; i += probe_interval)
7528 {
7529 /* Allocate PROBE_INTERVAL bytes. */
7530 rtx insn
7531 = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7532 GEN_INT (-probe_interval), -1,
7533 m->fs.cfa_reg == stack_pointer_rtx);
7534 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
7535
7536 /* And probe at *sp. */
7537 emit_stack_probe (stack_pointer_rtx);
7538 emit_insn (gen_blockage ());
7539 }
7540
7541 /* We need to allocate space for the residual, but we do not need
7542 to probe the residual... */
7543 HOST_WIDE_INT residual = (i - probe_interval - size);
7544 if (residual)
7545 {
7546 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7547 GEN_INT (residual), -1,
7548 m->fs.cfa_reg == stack_pointer_rtx);
7549
7550 /* ...except if there is a protection area to maintain. */
7551 if (protection_area)
7552 emit_stack_probe (stack_pointer_rtx);
7553 }
7554
7555 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
7556 }
7557 else
7558 {
7559 /* We expect the GP registers to be saved when probes are used
7560 as the probing sequences might need a scratch register and
7561 the routine to allocate one assumes the integer registers
7562 have already been saved. */
7563 gcc_assert (int_registers_saved);
7564
7565 struct scratch_reg sr;
7566 get_scratch_register_on_entry (&sr);
7567
7568 /* If we needed to save a register, then account for any space
7569 that was pushed (we are not going to pop the register when
7570 we do the restore). */
7571 if (sr.saved)
7572 size -= UNITS_PER_WORD;
7573
7574 /* Step 1: round SIZE down to a multiple of the interval. */
7575 HOST_WIDE_INT rounded_size = size & -probe_interval;
7576
7577 /* Step 2: compute final value of the loop counter. Use lea if
7578 possible. */
7579 rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
7580 rtx insn;
7581 if (address_no_seg_operand (addr, Pmode))
7582 insn = emit_insn (gen_rtx_SET (sr.reg, addr));
7583 else
7584 {
7585 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
7586 insn = emit_insn (gen_rtx_SET (sr.reg,
7587 gen_rtx_PLUS (Pmode, sr.reg,
7588 stack_pointer_rtx)));
7589 }
7590 if (m->fs.cfa_reg == stack_pointer_rtx)
7591 {
7592 add_reg_note (insn, REG_CFA_DEF_CFA,
7593 plus_constant (Pmode, sr.reg,
7594 m->fs.cfa_offset + rounded_size));
7595 RTX_FRAME_RELATED_P (insn) = 1;
7596 }
7597
7598 /* Step 3: the loop. */
7599 rtx size_rtx = GEN_INT (rounded_size);
7600 insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
7601 size_rtx));
7602 if (m->fs.cfa_reg == stack_pointer_rtx)
7603 {
7604 m->fs.cfa_offset += rounded_size;
7605 add_reg_note (insn, REG_CFA_DEF_CFA,
7606 plus_constant (Pmode, stack_pointer_rtx,
7607 m->fs.cfa_offset));
7608 RTX_FRAME_RELATED_P (insn) = 1;
7609 }
7610 m->fs.sp_offset += rounded_size;
7611 emit_insn (gen_blockage ());
7612
7613 /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
7614 is equal to ROUNDED_SIZE. */
7615
7616 if (size != rounded_size)
7617 {
7618 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7619 GEN_INT (rounded_size - size), -1,
7620 m->fs.cfa_reg == stack_pointer_rtx);
7621
7622 if (protection_area)
7623 emit_stack_probe (stack_pointer_rtx);
7624 }
7625
7626 dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
7627
7628 /* This does not deallocate the space reserved for the scratch
7629 register. That will be deallocated in the epilogue. */
7630 release_scratch_register_on_entry (&sr, size, false);
7631 }
7632
7633 /* Adjust back to account for the protection area. */
7634 if (protection_area)
7635 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7636 GEN_INT (probe_interval + dope), -1,
7637 m->fs.cfa_reg == stack_pointer_rtx);
7638
7639 /* Make sure nothing is scheduled before we are done. */
7640 emit_insn (gen_blockage ());
7641 }
7642
7643 /* Adjust the stack pointer up to REG while probing it. */
7644
7645 const char *
7646 output_adjust_stack_and_probe (rtx reg)
7647 {
7648 static int labelno = 0;
7649 char loop_lab[32];
7650 rtx xops[2];
7651
7652 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7653
7654 /* Loop. */
7655 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7656
7657 /* SP = SP - PROBE_INTERVAL. */
7658 xops[0] = stack_pointer_rtx;
7659 xops[1] = GEN_INT (get_probe_interval ());
7660 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7661
7662 /* Probe at SP. */
7663 xops[1] = const0_rtx;
7664 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
7665
7666 /* Test if SP == LAST_ADDR. */
7667 xops[0] = stack_pointer_rtx;
7668 xops[1] = reg;
7669 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7670
7671 /* Branch. */
7672 fputs ("\tjne\t", asm_out_file);
7673 assemble_name_raw (asm_out_file, loop_lab);
7674 fputc ('\n', asm_out_file);
7675
7676 return "";
7677 }
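
/* Roughly, with the default 4 KiB probe interval and %r11 as the limit
   register (the usual 64-bit scratch), the loop above expands to something
   like:

	.LPSRL0:
		subq	$4096, %rsp
		orq	$0, (%rsp)
		cmpq	%r11, %rsp
		jne	.LPSRL0

   i.e. the stack pointer is moved down one interval at a time and each newly
   exposed page is touched before the next adjustment.  */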
7678
7679 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
7680 inclusive. These are offsets from the current stack pointer.
7681
7682 INT_REGISTERS_SAVED is true if integer registers have already been
7683 pushed on the stack. */
7684
7685 static void
7686 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
7687 const bool int_registers_saved)
7688 {
7689 const HOST_WIDE_INT probe_interval = get_probe_interval ();
7690
7691 /* See if we have a constant small number of probes to generate. If so,
7692 that's the easy case. The run-time loop is made up of 6 insns in the
7693 generic case while the compile-time loop is made up of n insns for n
7694 intervals.
7695 if (size <= 6 * probe_interval)
7696 {
7697 HOST_WIDE_INT i;
7698
7699 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
7700 it exceeds SIZE. If only one probe is needed, this will not
7701 generate any code. Then probe at FIRST + SIZE. */
7702 for (i = probe_interval; i < size; i += probe_interval)
7703 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7704 -(first + i)));
7705
7706 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
7707 -(first + size)));
7708 }
7709
7710 /* Otherwise, do the same as above, but in a loop. Note that we must be
7711 extra careful with variables wrapping around because we might be at
7712 the very top (or the very bottom) of the address space and we have
7713 to be able to handle this case properly; in particular, we use an
7714 equality test for the loop condition. */
7715 else
7716 {
7717 /* We expect the GP registers to be saved when probes are used
7718 as the probing sequences might need a scratch register and
7719 the routine to allocate one assumes the integer registers
7720 have already been saved. */
7721 gcc_assert (int_registers_saved);
7722
7723 HOST_WIDE_INT rounded_size, last;
7724 struct scratch_reg sr;
7725
7726 get_scratch_register_on_entry (&sr);
7727
7728
7729 /* Step 1: round SIZE to the previous multiple of the interval. */
7730
7731 rounded_size = ROUND_DOWN (size, probe_interval);
7732
7733
7734 /* Step 2: compute initial and final value of the loop counter. */
7735
7736 /* TEST_OFFSET = FIRST. */
7737 emit_move_insn (sr.reg, GEN_INT (-first));
7738
7739 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
7740 last = first + rounded_size;
7741
7742
7743 /* Step 3: the loop
7744
7745 do
7746 {
7747 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
7748 probe at TEST_ADDR
7749 }
7750 while (TEST_ADDR != LAST_ADDR)
7751
7752 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
7753 until it is equal to ROUNDED_SIZE. */
7754
7755 emit_insn
7756 (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
7757
7758
7759 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
7760 that SIZE is equal to ROUNDED_SIZE. */
7761
7762 if (size != rounded_size)
7763 emit_stack_probe (plus_constant (Pmode,
7764 gen_rtx_PLUS (Pmode,
7765 stack_pointer_rtx,
7766 sr.reg),
7767 rounded_size - size));
7768
7769 release_scratch_register_on_entry (&sr, size, true);
7770 }
7771
7772 /* Make sure nothing is scheduled before we are done. */
7773 emit_insn (gen_blockage ());
7774 }
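
/* A worked example of the inline case above (purely illustrative): with
   FIRST = 0x1000, SIZE = 0x2800 and a 0x1000 probe interval, probes are
   emitted at sp - 0x2000, sp - 0x3000 and finally sp - 0x3800, so no two
   consecutive probed addresses are more than one interval apart.  */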
7775
7776 /* Probe a range of stack addresses from REG to END, inclusive. These are
7777 offsets from the current stack pointer. */
7778
7779 const char *
7780 output_probe_stack_range (rtx reg, rtx end)
7781 {
7782 static int labelno = 0;
7783 char loop_lab[32];
7784 rtx xops[3];
7785
7786 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
7787
7788 /* Loop. */
7789 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
7790
7791 /* TEST_ADDR = TEST_ADDR - PROBE_INTERVAL. */
7792 xops[0] = reg;
7793 xops[1] = GEN_INT (get_probe_interval ());
7794 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
7795
7796 /* Probe at TEST_ADDR. */
7797 xops[0] = stack_pointer_rtx;
7798 xops[1] = reg;
7799 xops[2] = const0_rtx;
7800 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
7801
7802 /* Test if TEST_ADDR == LAST_ADDR. */
7803 xops[0] = reg;
7804 xops[1] = end;
7805 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
7806
7807 /* Branch. */
7808 fputs ("\tjne\t", asm_out_file);
7809 assemble_name_raw (asm_out_file, loop_lab);
7810 fputc ('\n', asm_out_file);
7811
7812 return "";
7813 }
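
/* Roughly, assuming %rax holds the running negative offset and the end value
   is the constant -0x3000 (-12288), the loop above expands to something like:

	.LPSRL1:
		subq	$4096, %rax
		orq	$0, (%rsp,%rax)
		cmpq	$-12288, %rax
		jne	.LPSRL1
*/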
7814
7815 /* Set stack_frame_required to false if stack frame isn't required.
7816 Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
7817 slot used if stack frame is required and CHECK_STACK_SLOT is true. */
7818
7819 static void
7820 ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
7821 bool check_stack_slot)
7822 {
7823 HARD_REG_SET set_up_by_prologue, prologue_used;
7824 basic_block bb;
7825
7826 CLEAR_HARD_REG_SET (prologue_used);
7827 CLEAR_HARD_REG_SET (set_up_by_prologue);
7828 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
7829 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
7830 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
7831 HARD_FRAME_POINTER_REGNUM);
7832
7833 /* The needed stack alignment is capped by the preferred stack boundary. */
7834 if (stack_alignment > crtl->preferred_stack_boundary)
7835 stack_alignment = crtl->preferred_stack_boundary;
7836
7837 bool require_stack_frame = false;
7838
7839 FOR_EACH_BB_FN (bb, cfun)
7840 {
7841 rtx_insn *insn;
7842 FOR_BB_INSNS (bb, insn)
7843 if (NONDEBUG_INSN_P (insn)
7844 && requires_stack_frame_p (insn, prologue_used,
7845 set_up_by_prologue))
7846 {
7847 require_stack_frame = true;
7848
7849 if (check_stack_slot)
7850 {
7851 /* Find the maximum stack alignment. */
7852 subrtx_iterator::array_type array;
7853 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
7854 if (MEM_P (*iter)
7855 && (reg_mentioned_p (stack_pointer_rtx,
7856 *iter)
7857 || reg_mentioned_p (frame_pointer_rtx,
7858 *iter)))
7859 {
7860 unsigned int alignment = MEM_ALIGN (*iter);
7861 if (alignment > stack_alignment)
7862 stack_alignment = alignment;
7863 }
7864 }
7865 }
7866 }
7867
7868 cfun->machine->stack_frame_required = require_stack_frame;
7869 }
7870
7871 /* Finalize stack_realign_needed and frame_pointer_needed flags, which
7872 will guide prologue/epilogue to be generated in correct form. */
7873
7874 static void
7875 ix86_finalize_stack_frame_flags (void)
7876 {
7877 /* Check if stack realignment is really needed after reload, and
7878 store the result in cfun. */
7879 unsigned int incoming_stack_boundary
7880 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
7881 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
7882 unsigned int stack_alignment
7883 = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
7884 ? crtl->max_used_stack_slot_alignment
7885 : crtl->stack_alignment_needed);
7886 unsigned int stack_realign
7887 = (incoming_stack_boundary < stack_alignment);
7888 bool recompute_frame_layout_p = false;
7889
7890 if (crtl->stack_realign_finalized)
7891 {
7892 /* After stack_realign_needed is finalized, we can no longer
7893 change it. */
7894 gcc_assert (crtl->stack_realign_needed == stack_realign);
7895 return;
7896 }
7897
7898 /* It is always safe to compute max_used_stack_alignment. We
7899 compute it only if 128-bit aligned load/store may be generated
7900 on a misaligned stack slot, which would lead to a segfault. */
7901 bool check_stack_slot
7902 = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
7903 ix86_find_max_used_stack_alignment (stack_alignment,
7904 check_stack_slot);
7905
7906 /* If the only reason for frame_pointer_needed is that we conservatively
7907 assumed stack realignment might be needed or -fno-omit-frame-pointer
7908 is used, but in the end nothing that needed the stack alignment was
7909 spilled and there is no stack access, clear frame_pointer_needed and
7910 say we don't need stack realignment. */
7911 if ((stack_realign || (!flag_omit_frame_pointer && optimize))
7912 && frame_pointer_needed
7913 && crtl->is_leaf
7914 && crtl->sp_is_unchanging
7915 && !ix86_current_function_calls_tls_descriptor
7916 && !crtl->accesses_prior_frames
7917 && !cfun->calls_alloca
7918 && !crtl->calls_eh_return
7919 /* See ira_setup_eliminable_regset for the rationale. */
7920 && !(STACK_CHECK_MOVING_SP
7921 && flag_stack_check
7922 && flag_exceptions
7923 && cfun->can_throw_non_call_exceptions)
7924 && !ix86_frame_pointer_required ()
7925 && ix86_get_frame_size () == 0
7926 && ix86_nsaved_sseregs () == 0
7927 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
7928 {
7929 if (cfun->machine->stack_frame_required)
7930 {
7931 /* Stack frame is required. If stack alignment needed is less
7932 than incoming stack boundary, don't realign stack. */
7933 stack_realign = incoming_stack_boundary < stack_alignment;
7934 if (!stack_realign)
7935 {
7936 crtl->max_used_stack_slot_alignment
7937 = incoming_stack_boundary;
7938 crtl->stack_alignment_needed
7939 = incoming_stack_boundary;
7940 /* Also update preferred_stack_boundary for leaf
7941 functions. */
7942 crtl->preferred_stack_boundary
7943 = incoming_stack_boundary;
7944 }
7945 }
7946 else
7947 {
7948 /* If drap has been set, but it actually isn't live at the
7949 start of the function, there is no reason to set it up. */
7950 if (crtl->drap_reg)
7951 {
7952 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
7953 if (! REGNO_REG_SET_P (DF_LR_IN (bb),
7954 REGNO (crtl->drap_reg)))
7955 {
7956 crtl->drap_reg = NULL_RTX;
7957 crtl->need_drap = false;
7958 }
7959 }
7960 else
7961 cfun->machine->no_drap_save_restore = true;
7962
7963 frame_pointer_needed = false;
7964 stack_realign = false;
7965 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
7966 crtl->stack_alignment_needed = incoming_stack_boundary;
7967 crtl->stack_alignment_estimated = incoming_stack_boundary;
7968 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
7969 crtl->preferred_stack_boundary = incoming_stack_boundary;
7970 df_finish_pass (true);
7971 df_scan_alloc (NULL);
7972 df_scan_blocks ();
7973 df_compute_regs_ever_live (true);
7974 df_analyze ();
7975
7976 if (flag_var_tracking)
7977 {
7978 /* Since frame pointer is no longer available, replace it with
7979 stack pointer - UNITS_PER_WORD in debug insns. */
7980 df_ref ref, next;
7981 for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
7982 ref; ref = next)
7983 {
7984 next = DF_REF_NEXT_REG (ref);
7985 if (!DF_REF_INSN_INFO (ref))
7986 continue;
7987
7988 /* Make sure the next ref is for a different instruction,
7989 so that we're not affected by the rescan. */
7990 rtx_insn *insn = DF_REF_INSN (ref);
7991 while (next && DF_REF_INSN (next) == insn)
7992 next = DF_REF_NEXT_REG (next);
7993
7994 if (DEBUG_INSN_P (insn))
7995 {
7996 bool changed = false;
7997 for (; ref != next; ref = DF_REF_NEXT_REG (ref))
7998 {
7999 rtx *loc = DF_REF_LOC (ref);
8000 if (*loc == hard_frame_pointer_rtx)
8001 {
8002 *loc = plus_constant (Pmode,
8003 stack_pointer_rtx,
8004 -UNITS_PER_WORD);
8005 changed = true;
8006 }
8007 }
8008 if (changed)
8009 df_insn_rescan (insn);
8010 }
8011 }
8012 }
8013
8014 recompute_frame_layout_p = true;
8015 }
8016 }
8017 else if (crtl->max_used_stack_slot_alignment >= 128
8018 && cfun->machine->stack_frame_required)
8019 {
8020 /* We don't need to realign the stack. max_used_stack_alignment is
8021 used to decide how the stack frame should be aligned. This is
8022 independent of any psABI and of 32-bit vs 64-bit. */
8023 cfun->machine->max_used_stack_alignment
8024 = stack_alignment / BITS_PER_UNIT;
8025 }
8026
8027 if (crtl->stack_realign_needed != stack_realign)
8028 recompute_frame_layout_p = true;
8029 crtl->stack_realign_needed = stack_realign;
8030 crtl->stack_realign_finalized = true;
8031 if (recompute_frame_layout_p)
8032 ix86_compute_frame_layout ();
8033 }
8034
8035 /* Delete the SET_GOT insn right after the entry block if it is allocated to REG. */
8036
8037 static void
8038 ix86_elim_entry_set_got (rtx reg)
8039 {
8040 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
8041 rtx_insn *c_insn = BB_HEAD (bb);
8042 if (!NONDEBUG_INSN_P (c_insn))
8043 c_insn = next_nonnote_nondebug_insn (c_insn);
8044 if (c_insn && NONJUMP_INSN_P (c_insn))
8045 {
8046 rtx pat = PATTERN (c_insn);
8047 if (GET_CODE (pat) == PARALLEL)
8048 {
8049 rtx vec = XVECEXP (pat, 0, 0);
8050 if (GET_CODE (vec) == SET
8051 && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
8052 && REGNO (XEXP (vec, 0)) == REGNO (reg))
8053 delete_insn (c_insn);
8054 }
8055 }
8056 }
8057
8058 static rtx
8059 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
8060 {
8061 rtx addr, mem;
8062
8063 if (offset)
8064 addr = plus_constant (Pmode, frame_reg, offset);
8065 mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
8066 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
8067 }
8068
8069 static inline rtx
8070 gen_frame_load (rtx reg, rtx frame_reg, int offset)
8071 {
8072 return gen_frame_set (reg, frame_reg, offset, false);
8073 }
8074
8075 static inline rtx
8076 gen_frame_store (rtx reg, rtx frame_reg, int offset)
8077 {
8078 return gen_frame_set (reg, frame_reg, offset, true);
8079 }
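
/* For example (illustrative RTL shape), gen_frame_store (xmm6, rax, -16)
   builds roughly
	(set (mem:V4SF (plus:DI (reg:DI ax) (const_int -16))) (reg:V4SF xmm6))
   which is how each register slot is described in the out-of-line
   ms2sysv save/restore PARALLELs below.  */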
8080
8081 static void
8082 ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
8083 {
8084 struct machine_function *m = cfun->machine;
8085 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8086 + m->call_ms2sysv_extra_regs;
8087 rtvec v = rtvec_alloc (ncregs + 1);
8088 unsigned int align, i, vi = 0;
8089 rtx_insn *insn;
8090 rtx sym, addr;
8091 rtx rax = gen_rtx_REG (word_mode, AX_REG);
8092 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8093
8094 /* AL should only be live with sysv_abi. */
8095 gcc_assert (!ix86_eax_live_at_start_p ());
8096 gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
8097
8098 /* Set up RAX as the stub's base pointer. We use stack_realign_offset so the
8099 address is correct whether or not we've actually realigned the stack. */
8100 align = GET_MODE_ALIGNMENT (V4SFmode);
8101 addr = choose_baseaddr (frame.stack_realign_offset
8102 + xlogue.get_stub_ptr_offset (), &align, AX_REG);
8103 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8104
8105 emit_insn (gen_rtx_SET (rax, addr));
8106
8107 /* Get the stub symbol. */
8108 sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
8109 : XLOGUE_STUB_SAVE);
8110 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8111
8112 for (i = 0; i < ncregs; ++i)
8113 {
8114 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8115 rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
8116 r.regno);
8117 RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
8118 }
8119
8120 gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
8121
8122 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
8123 RTX_FRAME_RELATED_P (insn) = true;
8124 }
8125
8126 /* Generate and return an insn body to AND X with Y. */
8127
8128 static rtx_insn *
8129 gen_and2_insn (rtx x, rtx y)
8130 {
8131 enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
8132
8133 gcc_assert (insn_operand_matches (icode, 0, x));
8134 gcc_assert (insn_operand_matches (icode, 1, x));
8135 gcc_assert (insn_operand_matches (icode, 2, y));
8136
8137 return GEN_FCN (icode) (x, x, y);
8138 }
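
/* This is used below to align the stack pointer in place; e.g. aligning to
   64 bytes emits the equivalent of "andq $-64, %rsp" (the mask being the
   negated alignment, which must be a power of two).  */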
8139
8140 /* Expand the prologue into a bunch of separate insns. */
8141
8142 void
8143 ix86_expand_prologue (void)
8144 {
8145 struct machine_function *m = cfun->machine;
8146 rtx insn, t;
8147 HOST_WIDE_INT allocate;
8148 bool int_registers_saved;
8149 bool sse_registers_saved;
8150 bool save_stub_call_needed;
8151 rtx static_chain = NULL_RTX;
8152
8153 if (ix86_function_naked (current_function_decl))
8154 {
8155 if (flag_stack_usage_info)
8156 current_function_static_stack_size = 0;
8157 return;
8158 }
8159
8160 ix86_finalize_stack_frame_flags ();
8161
8162 /* DRAP should not coexist with stack_realign_fp */
8163 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8164
8165 memset (&m->fs, 0, sizeof (m->fs));
8166
8167 /* Initialize CFA state for before the prologue. */
8168 m->fs.cfa_reg = stack_pointer_rtx;
8169 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
8170
8171 /* Track SP offset to the CFA. We continue tracking this after we've
8172 swapped the CFA register away from SP. In the case of re-alignment
8173 this is fudged; we're interested in offsets within the local frame. */
8174 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8175 m->fs.sp_valid = true;
8176 m->fs.sp_realigned = false;
8177
8178 const struct ix86_frame &frame = cfun->machine->frame;
8179
8180 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
8181 {
8182 /* We should have already generated an error for any use of
8183 ms_hook on a nested function. */
8184 gcc_checking_assert (!ix86_static_chain_on_stack);
8185
8186 /* Check if profiling is active and we shall use the profiling-before-prologue
8187 variant. If so, issue a sorry. */
8188 if (crtl->profile && flag_fentry != 0)
8189 sorry ("%<ms_hook_prologue%> attribute is not compatible "
8190 "with %<-mfentry%> for 32-bit");
8191
8192 /* In ix86_asm_output_function_label we emitted:
8193 8b ff movl.s %edi,%edi
8194 55 push %ebp
8195 8b ec movl.s %esp,%ebp
8196
8197 This matches the hookable function prologue in Win32 API
8198 functions in Microsoft Windows XP Service Pack 2 and newer.
8199 Wine uses this to enable Windows apps to hook the Win32 API
8200 functions provided by Wine.
8201
8202 What that means is that we've already set up the frame pointer. */
8203
8204 if (frame_pointer_needed
8205 && !(crtl->drap_reg && crtl->stack_realign_needed))
8206 {
8207 rtx push, mov;
8208
8209 /* We've decided to use the frame pointer already set up.
8210 Describe this to the unwinder by pretending that both
8211 push and mov insns happen right here.
8212
8213 Putting the unwind info here at the end of the ms_hook
8214 is done so that we can make absolutely certain we get
8215 the required byte sequence at the start of the function,
8216 rather than relying on an assembler that can produce
8217 the exact encoding required.
8218
8219 However it does mean (in the unpatched case) that we have
8220 a 1 insn window where the asynchronous unwind info is
8221 incorrect. However, if we placed the unwind info at
8222 its correct location we would have incorrect unwind info
8223 in the patched case. Which is probably all moot since
8224 I don't expect Wine generates dwarf2 unwind info for the
8225 system libraries that use this feature. */
8226
8227 insn = emit_insn (gen_blockage ());
8228
8229 push = gen_push (hard_frame_pointer_rtx);
8230 mov = gen_rtx_SET (hard_frame_pointer_rtx,
8231 stack_pointer_rtx);
8232 RTX_FRAME_RELATED_P (push) = 1;
8233 RTX_FRAME_RELATED_P (mov) = 1;
8234
8235 RTX_FRAME_RELATED_P (insn) = 1;
8236 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8237 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
8238
8239 /* Note that gen_push incremented m->fs.cfa_offset, even
8240 though we didn't emit the push insn here. */
8241 m->fs.cfa_reg = hard_frame_pointer_rtx;
8242 m->fs.fp_offset = m->fs.cfa_offset;
8243 m->fs.fp_valid = true;
8244 }
8245 else
8246 {
8247 /* The frame pointer is not needed so pop %ebp again.
8248 This leaves us with a pristine state. */
8249 emit_insn (gen_pop (hard_frame_pointer_rtx));
8250 }
8251 }
8252
8253 /* The first insn of a function that accepts its static chain on the
8254 stack is to push the register that would be filled in by a direct
8255 call. This insn will be skipped by the trampoline. */
8256 else if (ix86_static_chain_on_stack)
8257 {
8258 static_chain = ix86_static_chain (cfun->decl, false);
8259 insn = emit_insn (gen_push (static_chain));
8260 emit_insn (gen_blockage ());
8261
8262 /* We don't want to interpret this push insn as a register save,
8263 only as a stack adjustment. The real copy of the register as
8264 a save will be done later, if needed. */
8265 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
8266 t = gen_rtx_SET (stack_pointer_rtx, t);
8267 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8268 RTX_FRAME_RELATED_P (insn) = 1;
8269 }
8270
8271 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8272 of DRAP is needed and stack realignment is really needed after reload */
8273 if (stack_realign_drap)
8274 {
8275 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8276
8277 /* Can't use DRAP in interrupt function. */
8278 if (cfun->machine->func_type != TYPE_NORMAL)
8279 sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
8280 "in interrupt service routine. This may be worked "
8281 "around by avoiding functions with aggregate return.");
8282
8283 /* Only need to push parameter pointer reg if it is caller saved. */
8284 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8285 {
8286 /* Push arg pointer reg */
8287 insn = emit_insn (gen_push (crtl->drap_reg));
8288 RTX_FRAME_RELATED_P (insn) = 1;
8289 }
8290
8291 /* Grab the argument pointer. */
8292 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
8293 insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8294 RTX_FRAME_RELATED_P (insn) = 1;
8295 m->fs.cfa_reg = crtl->drap_reg;
8296 m->fs.cfa_offset = 0;
8297
8298 /* Align the stack. */
8299 insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
8300 GEN_INT (-align_bytes)));
8301 RTX_FRAME_RELATED_P (insn) = 1;
8302
8303 /* Replicate the return address on the stack so that return
8304 address can be reached via (argp - 1) slot. This is needed
8305 to implement macro RETURN_ADDR_RTX and intrinsic function
8306 expand_builtin_return_addr etc. */
8307 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
8308 t = gen_frame_mem (word_mode, t);
8309 insn = emit_insn (gen_push (t));
8310 RTX_FRAME_RELATED_P (insn) = 1;
8311
8312 /* For the purposes of frame and register save area addressing,
8313 we've started over with a new frame. */
8314 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
8315 m->fs.realigned = true;
8316
8317 if (static_chain)
8318 {
8319 /* Replicate static chain on the stack so that static chain
8320 can be reached via (argp - 2) slot. This is needed for
8321 nested function with stack realignment. */
8322 insn = emit_insn (gen_push (static_chain));
8323 RTX_FRAME_RELATED_P (insn) = 1;
8324 }
8325 }
8326
8327 int_registers_saved = (frame.nregs == 0);
8328 sse_registers_saved = (frame.nsseregs == 0);
8329 save_stub_call_needed = (m->call_ms2sysv);
8330 gcc_assert (sse_registers_saved || !save_stub_call_needed);
8331
8332 if (frame_pointer_needed && !m->fs.fp_valid)
8333 {
8334 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8335 slower on all targets. Also sdb didn't like it. */
8336 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8337 RTX_FRAME_RELATED_P (insn) = 1;
8338
8339 /* Push registers now, before setting the frame pointer
8340 on SEH target. */
8341 if (!int_registers_saved
8342 && TARGET_SEH
8343 && !frame.save_regs_using_mov)
8344 {
8345 ix86_emit_save_regs ();
8346 int_registers_saved = true;
8347 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8348 }
8349
8350 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
8351 {
8352 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8353 RTX_FRAME_RELATED_P (insn) = 1;
8354
8355 if (m->fs.cfa_reg == stack_pointer_rtx)
8356 m->fs.cfa_reg = hard_frame_pointer_rtx;
8357 m->fs.fp_offset = m->fs.sp_offset;
8358 m->fs.fp_valid = true;
8359 }
8360 }
8361
8362 if (!int_registers_saved)
8363 {
8364 /* If saving registers via PUSH, do so now. */
8365 if (!frame.save_regs_using_mov)
8366 {
8367 ix86_emit_save_regs ();
8368 int_registers_saved = true;
8369 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
8370 }
8371
8372 /* When using the red zone we may start register saving before allocating
8373 the stack frame, saving one cycle of the prologue. However, avoid
8374 doing this if we have to probe the stack; at least on x86_64 the
8375 stack probe can turn into a call that clobbers a red zone location. */
8376 else if (ix86_using_red_zone ()
8377 && (! TARGET_STACK_PROBE
8378 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
8379 {
8380 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8381 int_registers_saved = true;
8382 }
8383 }
8384
8385 if (stack_realign_fp)
8386 {
8387 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8388 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8389
8390 /* Record last valid frame pointer offset. */
8391 m->fs.sp_realigned_fp_last = frame.reg_save_offset;
8392
8393 /* The computation of the size of the re-aligned stack frame means
8394 that we must allocate the size of the register save area before
8395 performing the actual alignment. Otherwise we cannot guarantee
8396 that there's enough storage above the realignment point. */
8397 allocate = frame.reg_save_offset - m->fs.sp_offset
8398 + frame.stack_realign_allocate;
8399 if (allocate)
8400 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8401 GEN_INT (-allocate), -1, false);
8402
8403 /* Align the stack. */
8404 emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
8405 m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
8406 m->fs.sp_realigned_offset = m->fs.sp_offset
8407 - frame.stack_realign_allocate;
8408 /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
8409 Beyond this point, stack access should be done via choose_baseaddr or
8410 by using sp_valid_at and fp_valid_at to determine the correct base
8411 register. Henceforth, any CFA offset should be thought of as logical
8412 and not physical. */
8413 gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
8414 gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
8415 m->fs.sp_realigned = true;
8416
8417 /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
8418 is needed to describe where a register is saved using a realigned
8419 stack pointer, so we need to invalidate the stack pointer for that
8420 target. */
8421 if (TARGET_SEH)
8422 m->fs.sp_valid = false;
8423
8424 /* If SP offset is non-immediate after allocation of the stack frame,
8425 then emit SSE saves or stub call prior to allocating the rest of the
8426 stack frame. This is less efficient for the out-of-line stub because
8427 we can't combine allocations across the call barrier, but it's better
8428 than using a scratch register. */
8429 else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
8430 - m->fs.sp_realigned_offset),
8431 Pmode))
8432 {
8433 if (!sse_registers_saved)
8434 {
8435 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8436 sse_registers_saved = true;
8437 }
8438 else if (save_stub_call_needed)
8439 {
8440 ix86_emit_outlined_ms2sysv_save (frame);
8441 save_stub_call_needed = false;
8442 }
8443 }
8444 }
8445
8446 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
8447
8448 if (flag_stack_usage_info)
8449 {
8450 /* We start to count from ARG_POINTER. */
8451 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
8452
8453 /* If it was realigned, take into account the fake frame. */
8454 if (stack_realign_drap)
8455 {
8456 if (ix86_static_chain_on_stack)
8457 stack_size += UNITS_PER_WORD;
8458
8459 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
8460 stack_size += UNITS_PER_WORD;
8461
8462 /* This over-estimates by 1 minimal-stack-alignment-unit but
8463 mitigates that by counting in the new return address slot. */
8464 current_function_dynamic_stack_size
8465 += crtl->stack_alignment_needed / BITS_PER_UNIT;
8466 }
8467
8468 current_function_static_stack_size = stack_size;
8469 }
8470
8471 /* On SEH target with very large frame size, allocate an area to save
8472 SSE registers (as the very large allocation won't be described). */
8473 if (TARGET_SEH
8474 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
8475 && !sse_registers_saved)
8476 {
8477 HOST_WIDE_INT sse_size
8478 = frame.sse_reg_save_offset - frame.reg_save_offset;
8479
8480 gcc_assert (int_registers_saved);
8481
8482 /* No need to do stack checking as the area will be immediately
8483 written. */
8484 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8485 GEN_INT (-sse_size), -1,
8486 m->fs.cfa_reg == stack_pointer_rtx);
8487 allocate -= sse_size;
8488 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8489 sse_registers_saved = true;
8490 }
8491
8492 /* If stack clash protection is requested, then probe the stack. */
8493 if (allocate >= 0 && flag_stack_clash_protection)
8494 {
8495 ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
8496 allocate = 0;
8497 }
8498
8499 /* The stack has already been decremented by the instruction calling us
8500 so probe if the size is non-negative to preserve the protection area. */
8501 else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8502 {
8503 const HOST_WIDE_INT probe_interval = get_probe_interval ();
8504
8505 if (STACK_CHECK_MOVING_SP)
8506 {
8507 if (crtl->is_leaf
8508 && !cfun->calls_alloca
8509 && allocate <= probe_interval)
8510 ;
8511
8512 else
8513 {
8514 ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
8515 allocate = 0;
8516 }
8517 }
8518
8519 else
8520 {
8521 HOST_WIDE_INT size = allocate;
8522
8523 if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
8524 size = 0x80000000 - get_stack_check_protect () - 1;
8525
8526 if (TARGET_STACK_PROBE)
8527 {
8528 if (crtl->is_leaf && !cfun->calls_alloca)
8529 {
8530 if (size > probe_interval)
8531 ix86_emit_probe_stack_range (0, size, int_registers_saved);
8532 }
8533 else
8534 ix86_emit_probe_stack_range (0,
8535 size + get_stack_check_protect (),
8536 int_registers_saved);
8537 }
8538 else
8539 {
8540 if (crtl->is_leaf && !cfun->calls_alloca)
8541 {
8542 if (size > probe_interval
8543 && size > get_stack_check_protect ())
8544 ix86_emit_probe_stack_range (get_stack_check_protect (),
8545 (size
8546 - get_stack_check_protect ()),
8547 int_registers_saved);
8548 }
8549 else
8550 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
8551 int_registers_saved);
8552 }
8553 }
8554 }
8555
8556 if (allocate == 0)
8557 ;
8558 else if (!ix86_target_stack_probe ()
8559 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
8560 {
8561 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8562 GEN_INT (-allocate), -1,
8563 m->fs.cfa_reg == stack_pointer_rtx);
8564 }
8565 else
8566 {
8567 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8568 rtx r10 = NULL;
8569 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
8570 bool eax_live = ix86_eax_live_at_start_p ();
8571 bool r10_live = false;
8572
8573 if (TARGET_64BIT)
8574 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
8575
8576 if (eax_live)
8577 {
8578 insn = emit_insn (gen_push (eax));
8579 allocate -= UNITS_PER_WORD;
8580 /* Note that SEH directives need to continue tracking the stack
8581 pointer even after the frame pointer has been set up. */
8582 if (sp_is_cfa_reg || TARGET_SEH)
8583 {
8584 if (sp_is_cfa_reg)
8585 m->fs.cfa_offset += UNITS_PER_WORD;
8586 RTX_FRAME_RELATED_P (insn) = 1;
8587 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8588 gen_rtx_SET (stack_pointer_rtx,
8589 plus_constant (Pmode,
8590 stack_pointer_rtx,
8591 -UNITS_PER_WORD)));
8592 }
8593 }
8594
8595 if (r10_live)
8596 {
8597 r10 = gen_rtx_REG (Pmode, R10_REG);
8598 insn = emit_insn (gen_push (r10));
8599 allocate -= UNITS_PER_WORD;
8600 if (sp_is_cfa_reg || TARGET_SEH)
8601 {
8602 if (sp_is_cfa_reg)
8603 m->fs.cfa_offset += UNITS_PER_WORD;
8604 RTX_FRAME_RELATED_P (insn) = 1;
8605 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8606 gen_rtx_SET (stack_pointer_rtx,
8607 plus_constant (Pmode,
8608 stack_pointer_rtx,
8609 -UNITS_PER_WORD)));
8610 }
8611 }
8612
8613 emit_move_insn (eax, GEN_INT (allocate));
8614 emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
8615
8616 /* Use the fact that AX still contains ALLOCATE. */
8617 insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
8618 (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
8619
8620 if (sp_is_cfa_reg || TARGET_SEH)
8621 {
8622 if (sp_is_cfa_reg)
8623 m->fs.cfa_offset += allocate;
8624 RTX_FRAME_RELATED_P (insn) = 1;
8625 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8626 gen_rtx_SET (stack_pointer_rtx,
8627 plus_constant (Pmode, stack_pointer_rtx,
8628 -allocate)));
8629 }
8630 m->fs.sp_offset += allocate;
8631
8632 /* Use stack_pointer_rtx for relative addressing so that code works for
8633 realigned stack. But this means that we need a blockage to prevent
8634 stores based on the frame pointer from being scheduled before. */
8635 if (r10_live && eax_live)
8636 {
8637 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8638 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
8639 gen_frame_mem (word_mode, t));
8640 t = plus_constant (Pmode, t, UNITS_PER_WORD);
8641 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
8642 gen_frame_mem (word_mode, t));
8643 emit_insn (gen_memory_blockage ());
8644 }
8645 else if (eax_live || r10_live)
8646 {
8647 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
8648 emit_move_insn (gen_rtx_REG (word_mode,
8649 (eax_live ? AX_REG : R10_REG)),
8650 gen_frame_mem (word_mode, t));
8651 emit_insn (gen_memory_blockage ());
8652 }
8653 }
8654 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
8655
8656 /* If we haven't already set up the frame pointer, do so now. */
8657 if (frame_pointer_needed && !m->fs.fp_valid)
8658 {
8659 insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
8660 GEN_INT (frame.stack_pointer_offset
8661 - frame.hard_frame_pointer_offset));
8662 insn = emit_insn (insn);
8663 RTX_FRAME_RELATED_P (insn) = 1;
8664 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
8665
8666 if (m->fs.cfa_reg == stack_pointer_rtx)
8667 m->fs.cfa_reg = hard_frame_pointer_rtx;
8668 m->fs.fp_offset = frame.hard_frame_pointer_offset;
8669 m->fs.fp_valid = true;
8670 }
8671
8672 if (!int_registers_saved)
8673 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
8674 if (!sse_registers_saved)
8675 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
8676 else if (save_stub_call_needed)
8677 ix86_emit_outlined_ms2sysv_save (frame);
8678
8679 /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
8680 in PROLOGUE. */
8681 if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
8682 {
8683 rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
8684 insn = emit_insn (gen_set_got (pic));
8685 RTX_FRAME_RELATED_P (insn) = 1;
8686 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
8687 emit_insn (gen_prologue_use (pic));
8688 /* Delete the already emitted SET_GOT if it exists and is allocated to
8689 REAL_PIC_OFFSET_TABLE_REGNUM. */
8690 ix86_elim_entry_set_got (pic);
8691 }
8692
8693 if (crtl->drap_reg && !crtl->stack_realign_needed)
8694 {
8695 /* vDRAP is set up, but after reload it turns out stack realignment
8696 isn't necessary; here we emit the prologue to set up DRAP
8697 without the stack realignment adjustment. */
8698 t = choose_baseaddr (0, NULL);
8699 emit_insn (gen_rtx_SET (crtl->drap_reg, t));
8700 }
8701
8702 /* Prevent instructions from being scheduled into register save push
8703 sequence when access to the redzone area is done through frame pointer.
8704 The offset between the frame pointer and the stack pointer is calculated
8705 relative to the value of the stack pointer at the end of the function
8706 prologue, and moving instructions that access redzone area via frame
8707 pointer inside push sequence violates this assumption. */
8708 if (frame_pointer_needed && frame.red_zone_size)
8709 emit_insn (gen_memory_blockage ());
8710
8711 /* SEH requires that the prologue end within 256 bytes of the start of
8712 the function. Prevent instruction schedules that would extend that.
8713 Further, prevent alloca modifications to the stack pointer from being
8714 combined with prologue modifications. */
8715 if (TARGET_SEH)
8716 emit_insn (gen_prologue_use (stack_pointer_rtx));
8717 }
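
/* A rough sketch of the common 64-bit output of the function above, for a
   frame-pointer-using function with two callee-saved GPRs and a small local
   area (probing, DRAP, SEH and SSE saves omitted):

	pushq	%rbp
	movq	%rsp, %rbp
	pushq	%r12
	pushq	%rbx
	subq	$40, %rsp
*/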
8718
8719 /* Emit code to restore REG using a POP insn. */
8720
8721 static void
8722 ix86_emit_restore_reg_using_pop (rtx reg)
8723 {
8724 struct machine_function *m = cfun->machine;
8725 rtx_insn *insn = emit_insn (gen_pop (reg));
8726
8727 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
8728 m->fs.sp_offset -= UNITS_PER_WORD;
8729
8730 if (m->fs.cfa_reg == crtl->drap_reg
8731 && REGNO (reg) == REGNO (crtl->drap_reg))
8732 {
8733 /* Previously we'd represented the CFA as an expression
8734 like *(%ebp - 8). We've just popped that value from
8735 the stack, which means we need to reset the CFA to
8736 the drap register. This will remain until we restore
8737 the stack pointer. */
8738 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8739 RTX_FRAME_RELATED_P (insn) = 1;
8740
8741 /* This means that the DRAP register is valid for addressing too. */
8742 m->fs.drap_valid = true;
8743 return;
8744 }
8745
8746 if (m->fs.cfa_reg == stack_pointer_rtx)
8747 {
8748 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
8749 x = gen_rtx_SET (stack_pointer_rtx, x);
8750 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
8751 RTX_FRAME_RELATED_P (insn) = 1;
8752
8753 m->fs.cfa_offset -= UNITS_PER_WORD;
8754 }
8755
8756 /* When the frame pointer is the CFA, and we pop it, we are
8757 swapping back to the stack pointer as the CFA. This happens
8758 for stack frames that don't allocate other data, so we assume
8759 the stack pointer is now pointing at the return address, i.e.
8760 the function entry state, which makes the offset be 1 word. */
8761 if (reg == hard_frame_pointer_rtx)
8762 {
8763 m->fs.fp_valid = false;
8764 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8765 {
8766 m->fs.cfa_reg = stack_pointer_rtx;
8767 m->fs.cfa_offset -= UNITS_PER_WORD;
8768
8769 add_reg_note (insn, REG_CFA_DEF_CFA,
8770 plus_constant (Pmode, stack_pointer_rtx,
8771 m->fs.cfa_offset));
8772 RTX_FRAME_RELATED_P (insn) = 1;
8773 }
8774 }
8775 }
8776
8777 /* Emit code to restore saved registers using POP insns. */
8778
8779 static void
8780 ix86_emit_restore_regs_using_pop (void)
8781 {
8782 unsigned int regno;
8783
8784 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8785 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
8786 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
8787 }
8788
8789 /* Emit code and notes for the LEAVE instruction. If insn is non-null,
8790 omits the emit and only attaches the notes. */
8791
8792 static void
8793 ix86_emit_leave (rtx_insn *insn)
8794 {
8795 struct machine_function *m = cfun->machine;
8796
8797 if (!insn)
8798 insn = emit_insn (gen_leave (word_mode));
8799
8800 ix86_add_queued_cfa_restore_notes (insn);
8801
8802 gcc_assert (m->fs.fp_valid);
8803 m->fs.sp_valid = true;
8804 m->fs.sp_realigned = false;
8805 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
8806 m->fs.fp_valid = false;
8807
8808 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
8809 {
8810 m->fs.cfa_reg = stack_pointer_rtx;
8811 m->fs.cfa_offset = m->fs.sp_offset;
8812
8813 add_reg_note (insn, REG_CFA_DEF_CFA,
8814 plus_constant (Pmode, stack_pointer_rtx,
8815 m->fs.sp_offset));
8816 RTX_FRAME_RELATED_P (insn) = 1;
8817 }
8818 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
8819 m->fs.fp_offset);
8820 }
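
/* In 64-bit mode the "leave" insn emitted above is equivalent to
	movq	%rbp, %rsp
	popq	%rbp
   which is why the stack pointer becomes valid again and the frame pointer
   does not, with sp_offset recomputed from the old fp_offset.  */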
8821
8822 /* Emit code to restore saved registers using MOV insns.
8823 First register is restored from CFA - CFA_OFFSET. */
8824 static void
8825 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
8826 bool maybe_eh_return)
8827 {
8828 struct machine_function *m = cfun->machine;
8829 unsigned int regno;
8830
8831 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8832 if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8833 {
8834 rtx reg = gen_rtx_REG (word_mode, regno);
8835 rtx mem;
8836 rtx_insn *insn;
8837
8838 mem = choose_baseaddr (cfa_offset, NULL);
8839 mem = gen_frame_mem (word_mode, mem);
8840 insn = emit_move_insn (reg, mem);
8841
8842 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8843 {
8844 /* Previously we'd represented the CFA as an expression
8845 like *(%ebp - 8). We've just restored that value from
8846 the stack, which means we need to reset the CFA to
8847 the drap register. This will remain until we restore
8848 the stack pointer. */
8849 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8850 RTX_FRAME_RELATED_P (insn) = 1;
8851
8852 /* This means that the DRAP register is valid for addressing. */
8853 m->fs.drap_valid = true;
8854 }
8855 else
8856 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8857
8858 cfa_offset -= UNITS_PER_WORD;
8859 }
8860 }
8861
8862 /* Emit code to restore saved SSE registers using MOV insns.
8863 First register is restored from CFA - CFA_OFFSET. */
8864 static void
8865 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
8866 bool maybe_eh_return)
8867 {
8868 unsigned int regno;
8869
8870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8871 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
8872 {
8873 rtx reg = gen_rtx_REG (V4SFmode, regno);
8874 rtx mem;
8875 unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
8876
8877 mem = choose_baseaddr (cfa_offset, &align);
8878 mem = gen_rtx_MEM (V4SFmode, mem);
8879
8880 /* The location alignment depends upon the base register. */
8881 align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
8882 gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
8883 set_mem_align (mem, align);
8884 emit_insn (gen_rtx_SET (reg, mem));
8885
8886 ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
8887
8888 cfa_offset -= GET_MODE_SIZE (V4SFmode);
8889 }
8890 }
8891
8892 static void
8893 ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
8894 bool use_call, int style)
8895 {
8896 struct machine_function *m = cfun->machine;
8897 const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
8898 + m->call_ms2sysv_extra_regs;
8899 rtvec v;
8900 unsigned int elems_needed, align, i, vi = 0;
8901 rtx_insn *insn;
8902 rtx sym, tmp;
8903 rtx rsi = gen_rtx_REG (word_mode, SI_REG);
8904 rtx r10 = NULL_RTX;
8905 const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
8906 HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
8907 HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
8908 rtx rsi_frame_load = NULL_RTX;
8909 HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
8910 enum xlogue_stub stub;
8911
8912 gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
8913
8914 /* If using a realigned stack, we should never start with padding. */
8915 gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
8916
8917 /* Setup RSI as the stub's base pointer. */
8918 align = GET_MODE_ALIGNMENT (V4SFmode);
8919 tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
8920 gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
8921
8922 emit_insn (gen_rtx_SET (rsi, tmp));
8923
8924 /* Get a symbol for the stub. */
8925 if (frame_pointer_needed)
8926 stub = use_call ? XLOGUE_STUB_RESTORE_HFP
8927 : XLOGUE_STUB_RESTORE_HFP_TAIL;
8928 else
8929 stub = use_call ? XLOGUE_STUB_RESTORE
8930 : XLOGUE_STUB_RESTORE_TAIL;
8931 sym = xlogue.get_stub_rtx (stub);
8932
8933 elems_needed = ncregs;
8934 if (use_call)
8935 elems_needed += 1;
8936 else
8937 elems_needed += frame_pointer_needed ? 5 : 3;
8938 v = rtvec_alloc (elems_needed);
8939
8940 /* We call the epilogue stub when we need to pop incoming args or we are
8941 doing a sibling call as the tail. Otherwise, we will emit a jmp to the
8942 epilogue stub and it is the tail-call. */
8943 if (use_call)
8944 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8945 else
8946 {
8947 RTVEC_ELT (v, vi++) = ret_rtx;
8948 RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
8949 if (frame_pointer_needed)
8950 {
8951 rtx rbp = gen_rtx_REG (DImode, BP_REG);
8952 gcc_assert (m->fs.fp_valid);
8953 gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
8954
8955 tmp = plus_constant (DImode, rbp, 8);
8956 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
8957 RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
8958 tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8959 RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
8960 }
8961 else
8962 {
8963 /* If no hard frame pointer, we set R10 to the SP restore value. */
8964 gcc_assert (!m->fs.fp_valid);
8965 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
8966 gcc_assert (m->fs.sp_valid);
8967
8968 r10 = gen_rtx_REG (DImode, R10_REG);
8969 tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
8970 emit_insn (gen_rtx_SET (r10, tmp));
8971
8972 RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
8973 }
8974 }
8975
8976 /* Generate frame load insns and restore notes. */
8977 for (i = 0; i < ncregs; ++i)
8978 {
8979 const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
8980 machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
8981 rtx reg, frame_load;
8982
8983 reg = gen_rtx_REG (mode, r.regno);
8984 frame_load = gen_frame_load (reg, rsi, r.offset);
8985
8986 /* Save RSI frame load insn & note to add last. */
8987 if (r.regno == SI_REG)
8988 {
8989 gcc_assert (!rsi_frame_load);
8990 rsi_frame_load = frame_load;
8991 rsi_restore_offset = r.offset;
8992 }
8993 else
8994 {
8995 RTVEC_ELT (v, vi++) = frame_load;
8996 ix86_add_cfa_restore_note (NULL, reg, r.offset);
8997 }
8998 }
8999
9000 /* Add RSI frame load & restore note at the end. */
9001 gcc_assert (rsi_frame_load);
9002 gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
9003 RTVEC_ELT (v, vi++) = rsi_frame_load;
9004 ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
9005 rsi_restore_offset);
9006
9007 /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */
9008 if (!use_call && !frame_pointer_needed)
9009 {
9010 gcc_assert (m->fs.sp_valid);
9011 gcc_assert (!m->fs.sp_realigned);
9012
9013 /* At this point, R10 should point to frame.stack_realign_offset. */
9014 if (m->fs.cfa_reg == stack_pointer_rtx)
9015 m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
9016 m->fs.sp_offset = frame.stack_realign_offset;
9017 }
9018
9019 gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
9020 tmp = gen_rtx_PARALLEL (VOIDmode, v);
9021 if (use_call)
9022 insn = emit_insn (tmp);
9023 else
9024 {
9025 insn = emit_jump_insn (tmp);
9026 JUMP_LABEL (insn) = ret_rtx;
9027
9028 if (frame_pointer_needed)
9029 ix86_emit_leave (insn);
9030 else
9031 {
9032 /* Need CFA adjust note. */
9033 tmp = gen_rtx_SET (stack_pointer_rtx, r10);
9034 add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
9035 }
9036 }
9037
9038 RTX_FRAME_RELATED_P (insn) = true;
9039 ix86_add_queued_cfa_restore_notes (insn);
9040
9041 /* If we're not doing a tail-call, we need to adjust the stack. */
9042 if (use_call && m->fs.sp_valid)
9043 {
9044 HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
9045 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9046 GEN_INT (dealloc), style,
9047 m->fs.cfa_reg == stack_pointer_rtx);
9048 }
9049 }
9050
9051 /* Restore function stack, frame, and registers. */
9052
9053 void
9054 ix86_expand_epilogue (int style)
9055 {
9056 struct machine_function *m = cfun->machine;
9057 struct machine_frame_state frame_state_save = m->fs;
9058 bool restore_regs_via_mov;
9059 bool using_drap;
9060 bool restore_stub_is_tail = false;
9061
9062 if (ix86_function_naked (current_function_decl))
9063 {
9064 /* The program should not reach this point. */
9065 emit_insn (gen_ud2 ());
9066 return;
9067 }
9068
9069 ix86_finalize_stack_frame_flags ();
9070 const struct ix86_frame &frame = cfun->machine->frame;
9071
9072 m->fs.sp_realigned = stack_realign_fp;
9073 m->fs.sp_valid = stack_realign_fp
9074 || !frame_pointer_needed
9075 || crtl->sp_is_unchanging;
9076 gcc_assert (!m->fs.sp_valid
9077 || m->fs.sp_offset == frame.stack_pointer_offset);
9078
9079 /* The FP must be valid if the frame pointer is present. */
9080 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9081 gcc_assert (!m->fs.fp_valid
9082 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9083
9084 /* We must have *some* valid pointer to the stack frame. */
9085 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9086
9087 /* The DRAP is never valid at this point. */
9088 gcc_assert (!m->fs.drap_valid);
9089
9090 /* See the comment about red zone and frame
9091 pointer usage in ix86_expand_prologue. */
9092 if (frame_pointer_needed && frame.red_zone_size)
9093 emit_insn (gen_memory_blockage ());
9094
9095 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9096 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9097
9098 /* Determine the CFA offset of the end of the red-zone. */
9099 m->fs.red_zone_offset = 0;
9100 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9101 {
9102 /* The red-zone begins below the return address and, in an
9103 exception handler, the error code. */
9104 m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
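/* For a typical 64-bit function this works out to 128 + 8 == 136 bytes
(RED_ZONE_SIZE is 128 under the SysV x86-64 ABI and
INCOMING_FRAME_SP_OFFSET is normally one word). Illustrative only. */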
9105
9106 /* When the register save area is in the aligned portion of
9107 the stack, determine the maximum runtime displacement that
9108 matches up with the aligned frame. */
9109 if (stack_realign_drap)
9110 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9111 + UNITS_PER_WORD);
9112 }
9113
9114 HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
9115
9116 /* Special care must be taken for the normal return case of a function
9117 using eh_return: the eax and edx registers are marked as saved, but
9118 not restored along this path. Adjust the save location to match. */
9119 if (crtl->calls_eh_return && style != 2)
9120 reg_save_offset -= 2 * UNITS_PER_WORD;
9121
9122 /* EH_RETURN requires the use of moves to function properly. */
9123 if (crtl->calls_eh_return)
9124 restore_regs_via_mov = true;
9125 /* SEH requires the use of pops to identify the epilogue. */
9126 else if (TARGET_SEH)
9127 restore_regs_via_mov = false;
9128 /* If we're only restoring one register and sp cannot be used then
9129 use a move instruction to restore the register, since that is
9130 less work than reloading sp and popping the register. */
9131 else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
9132 restore_regs_via_mov = true;
9133 else if (TARGET_EPILOGUE_USING_MOVE
9134 && cfun->machine->use_fast_prologue_epilogue
9135 && (frame.nregs > 1
9136 || m->fs.sp_offset != reg_save_offset))
9137 restore_regs_via_mov = true;
9138 else if (frame_pointer_needed
9139 && !frame.nregs
9140 && m->fs.sp_offset != reg_save_offset)
9141 restore_regs_via_mov = true;
9142 else if (frame_pointer_needed
9143 && TARGET_USE_LEAVE
9144 && cfun->machine->use_fast_prologue_epilogue
9145 && frame.nregs == 1)
9146 restore_regs_via_mov = true;
9147 else
9148 restore_regs_via_mov = false;
9149
9150 if (restore_regs_via_mov || frame.nsseregs)
9151 {
9152 /* Ensure that the entire register save area is addressable via
9153 the stack pointer, if we will restore SSE regs via sp. */
9154 if (TARGET_64BIT
9155 && m->fs.sp_offset > 0x7fffffff
9156 && sp_valid_at (frame.stack_realign_offset + 1)
9157 && (frame.nsseregs + frame.nregs) != 0)
9158 {
9159 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9160 GEN_INT (m->fs.sp_offset
9161 - frame.sse_reg_save_offset),
9162 style,
9163 m->fs.cfa_reg == stack_pointer_rtx);
9164 }
9165 }
9166
9167 /* If there are any SSE registers to restore, then we have to do it
9168 via moves, since there's obviously no pop for SSE regs. */
9169 if (frame.nsseregs)
9170 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
9171 style == 2);
9172
9173 if (m->call_ms2sysv)
9174 {
9175 int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
9176
9177 /* We cannot use a tail-call for the stub if:
9178 1. We have to pop incoming args,
9179 2. We have additional int regs to restore, or
9180 3. A sibling call will be the tail-call, or
9181 4. We are emitting an eh_return_internal epilogue.
9182
9183 TODO: Item 4 has not yet been tested!
9184
9185 If any of the above are true, we will call the stub rather than
9186 jump to it. */
9187 restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
9188 ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
9189 }
9190
9191 /* If using an out-of-line stub that is a tail-call, then... */
9192 if (m->call_ms2sysv && restore_stub_is_tail)
9193 {
9194 /* TODO: paranoid tests. (remove eventually) */
9195 gcc_assert (m->fs.sp_valid);
9196 gcc_assert (!m->fs.sp_realigned);
9197 gcc_assert (!m->fs.fp_valid);
9198 gcc_assert (!m->fs.realigned);
9199 gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
9200 gcc_assert (!crtl->drap_reg);
9201 gcc_assert (!frame.nregs);
9202 }
9203 else if (restore_regs_via_mov)
9204 {
9205 rtx t;
9206
9207 if (frame.nregs)
9208 ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
9209
9210 /* eh_return epilogues need %ecx added to the stack pointer. */
9211 if (style == 2)
9212 {
9213 rtx sa = EH_RETURN_STACKADJ_RTX;
9214 rtx_insn *insn;
9215
9216 /* %ecx can't be used for both DRAP register and eh_return. */
9217 if (crtl->drap_reg)
9218 gcc_assert (REGNO (crtl->drap_reg) != CX_REG);
9219
9220 /* regparm nested functions don't work with eh_return. */
9221 gcc_assert (!ix86_static_chain_on_stack);
9222
9223 if (frame_pointer_needed)
9224 {
9225 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9226 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
9227 emit_insn (gen_rtx_SET (sa, t));
9228
9229 /* NB: eh_return epilogues must restore the frame pointer
9230 in word_mode since the upper 32 bits of RBP register
9231 can have any values. */
9232 t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
9233 rtx frame_reg = gen_rtx_REG (word_mode,
9234 HARD_FRAME_POINTER_REGNUM);
9235 insn = emit_move_insn (frame_reg, t);
9236
9237 /* Note that we use SA as a temporary CFA, as the return
9238 address is at the proper place relative to it. We
9239 pretend this happens at the FP restore insn because
9240 prior to this insn the FP would be stored at the wrong
9241 offset relative to SA, and after this insn we have no
9242 other reasonable register to use for the CFA. We don't
9243 bother resetting the CFA to the SP for the duration of
9244 the return insn, unless the control flow instrumentation
9245 is done. In this case the SP is used later and we have
9246 to reset CFA to SP. */
9247 add_reg_note (insn, REG_CFA_DEF_CFA,
9248 plus_constant (Pmode, sa, UNITS_PER_WORD));
9249 ix86_add_queued_cfa_restore_notes (insn);
9250 add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
9251 RTX_FRAME_RELATED_P (insn) = 1;
9252
9253 m->fs.cfa_reg = sa;
9254 m->fs.cfa_offset = UNITS_PER_WORD;
9255 m->fs.fp_valid = false;
9256
9257 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9258 const0_rtx, style,
9259 flag_cf_protection);
9260 }
9261 else
9262 {
9263 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9264 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
9265 insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
9266 ix86_add_queued_cfa_restore_notes (insn);
9267
9268 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
9269 if (m->fs.cfa_offset != UNITS_PER_WORD)
9270 {
9271 m->fs.cfa_offset = UNITS_PER_WORD;
9272 add_reg_note (insn, REG_CFA_DEF_CFA,
9273 plus_constant (Pmode, stack_pointer_rtx,
9274 UNITS_PER_WORD));
9275 RTX_FRAME_RELATED_P (insn) = 1;
9276 }
9277 }
9278 m->fs.sp_offset = UNITS_PER_WORD;
9279 m->fs.sp_valid = true;
9280 m->fs.sp_realigned = false;
9281 }
9282 }
9283 else
9284 {
9285 /* SEH requires that the function end with (1) a stack adjustment
9286 if necessary, (2) a sequence of pops, and (3) a return or
9287 jump instruction. Prevent insns from the function body from
9288 being scheduled into this sequence. */
9289 if (TARGET_SEH)
9290 {
9291 /* Prevent a catch region from being adjacent to the standard
9292 epilogue sequence. Unfortunately neither crtl->uses_eh_lsda
9293 nor several other flags that would be interesting to test are
9294 set up yet. */
9295 if (flag_non_call_exceptions)
9296 emit_insn (gen_nops (const1_rtx));
9297 else
9298 emit_insn (gen_blockage ());
9299 }
9300
9301 /* First step is to deallocate the stack frame so that we can
9302 pop the registers. If the stack pointer was realigned, it needs
9303 to be restored now. Also do it on SEH targets for very large
9304 frames, as the emitted instructions aren't allowed by the ABI
9305 in epilogues. */
9306 if (!m->fs.sp_valid || m->fs.sp_realigned
9307 || (TARGET_SEH
9308 && (m->fs.sp_offset - reg_save_offset
9309 >= SEH_MAX_FRAME_SIZE)))
9310 {
9311 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
9312 GEN_INT (m->fs.fp_offset
9313 - reg_save_offset),
9314 style, false);
9315 }
9316 else if (m->fs.sp_offset != reg_save_offset)
9317 {
9318 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9319 GEN_INT (m->fs.sp_offset
9320 - reg_save_offset),
9321 style,
9322 m->fs.cfa_reg == stack_pointer_rtx);
9323 }
9324
9325 ix86_emit_restore_regs_using_pop ();
9326 }
9327
9328 /* If we used a frame pointer and haven't already got rid of it,
9329 then do so now. */
9330 if (m->fs.fp_valid)
9331 {
9332 /* If the stack pointer is valid and pointing at the frame
9333 pointer store address, then we only need a pop. */
9334 if (sp_valid_at (frame.hfp_save_offset)
9335 && m->fs.sp_offset == frame.hfp_save_offset)
9336 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9337 /* Leave results in shorter dependency chains on CPUs that are
9338 able to grok it fast. */
9339 else if (TARGET_USE_LEAVE
9340 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
9341 || !cfun->machine->use_fast_prologue_epilogue)
9342 ix86_emit_leave (NULL);
9343 else
9344 {
9345 pro_epilogue_adjust_stack (stack_pointer_rtx,
9346 hard_frame_pointer_rtx,
9347 const0_rtx, style, !using_drap);
9348 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
9349 }
9350 }
9351
9352 if (using_drap)
9353 {
9354 int param_ptr_offset = UNITS_PER_WORD;
9355 rtx_insn *insn;
9356
9357 gcc_assert (stack_realign_drap);
9358
9359 if (ix86_static_chain_on_stack)
9360 param_ptr_offset += UNITS_PER_WORD;
9361 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9362 param_ptr_offset += UNITS_PER_WORD;
9363
9364 insn = emit_insn (gen_rtx_SET
9365 (stack_pointer_rtx,
9366 plus_constant (Pmode, crtl->drap_reg,
9367 -param_ptr_offset)));
9368 m->fs.cfa_reg = stack_pointer_rtx;
9369 m->fs.cfa_offset = param_ptr_offset;
9370 m->fs.sp_offset = param_ptr_offset;
9371 m->fs.realigned = false;
9372
9373 add_reg_note (insn, REG_CFA_DEF_CFA,
9374 plus_constant (Pmode, stack_pointer_rtx,
9375 param_ptr_offset));
9376 RTX_FRAME_RELATED_P (insn) = 1;
9377
9378 if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
9379 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
9380 }
9381
9382 /* At this point the stack pointer must be valid, and we must have
9383 restored all of the registers. We may not have deallocated the
9384 entire stack frame. We've delayed this until now because it may
9385 be possible to merge the local stack deallocation with the
9386 deallocation forced by ix86_static_chain_on_stack. */
9387 gcc_assert (m->fs.sp_valid);
9388 gcc_assert (!m->fs.sp_realigned);
9389 gcc_assert (!m->fs.fp_valid);
9390 gcc_assert (!m->fs.realigned);
9391 if (m->fs.sp_offset != UNITS_PER_WORD)
9392 {
9393 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9394 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
9395 style, true);
9396 }
9397 else
9398 ix86_add_queued_cfa_restore_notes (get_last_insn ());
9399
9400 /* Sibcall epilogues don't want a return instruction. */
9401 if (style == 0)
9402 {
9403 m->fs = frame_state_save;
9404 return;
9405 }
9406
9407 if (cfun->machine->func_type != TYPE_NORMAL)
9408 emit_jump_insn (gen_interrupt_return ());
9409 else if (crtl->args.pops_args && crtl->args.size)
9410 {
9411 rtx popc = GEN_INT (crtl->args.pops_args);
9412
9413 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
9414 address, do an explicit add, and jump indirectly to the caller. */
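/* The emitted sequence is roughly (illustrative only, N being the
caller-pops amount, which does not fit the 16-bit immediate of ret):
    pop  %ecx         ; fetch the return address
    add  $N, %esp     ; explicitly deallocate the argument area
    jmp  *%ecx        ; return indirectly to the caller  */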
9415
9416 if (crtl->args.pops_args >= 65536)
9417 {
9418 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9419 rtx_insn *insn;
9420
9421 /* There is no "pascal" calling convention in any 64bit ABI. */
9422 gcc_assert (!TARGET_64BIT);
9423
9424 insn = emit_insn (gen_pop (ecx));
9425 m->fs.cfa_offset -= UNITS_PER_WORD;
9426 m->fs.sp_offset -= UNITS_PER_WORD;
9427
9428 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9429 x = gen_rtx_SET (stack_pointer_rtx, x);
9430 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9431 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9432 RTX_FRAME_RELATED_P (insn) = 1;
9433
9434 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9435 popc, -1, true);
9436 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9437 }
9438 else
9439 emit_jump_insn (gen_simple_return_pop_internal (popc));
9440 }
9441 else if (!m->call_ms2sysv || !restore_stub_is_tail)
9442 {
9443 /* In case of a return from EH, a simple return cannot be used,
9444 as the return address will be compared with a shadow stack
9445 return address. Use an indirect jump instead. */
9446 if (style == 2 && flag_cf_protection)
9447 {
9448 /* Register used in indirect jump must be in word_mode. But
9449 Pmode may not be the same as word_mode for x32. */
9450 rtx ecx = gen_rtx_REG (word_mode, CX_REG);
9451 rtx_insn *insn;
9452
9453 insn = emit_insn (gen_pop (ecx));
9454 m->fs.cfa_offset -= UNITS_PER_WORD;
9455 m->fs.sp_offset -= UNITS_PER_WORD;
9456
9457 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
9458 x = gen_rtx_SET (stack_pointer_rtx, x);
9459 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9460 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
9461 RTX_FRAME_RELATED_P (insn) = 1;
9462
9463 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
9464 }
9465 else
9466 emit_jump_insn (gen_simple_return_internal ());
9467 }
9468
9469 /* Restore the state back to the state from the prologue,
9470 so that it's correct for the next epilogue. */
9471 m->fs = frame_state_save;
9472 }
9473
9474 /* Reset from the function's potential modifications. */
9475
9476 static void
9477 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
9478 {
9479 if (pic_offset_table_rtx
9480 && !ix86_use_pseudo_pic_reg ())
9481 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9482
9483 if (TARGET_MACHO)
9484 {
9485 rtx_insn *insn = get_last_insn ();
9486 rtx_insn *deleted_debug_label = NULL;
9487
9488 /* Mach-O doesn't support labels at the end of objects, so if
9489 it looks like we might want one, take special action.
9490 First, collect any sequence of deleted debug labels. */
9491 while (insn
9492 && NOTE_P (insn)
9493 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9494 {
9495 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
9496 notes; instead set their CODE_LABEL_NUMBER to -1.
9497 Otherwise there would be code generation differences
9498 between -g and -g0. */
9499 if (NOTE_P (insn) && NOTE_KIND (insn)
9500 == NOTE_INSN_DELETED_DEBUG_LABEL)
9501 deleted_debug_label = insn;
9502 insn = PREV_INSN (insn);
9503 }
9504
9505 /* If we have:
9506 label:
9507 barrier
9508 then this needs to be detected, so skip past the barrier. */
9509
9510 if (insn && BARRIER_P (insn))
9511 insn = PREV_INSN (insn);
9512
9513 /* Up to now we've only seen notes or barriers. */
9514 if (insn)
9515 {
9516 if (LABEL_P (insn)
9517 || (NOTE_P (insn)
9518 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
9519 /* Trailing label. */
9520 fputs ("\tnop\n", file);
9521 else if (cfun && ! cfun->is_thunk)
9522 {
9523 /* See if we have a completely empty function body, skipping
9524 the special case of the picbase thunk emitted as asm. */
9525 while (insn && ! INSN_P (insn))
9526 insn = PREV_INSN (insn);
9527 /* If we don't find any insns, we've got an empty function body;
9528 i.e. completely empty - without a return or branch. This is
9529 taken as the case where a function body has been removed
9530 because it contains an inline __builtin_unreachable(). GCC
9531 declares that reaching __builtin_unreachable() means UB so
9532 we're not obliged to do anything special; however, we want
9533 non-zero-sized function bodies. To meet this, and help the
9534 user out, let's trap the case. */
9535 if (insn == NULL)
9536 fputs ("\tud2\n", file);
9537 }
9538 }
9539 else if (deleted_debug_label)
9540 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
9541 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
9542 CODE_LABEL_NUMBER (insn) = -1;
9543 }
9544 }
9545
9546 /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY. */
9547
9548 void
9549 ix86_print_patchable_function_entry (FILE *file,
9550 unsigned HOST_WIDE_INT patch_area_size,
9551 bool record_p)
9552 {
9553 if (cfun->machine->function_label_emitted)
9554 {
9555 /* NB: When ix86_print_patchable_function_entry is called after
9556 the function label has been emitted, we have inserted or queued
9557 a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
9558 place. There is nothing to do here. */
9559 return;
9560 }
9561
9562 default_print_patchable_function_entry (file, patch_area_size,
9563 record_p);
9564 }
9565
9566 /* Output patchable area. NB: default_print_patchable_function_entry
9567 isn't available in i386.md. */
9568
9569 void
9570 ix86_output_patchable_area (unsigned int patch_area_size,
9571 bool record_p)
9572 {
9573 default_print_patchable_function_entry (asm_out_file,
9574 patch_area_size,
9575 record_p);
9576 }
9577
9578 /* Return a scratch register to use in the split stack prologue. The
9579 split stack prologue is used for -fsplit-stack. It is the first
9580 instructions in the function, even before the regular prologue.
9581 The scratch register can be any caller-saved register which is not
9582 used for parameters or for the static chain. */
9583
9584 static unsigned int
9585 split_stack_prologue_scratch_regno (void)
9586 {
9587 if (TARGET_64BIT)
9588 return R11_REG;
9589 else
9590 {
9591 bool is_fastcall, is_thiscall;
9592 int regparm;
9593
9594 is_fastcall = (lookup_attribute ("fastcall",
9595 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9596 != NULL);
9597 is_thiscall = (lookup_attribute ("thiscall",
9598 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
9599 != NULL);
9600 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
9601
9602 if (is_fastcall)
9603 {
9604 if (DECL_STATIC_CHAIN (cfun->decl))
9605 {
9606 sorry ("%<-fsplit-stack%> does not support fastcall with "
9607 "nested function");
9608 return INVALID_REGNUM;
9609 }
9610 return AX_REG;
9611 }
9612 else if (is_thiscall)
9613 {
9614 if (!DECL_STATIC_CHAIN (cfun->decl))
9615 return DX_REG;
9616 return AX_REG;
9617 }
9618 else if (regparm < 3)
9619 {
9620 if (!DECL_STATIC_CHAIN (cfun->decl))
9621 return CX_REG;
9622 else
9623 {
9624 if (regparm >= 2)
9625 {
9626 sorry ("%<-fsplit-stack%> does not support 2 register "
9627 "parameters for a nested function");
9628 return INVALID_REGNUM;
9629 }
9630 return DX_REG;
9631 }
9632 }
9633 else
9634 {
9635 /* FIXME: We could make this work by pushing a register
9636 around the addition and comparison. */
9637 sorry ("%<-fsplit-stack%> does not support 3 register parameters");
9638 return INVALID_REGNUM;
9639 }
9640 }
9641 }
9642
9643 /* A SYMBOL_REF for the function which allocates new stack space for
9644 -fsplit-stack. */
9645
9646 static GTY(()) rtx split_stack_fn;
9647
9648 /* A SYMBOL_REF for the more stack function when using the large
9649 model. */
9650
9651 static GTY(()) rtx split_stack_fn_large;
9652
9653 /* Return location of the stack guard value in the TLS block. */
9654
9655 rtx
9656 ix86_split_stack_guard (void)
9657 {
9658 int offset;
9659 addr_space_t as = DEFAULT_TLS_SEG_REG;
9660 rtx r;
9661
9662 gcc_assert (flag_split_stack);
9663
9664 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
9665 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
9666 #else
9667 gcc_unreachable ();
9668 #endif
9669
9670 r = GEN_INT (offset);
9671 r = gen_const_mem (Pmode, r);
9672 set_mem_addr_space (r, as);
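/* The result is a segment-relative memory reference; on GNU/Linux
targets this is typically %fs:OFFSET in 64-bit mode and %gs:OFFSET in
32-bit mode, matching the usual DEFAULT_TLS_SEG_REG definition. */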
9673
9674 return r;
9675 }
9676
9677 /* Handle -fsplit-stack. These are the first instructions in the
9678 function, even before the regular prologue. */
9679
9680 void
9681 ix86_expand_split_stack_prologue (void)
9682 {
9683 HOST_WIDE_INT allocate;
9684 unsigned HOST_WIDE_INT args_size;
9685 rtx_code_label *label;
9686 rtx limit, current, allocate_rtx, call_fusage;
9687 rtx_insn *call_insn;
9688 rtx scratch_reg = NULL_RTX;
9689 rtx_code_label *varargs_label = NULL;
9690 rtx fn;
9691
9692 gcc_assert (flag_split_stack && reload_completed);
9693
9694 ix86_finalize_stack_frame_flags ();
9695 struct ix86_frame &frame = cfun->machine->frame;
9696 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
9697
9698 /* This is the label we will branch to if we have enough stack
9699 space. We expect the basic block reordering pass to reverse this
9700 branch if optimizing, so that we branch in the unlikely case. */
9701 label = gen_label_rtx ();
9702
9703 /* We need to compare the stack pointer minus the frame size with
9704 the stack boundary in the TCB. The stack boundary always gives
9705 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
9706 can compare directly. Otherwise we need to do an addition. */
9707
9708 limit = ix86_split_stack_guard ();
9709
9710 if (allocate < SPLIT_STACK_AVAILABLE)
9711 current = stack_pointer_rtx;
9712 else
9713 {
9714 unsigned int scratch_regno;
9715 rtx offset;
9716
9717 /* We need a scratch register to hold the stack pointer minus
9718 the required frame size. Since this is the very start of the
9719 function, the scratch register can be any caller-saved
9720 register which is not used for parameters. */
9721 offset = GEN_INT (- allocate);
9722 scratch_regno = split_stack_prologue_scratch_regno ();
9723 if (scratch_regno == INVALID_REGNUM)
9724 return;
9725 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9726 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
9727 {
9728 /* We don't use gen_add in this case because it will
9729 want to split to lea, but when not optimizing the insn
9730 will not be split after this point. */
9731 emit_insn (gen_rtx_SET (scratch_reg,
9732 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9733 offset)));
9734 }
9735 else
9736 {
9737 emit_move_insn (scratch_reg, offset);
9738 emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
9739 }
9740 current = scratch_reg;
9741 }
9742
9743 ix86_expand_branch (GEU, current, limit, label);
9744 rtx_insn *jump_insn = get_last_insn ();
9745 JUMP_LABEL (jump_insn) = label;
9746
9747 /* Mark the jump as very likely to be taken. */
9748 add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
9749
9750 if (split_stack_fn == NULL_RTX)
9751 {
9752 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
9753 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
9754 }
9755 fn = split_stack_fn;
9756
9757 /* Get more stack space. We pass in the desired stack space and the
9758 size of the arguments to copy to the new stack. In 32-bit mode
9759 we push the parameters; __morestack will return on a new stack
9760 anyhow. In 64-bit mode we pass the parameters in r10 and
9761 r11. */
9762 allocate_rtx = GEN_INT (allocate);
9763 args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
9764 call_fusage = NULL_RTX;
9765 rtx pop = NULL_RTX;
9766 if (TARGET_64BIT)
9767 {
9768 rtx reg10, reg11;
9769
9770 reg10 = gen_rtx_REG (Pmode, R10_REG);
9771 reg11 = gen_rtx_REG (Pmode, R11_REG);
9772
9773 /* If this function uses a static chain, it will be in %r10.
9774 Preserve it across the call to __morestack. */
9775 if (DECL_STATIC_CHAIN (cfun->decl))
9776 {
9777 rtx rax;
9778
9779 rax = gen_rtx_REG (word_mode, AX_REG);
9780 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
9781 use_reg (&call_fusage, rax);
9782 }
9783
9784 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
9785 && !TARGET_PECOFF)
9786 {
9787 HOST_WIDE_INT argval;
9788
9789 gcc_assert (Pmode == DImode);
9790 /* When using the large model we need to load the address
9791 into a register, and we've run out of registers. So we
9792 switch to a different calling convention, and we call a
9793 different function: __morestack_large. We pass the
9794 argument size in the upper 32 bits of r10 and pass the
9795 frame size in the lower 32 bits. */
9796 gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
9797 gcc_assert ((args_size & 0xffffffff) == args_size);
9798
9799 if (split_stack_fn_large == NULL_RTX)
9800 {
9801 split_stack_fn_large
9802 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
9803 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
9804 }
9805 if (ix86_cmodel == CM_LARGE_PIC)
9806 {
9807 rtx_code_label *label;
9808 rtx x;
9809
9810 label = gen_label_rtx ();
9811 emit_label (label);
9812 LABEL_PRESERVE_P (label) = 1;
9813 emit_insn (gen_set_rip_rex64 (reg10, label));
9814 emit_insn (gen_set_got_offset_rex64 (reg11, label));
9815 emit_insn (gen_add2_insn (reg10, reg11));
9816 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
9817 UNSPEC_GOT);
9818 x = gen_rtx_CONST (Pmode, x);
9819 emit_move_insn (reg11, x);
9820 x = gen_rtx_PLUS (Pmode, reg10, reg11);
9821 x = gen_const_mem (Pmode, x);
9822 emit_move_insn (reg11, x);
9823 }
9824 else
9825 emit_move_insn (reg11, split_stack_fn_large);
9826
9827 fn = reg11;
9828
9829 argval = ((args_size << 16) << 16) + allocate;
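/* Illustrative values only: with args_size == 0x20 and allocate == 0x1000,
argval becomes 0x0000002000001000 - the argument size in the upper 32 bits
and the frame size in the lower 32 bits, as described above. */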
9830 emit_move_insn (reg10, GEN_INT (argval));
9831 }
9832 else
9833 {
9834 emit_move_insn (reg10, allocate_rtx);
9835 emit_move_insn (reg11, GEN_INT (args_size));
9836 use_reg (&call_fusage, reg11);
9837 }
9838
9839 use_reg (&call_fusage, reg10);
9840 }
9841 else
9842 {
9843 rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
9844 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
9845 insn = emit_insn (gen_push (allocate_rtx));
9846 add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
9847 pop = GEN_INT (2 * UNITS_PER_WORD);
9848 }
9849 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
9850 GEN_INT (UNITS_PER_WORD), constm1_rtx,
9851 pop, false);
9852 add_function_usage_to (call_insn, call_fusage);
9853 if (!TARGET_64BIT)
9854 add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
9855 /* Indicate that this function can't jump to non-local gotos. */
9856 make_reg_eh_region_note_nothrow_nononlocal (call_insn);
9857
9858 /* In order to make call/return prediction work right, we now need
9859 to execute a return instruction. See
9860 libgcc/config/i386/morestack.S for the details on how this works.
9861
9862 For flow purposes gcc must not see this as a return
9863 instruction--we need control flow to continue at the subsequent
9864 label. Therefore, we use an unspec. */
9865 gcc_assert (crtl->args.pops_args < 65536);
9866 rtx_insn *ret_insn
9867 = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
9868
9869 if ((flag_cf_protection & CF_BRANCH))
9870 {
9871 /* Insert ENDBR since __morestack will jump back here via indirect
9872 call. */
9873 rtx cet_eb = gen_nop_endbr ();
9874 emit_insn_after (cet_eb, ret_insn);
9875 }
9876
9877 /* If we are in 64-bit mode and this function uses a static chain,
9878 we saved %r10 in %rax before calling __morestack. */
9879 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
9880 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
9881 gen_rtx_REG (word_mode, AX_REG));
9882
9883 /* If this function calls va_start, we need to store a pointer to
9884 the arguments on the old stack, because they may not have been
9885 all copied to the new stack. At this point the old stack can be
9886 found at the frame pointer value used by __morestack, because
9887 __morestack has set that up before calling back to us. Here we
9888 store that pointer in a scratch register, and in
9889 ix86_expand_prologue we store the scratch register in a stack
9890 slot. */
9891 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9892 {
9893 unsigned int scratch_regno;
9894 rtx frame_reg;
9895 int words;
9896
9897 scratch_regno = split_stack_prologue_scratch_regno ();
9898 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
9899 frame_reg = gen_rtx_REG (Pmode, BP_REG);
9900
9901 /* 64-bit:
9902 fp -> old fp value
9903 return address within this function
9904 return address of caller of this function
9905 stack arguments
9906 So we add three words to get to the stack arguments.
9907
9908 32-bit:
9909 fp -> old fp value
9910 return address within this function
9911 first argument to __morestack
9912 second argument to __morestack
9913 return address of caller of this function
9914 stack arguments
9915 So we add five words to get to the stack arguments.
9916 */
9917 words = TARGET_64BIT ? 3 : 5;
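/* That is, skip 3 * 8 == 24 bytes in 64-bit mode or 5 * 4 == 20 bytes
in 32-bit mode to reach the stack arguments, assuming the frame layouts
sketched above. */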
9918 emit_insn (gen_rtx_SET (scratch_reg,
9919 plus_constant (Pmode, frame_reg,
9920 words * UNITS_PER_WORD)));
9921
9922 varargs_label = gen_label_rtx ();
9923 emit_jump_insn (gen_jump (varargs_label));
9924 JUMP_LABEL (get_last_insn ()) = varargs_label;
9925
9926 emit_barrier ();
9927 }
9928
9929 emit_label (label);
9930 LABEL_NUSES (label) = 1;
9931
9932 /* If this function calls va_start, we now have to set the scratch
9933 register for the case where we do not call __morestack. In this
9934 case we need to set it based on the stack pointer. */
9935 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9936 {
9937 emit_insn (gen_rtx_SET (scratch_reg,
9938 plus_constant (Pmode, stack_pointer_rtx,
9939 UNITS_PER_WORD)));
9940
9941 emit_label (varargs_label);
9942 LABEL_NUSES (varargs_label) = 1;
9943 }
9944 }
9945
9946 /* We may have to tell the dataflow pass that the split stack prologue
9947 is initializing a scratch register. */
9948
9949 static void
9950 ix86_live_on_entry (bitmap regs)
9951 {
9952 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
9953 {
9954 gcc_assert (flag_split_stack);
9955 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
9956 }
9957 }
9958 \f
9959 /* Extract the parts of an RTL expression that is a valid memory address
9960 for an instruction. Return 0 if the structure of the address is
9961 grossly off. Return -1 if the address contains ASHIFT, so it is not
9962 strictly valid, but still used for computing the length of an lea instruction. */
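/* For example, the canonical address
(plus (plus (mult (reg idx) (const_int 4)) (reg base)) (const_int 16)),
i.e. base + idx*4 + 16, decomposes into out->base = base,
out->index = idx, out->scale = 4 and out->disp = (const_int 16).
("base" and "idx" stand for arbitrary address registers.) */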
9963
9964 int
9965 ix86_decompose_address (rtx addr, struct ix86_address *out)
9966 {
9967 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9968 rtx base_reg, index_reg;
9969 HOST_WIDE_INT scale = 1;
9970 rtx scale_rtx = NULL_RTX;
9971 rtx tmp;
9972 int retval = 1;
9973 addr_space_t seg = ADDR_SPACE_GENERIC;
9974
9975 /* Allow zero-extended SImode addresses,
9976 they will be emitted with addr32 prefix. */
9977 if (TARGET_64BIT && GET_MODE (addr) == DImode)
9978 {
9979 if (GET_CODE (addr) == ZERO_EXTEND
9980 && GET_MODE (XEXP (addr, 0)) == SImode)
9981 {
9982 addr = XEXP (addr, 0);
9983 if (CONST_INT_P (addr))
9984 return 0;
9985 }
9986 else if (GET_CODE (addr) == AND
9987 && const_32bit_mask (XEXP (addr, 1), DImode))
9988 {
9989 addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
9990 if (addr == NULL_RTX)
9991 return 0;
9992
9993 if (CONST_INT_P (addr))
9994 return 0;
9995 }
9996 }
9997
9998 /* Allow SImode subregs of DImode addresses,
9999 they will be emitted with addr32 prefix. */
10000 if (TARGET_64BIT && GET_MODE (addr) == SImode)
10001 {
10002 if (SUBREG_P (addr)
10003 && GET_MODE (SUBREG_REG (addr)) == DImode)
10004 {
10005 addr = SUBREG_REG (addr);
10006 if (CONST_INT_P (addr))
10007 return 0;
10008 }
10009 }
10010
10011 if (REG_P (addr))
10012 base = addr;
10013 else if (SUBREG_P (addr))
10014 {
10015 if (REG_P (SUBREG_REG (addr)))
10016 base = addr;
10017 else
10018 return 0;
10019 }
10020 else if (GET_CODE (addr) == PLUS)
10021 {
10022 rtx addends[4], op;
10023 int n = 0, i;
10024
10025 op = addr;
10026 do
10027 {
10028 if (n >= 4)
10029 return 0;
10030 addends[n++] = XEXP (op, 1);
10031 op = XEXP (op, 0);
10032 }
10033 while (GET_CODE (op) == PLUS);
10034 if (n >= 4)
10035 return 0;
10036 addends[n] = op;
10037
10038 for (i = n; i >= 0; --i)
10039 {
10040 op = addends[i];
10041 switch (GET_CODE (op))
10042 {
10043 case MULT:
10044 if (index)
10045 return 0;
10046 index = XEXP (op, 0);
10047 scale_rtx = XEXP (op, 1);
10048 break;
10049
10050 case ASHIFT:
10051 if (index)
10052 return 0;
10053 index = XEXP (op, 0);
10054 tmp = XEXP (op, 1);
10055 if (!CONST_INT_P (tmp))
10056 return 0;
10057 scale = INTVAL (tmp);
10058 if ((unsigned HOST_WIDE_INT) scale > 3)
10059 return 0;
10060 scale = 1 << scale;
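/* E.g. an ASHIFT by 3 corresponds to index*8; larger shift counts
cannot be represented in the SIB byte and were rejected above. */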
10061 break;
10062
10063 case ZERO_EXTEND:
10064 op = XEXP (op, 0);
10065 if (GET_CODE (op) != UNSPEC)
10066 return 0;
10067 /* FALLTHRU */
10068
10069 case UNSPEC:
10070 if (XINT (op, 1) == UNSPEC_TP
10071 && TARGET_TLS_DIRECT_SEG_REFS
10072 && seg == ADDR_SPACE_GENERIC)
10073 seg = DEFAULT_TLS_SEG_REG;
10074 else
10075 return 0;
10076 break;
10077
10078 case SUBREG:
10079 if (!REG_P (SUBREG_REG (op)))
10080 return 0;
10081 /* FALLTHRU */
10082
10083 case REG:
10084 if (!base)
10085 base = op;
10086 else if (!index)
10087 index = op;
10088 else
10089 return 0;
10090 break;
10091
10092 case CONST:
10093 case CONST_INT:
10094 case SYMBOL_REF:
10095 case LABEL_REF:
10096 if (disp)
10097 return 0;
10098 disp = op;
10099 break;
10100
10101 default:
10102 return 0;
10103 }
10104 }
10105 }
10106 else if (GET_CODE (addr) == MULT)
10107 {
10108 index = XEXP (addr, 0); /* index*scale */
10109 scale_rtx = XEXP (addr, 1);
10110 }
10111 else if (GET_CODE (addr) == ASHIFT)
10112 {
10113 /* We're called for lea too, which implements ashift on occasion. */
10114 index = XEXP (addr, 0);
10115 tmp = XEXP (addr, 1);
10116 if (!CONST_INT_P (tmp))
10117 return 0;
10118 scale = INTVAL (tmp);
10119 if ((unsigned HOST_WIDE_INT) scale > 3)
10120 return 0;
10121 scale = 1 << scale;
10122 retval = -1;
10123 }
10124 else
10125 disp = addr; /* displacement */
10126
10127 if (index)
10128 {
10129 if (REG_P (index))
10130 ;
10131 else if (SUBREG_P (index)
10132 && REG_P (SUBREG_REG (index)))
10133 ;
10134 else
10135 return 0;
10136 }
10137
10138 /* Extract the integral value of scale. */
10139 if (scale_rtx)
10140 {
10141 if (!CONST_INT_P (scale_rtx))
10142 return 0;
10143 scale = INTVAL (scale_rtx);
10144 }
10145
10146 base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
10147 index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
10148
10149 /* Avoid useless 0 displacement. */
10150 if (disp == const0_rtx && (base || index))
10151 disp = NULL_RTX;
10152
10153 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10154 if (base_reg && index_reg && scale == 1
10155 && (REGNO (index_reg) == ARG_POINTER_REGNUM
10156 || REGNO (index_reg) == FRAME_POINTER_REGNUM
10157 || REGNO (index_reg) == SP_REG))
10158 {
10159 std::swap (base, index);
10160 std::swap (base_reg, index_reg);
10161 }
10162
10163 /* Special case: %ebp cannot be encoded as a base without a displacement.
10164 Similarly %r13. */
10165 if (!disp && base_reg
10166 && (REGNO (base_reg) == ARG_POINTER_REGNUM
10167 || REGNO (base_reg) == FRAME_POINTER_REGNUM
10168 || REGNO (base_reg) == BP_REG
10169 || REGNO (base_reg) == R13_REG))
10170 disp = const0_rtx;
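/* E.g. a bare (%ebp) or (%r13) is therefore assembled as 0(%ebp) or
0(%r13), with an explicit one-byte zero displacement (AT&T syntax,
for illustration). */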
10171
10172 /* Special case: on K6, [%esi] causes the instruction to be vector
10173 decoded. Avoid this by transforming it to [%esi+0].
10174 Reload calls address legitimization without cfun defined, so we need
10175 to test cfun for being non-NULL. */
10176 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10177 && base_reg && !index_reg && !disp
10178 && REGNO (base_reg) == SI_REG)
10179 disp = const0_rtx;
10180
10181 /* Special case: encode reg+reg instead of reg*2. */
10182 if (!base && index && scale == 2)
10183 base = index, base_reg = index_reg, scale = 1;
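/* E.g. (,%eax,2) becomes (%eax,%eax); the latter needs no explicit zero
displacement, which a scaled index without a base would otherwise
require (see the next special case below). */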
10184
10185 /* Special case: scaling cannot be encoded without base or displacement. */
10186 if (!base && !disp && index && scale != 1)
10187 disp = const0_rtx;
10188
10189 out->base = base;
10190 out->index = index;
10191 out->disp = disp;
10192 out->scale = scale;
10193 out->seg = seg;
10194
10195 return retval;
10196 }
10197 \f
10198 /* Return cost of the memory address x.
10199 For i386, it is better to use a complex address than let gcc copy
10200 the address into a reg and make a new pseudo. But not if the address
10201 requires two regs - that would mean more pseudos with longer
10202 lifetimes. */
10203 static int
10204 ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
10205 {
10206 struct ix86_address parts;
10207 int cost = 1;
10208 int ok = ix86_decompose_address (x, &parts);
10209
10210 gcc_assert (ok);
10211
10212 if (parts.base && SUBREG_P (parts.base))
10213 parts.base = SUBREG_REG (parts.base);
10214 if (parts.index && SUBREG_P (parts.index))
10215 parts.index = SUBREG_REG (parts.index);
10216
10217 /* Attempt to minimize number of registers in the address by increasing
10218 address cost for each used register. We don't increase address cost
10219 for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx"
10220 is not invariant itself it most likely means that base or index is not
10221 invariant. Therefore only "pic_offset_table_rtx" could be hoisted out,
10222 which is not profitable for x86. */
10223 if (parts.base
10224 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10225 && (current_pass->type == GIMPLE_PASS
10226 || !pic_offset_table_rtx
10227 || !REG_P (parts.base)
10228 || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
10229 cost++;
10230
10231 if (parts.index
10232 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10233 && (current_pass->type == GIMPLE_PASS
10234 || !pic_offset_table_rtx
10235 || !REG_P (parts.index)
10236 || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
10237 cost++;
10238
10239 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10240 since its predecode logic can't detect the length of instructions
10241 and it degenerates to vector decoding. Increase the cost of such
10242 addresses here. The penalty is at least 2 cycles. It may be worthwhile
10243 to split such addresses or even refuse such addresses at all.
10244
10245 Following addressing modes are affected:
10246 [base+scale*index]
10247 [scale*index+disp]
10248 [base+index]
10249
10250 The first and last case may be avoidable by explicitly coding the zero in
10251 memory address, but I don't have an AMD-K6 machine handy to check this
10252 theory. */
10253
10254 if (TARGET_K6
10255 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10256 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10257 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10258 cost += 10;
10259
10260 return cost;
10261 }
10262 \f
10263 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10264 this is used to form addresses to local data when -fPIC is in
10265 use. */
10266
10267 static bool
10268 darwin_local_data_pic (rtx disp)
10269 {
10270 return (GET_CODE (disp) == UNSPEC
10271 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10272 }
10273
10274 /* True if operand X should be loaded from GOT. */
10275
10276 bool
10277 ix86_force_load_from_GOT_p (rtx x)
10278 {
10279 return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X)
10280 && !TARGET_PECOFF && !TARGET_MACHO
10281 && !flag_pic
10282 && ix86_cmodel != CM_LARGE
10283 && GET_CODE (x) == SYMBOL_REF
10284 && SYMBOL_REF_FUNCTION_P (x)
10285 && (!flag_plt
10286 || (SYMBOL_REF_DECL (x)
10287 && lookup_attribute ("noplt",
10288 DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))
10289 && !SYMBOL_REF_LOCAL_P (x));
10290 }
10291
10292 /* Determine if a given RTX is a valid constant. We already know this
10293 satisfies CONSTANT_P. */
10294
10295 static bool
10296 ix86_legitimate_constant_p (machine_mode mode, rtx x)
10297 {
10298 switch (GET_CODE (x))
10299 {
10300 case CONST:
10301 x = XEXP (x, 0);
10302
10303 if (GET_CODE (x) == PLUS)
10304 {
10305 if (!CONST_INT_P (XEXP (x, 1)))
10306 return false;
10307 x = XEXP (x, 0);
10308 }
10309
10310 if (TARGET_MACHO && darwin_local_data_pic (x))
10311 return true;
10312
10313 /* Only some unspecs are valid as "constants". */
10314 if (GET_CODE (x) == UNSPEC)
10315 switch (XINT (x, 1))
10316 {
10317 case UNSPEC_GOT:
10318 case UNSPEC_GOTOFF:
10319 case UNSPEC_PLTOFF:
10320 return TARGET_64BIT;
10321 case UNSPEC_TPOFF:
10322 case UNSPEC_NTPOFF:
10323 x = XVECEXP (x, 0, 0);
10324 return (GET_CODE (x) == SYMBOL_REF
10325 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10326 case UNSPEC_DTPOFF:
10327 x = XVECEXP (x, 0, 0);
10328 return (GET_CODE (x) == SYMBOL_REF
10329 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10330 default:
10331 return false;
10332 }
10333
10334 /* We must have drilled down to a symbol. */
10335 if (GET_CODE (x) == LABEL_REF)
10336 return true;
10337 if (GET_CODE (x) != SYMBOL_REF)
10338 return false;
10339 /* FALLTHRU */
10340
10341 case SYMBOL_REF:
10342 /* TLS symbols are never valid. */
10343 if (SYMBOL_REF_TLS_MODEL (x))
10344 return false;
10345
10346 /* DLLIMPORT symbols are never valid. */
10347 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10348 && SYMBOL_REF_DLLIMPORT_P (x))
10349 return false;
10350
10351 #if TARGET_MACHO
10352 /* mdynamic-no-pic */
10353 if (MACHO_DYNAMIC_NO_PIC_P)
10354 return machopic_symbol_defined_p (x);
10355 #endif
10356
10357 /* External function address should be loaded
10358 via the GOT slot to avoid PLT. */
10359 if (ix86_force_load_from_GOT_p (x))
10360 return false;
10361
10362 break;
10363
10364 CASE_CONST_SCALAR_INT:
10365 if (ix86_endbr_immediate_operand (x, VOIDmode))
10366 return false;
10367
10368 switch (mode)
10369 {
10370 case E_TImode:
10371 if (TARGET_64BIT)
10372 return true;
10373 /* FALLTHRU */
10374 case E_OImode:
10375 case E_XImode:
10376 if (!standard_sse_constant_p (x, mode))
10377 return false;
10378 default:
10379 break;
10380 }
10381 break;
10382
10383 case CONST_VECTOR:
10384 if (!standard_sse_constant_p (x, mode))
10385 return false;
10386
10387 default:
10388 break;
10389 }
10390
10391 /* Otherwise we handle everything else in the move patterns. */
10392 return true;
10393 }
10394
10395 /* Determine if it's legal to put X into the constant pool. This
10396 is not possible for the address of thread-local symbols, which
10397 is checked above. */
10398
10399 static bool
10400 ix86_cannot_force_const_mem (machine_mode mode, rtx x)
10401 {
10402 /* We can put any immediate constant in memory. */
10403 switch (GET_CODE (x))
10404 {
10405 CASE_CONST_ANY:
10406 return false;
10407
10408 default:
10409 break;
10410 }
10411
10412 return !ix86_legitimate_constant_p (mode, x);
10413 }
10414
10415 /* Nonzero if the symbol is marked as dllimport, or as stub-variable,
10416 otherwise zero. */
10417
10418 static bool
10419 is_imported_p (rtx x)
10420 {
10421 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
10422 || GET_CODE (x) != SYMBOL_REF)
10423 return false;
10424
10425 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
10426 }
10427
10428
10429 /* Nonzero if the constant value X is a legitimate general operand
10430 when generating PIC code. It is given that flag_pic is on and
10431 that X satisfies CONSTANT_P. */
10432
10433 bool
10434 legitimate_pic_operand_p (rtx x)
10435 {
10436 rtx inner;
10437
10438 switch (GET_CODE (x))
10439 {
10440 case CONST:
10441 inner = XEXP (x, 0);
10442 if (GET_CODE (inner) == PLUS
10443 && CONST_INT_P (XEXP (inner, 1)))
10444 inner = XEXP (inner, 0);
10445
10446 /* Only some unspecs are valid as "constants". */
10447 if (GET_CODE (inner) == UNSPEC)
10448 switch (XINT (inner, 1))
10449 {
10450 case UNSPEC_GOT:
10451 case UNSPEC_GOTOFF:
10452 case UNSPEC_PLTOFF:
10453 return TARGET_64BIT;
10454 case UNSPEC_TPOFF:
10455 x = XVECEXP (inner, 0, 0);
10456 return (GET_CODE (x) == SYMBOL_REF
10457 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10458 case UNSPEC_MACHOPIC_OFFSET:
10459 return legitimate_pic_address_disp_p (x);
10460 default:
10461 return false;
10462 }
10463 /* FALLTHRU */
10464
10465 case SYMBOL_REF:
10466 case LABEL_REF:
10467 return legitimate_pic_address_disp_p (x);
10468
10469 default:
10470 return true;
10471 }
10472 }
10473
10474 /* Determine if a given CONST RTX is a valid memory displacement
10475 in PIC mode. */
10476
10477 bool
10478 legitimate_pic_address_disp_p (rtx disp)
10479 {
10480 bool saw_plus;
10481
10482 /* In 64bit mode we can allow direct addresses of symbols and labels
10483 when they are not dynamic symbols. */
10484 if (TARGET_64BIT)
10485 {
10486 rtx op0 = disp, op1;
10487
10488 switch (GET_CODE (disp))
10489 {
10490 case LABEL_REF:
10491 return true;
10492
10493 case CONST:
10494 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10495 break;
10496 op0 = XEXP (XEXP (disp, 0), 0);
10497 op1 = XEXP (XEXP (disp, 0), 1);
10498 if (!CONST_INT_P (op1))
10499 break;
10500 if (GET_CODE (op0) == UNSPEC
10501 && (XINT (op0, 1) == UNSPEC_DTPOFF
10502 || XINT (op0, 1) == UNSPEC_NTPOFF)
10503 && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
10504 return true;
10505 if (INTVAL (op1) >= 16*1024*1024
10506 || INTVAL (op1) < -16*1024*1024)
10507 break;
10508 if (GET_CODE (op0) == LABEL_REF)
10509 return true;
10510 if (GET_CODE (op0) == CONST
10511 && GET_CODE (XEXP (op0, 0)) == UNSPEC
10512 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
10513 return true;
10514 if (GET_CODE (op0) == UNSPEC
10515 && XINT (op0, 1) == UNSPEC_PCREL)
10516 return true;
10517 if (GET_CODE (op0) != SYMBOL_REF)
10518 break;
10519 /* FALLTHRU */
10520
10521 case SYMBOL_REF:
10522 /* TLS references should always be enclosed in UNSPEC.
10523 A dllimported symbol always needs to be resolved. */
10524 if (SYMBOL_REF_TLS_MODEL (op0)
10525 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
10526 return false;
10527
10528 if (TARGET_PECOFF)
10529 {
10530 if (is_imported_p (op0))
10531 return true;
10532
10533 if (SYMBOL_REF_FAR_ADDR_P (op0)
10534 || !SYMBOL_REF_LOCAL_P (op0))
10535 break;
10536
10537 /* Function symbols need to be resolved only for
10538 the large model.
10539 For the small model we don't need to resolve anything
10540 here. */
10541 if ((ix86_cmodel != CM_LARGE_PIC
10542 && SYMBOL_REF_FUNCTION_P (op0))
10543 || ix86_cmodel == CM_SMALL_PIC)
10544 return true;
10545 /* Non-external symbols don't need to be resolved for
10546 the large and medium models. */
10547 if ((ix86_cmodel == CM_LARGE_PIC
10548 || ix86_cmodel == CM_MEDIUM_PIC)
10549 && !SYMBOL_REF_EXTERNAL_P (op0))
10550 return true;
10551 }
10552 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
10553 && (SYMBOL_REF_LOCAL_P (op0)
10554 || (HAVE_LD_PIE_COPYRELOC
10555 && flag_pie
10556 && !SYMBOL_REF_WEAK (op0)
10557 && !SYMBOL_REF_FUNCTION_P (op0)))
10558 && ix86_cmodel != CM_LARGE_PIC)
10559 return true;
10560 break;
10561
10562 default:
10563 break;
10564 }
10565 }
10566 if (GET_CODE (disp) != CONST)
10567 return false;
10568 disp = XEXP (disp, 0);
10569
10570 if (TARGET_64BIT)
10571 {
10572 /* It is unsafe to allow PLUS expressions here; this limits the allowed
10573 distance of GOT references. We should not need these anyway. */
10574 if (GET_CODE (disp) != UNSPEC
10575 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10576 && XINT (disp, 1) != UNSPEC_GOTOFF
10577 && XINT (disp, 1) != UNSPEC_PCREL
10578 && XINT (disp, 1) != UNSPEC_PLTOFF))
10579 return false;
10580
10581 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10582 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10583 return false;
10584 return true;
10585 }
10586
10587 saw_plus = false;
10588 if (GET_CODE (disp) == PLUS)
10589 {
10590 if (!CONST_INT_P (XEXP (disp, 1)))
10591 return false;
10592 disp = XEXP (disp, 0);
10593 saw_plus = true;
10594 }
10595
10596 if (TARGET_MACHO && darwin_local_data_pic (disp))
10597 return true;
10598
10599 if (GET_CODE (disp) != UNSPEC)
10600 return false;
10601
10602 switch (XINT (disp, 1))
10603 {
10604 case UNSPEC_GOT:
10605 if (saw_plus)
10606 return false;
10607 /* We need to check for both symbols and labels because VxWorks loads
10608 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10609 details. */
10610 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10611 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10612 case UNSPEC_GOTOFF:
10613 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10614 While the ABI also specifies a 32bit relocation, we don't produce
10615 it in the small PIC model at all. */
10616 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10617 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10618 && !TARGET_64BIT)
10619 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10620 return false;
10621 case UNSPEC_GOTTPOFF:
10622 case UNSPEC_GOTNTPOFF:
10623 case UNSPEC_INDNTPOFF:
10624 if (saw_plus)
10625 return false;
10626 disp = XVECEXP (disp, 0, 0);
10627 return (GET_CODE (disp) == SYMBOL_REF
10628 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10629 case UNSPEC_NTPOFF:
10630 disp = XVECEXP (disp, 0, 0);
10631 return (GET_CODE (disp) == SYMBOL_REF
10632 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10633 case UNSPEC_DTPOFF:
10634 disp = XVECEXP (disp, 0, 0);
10635 return (GET_CODE (disp) == SYMBOL_REF
10636 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10637 }
10638
10639 return false;
10640 }
10641
10642 /* Determine if op is a suitable RTX for an address register.
10643 Return naked register if a register or a register subreg is
10644 found, otherwise return NULL_RTX. */
10645
10646 static rtx
10647 ix86_validate_address_register (rtx op)
10648 {
10649 machine_mode mode = GET_MODE (op);
10650
10651 /* Only SImode or DImode registers can form the address. */
10652 if (mode != SImode && mode != DImode)
10653 return NULL_RTX;
10654
10655 if (REG_P (op))
10656 return op;
10657 else if (SUBREG_P (op))
10658 {
10659 rtx reg = SUBREG_REG (op);
10660
10661 if (!REG_P (reg))
10662 return NULL_RTX;
10663
10664 mode = GET_MODE (reg);
10665
10666 /* Don't allow SUBREGs that span more than a word. It can
10667 lead to spill failures when the register is one word out
10668 of a two word structure. */
10669 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
10670 return NULL_RTX;
10671
10672 /* Allow only SUBREGs of non-eliminable hard registers. */
10673 if (register_no_elim_operand (reg, mode))
10674 return reg;
10675 }
10676
10677 /* Op is not a register. */
10678 return NULL_RTX;
10679 }
10680
10681 /* Recognizes RTL expressions that are valid memory addresses for an
10682 instruction. The MODE argument is the machine mode for the MEM
10683 expression that wants to use this address.
10684
10685 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10686 convert common non-canonical forms to canonical form so that they will
10687 be recognized. */
10688
10689 static bool
10690 ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
10691 {
10692 struct ix86_address parts;
10693 rtx base, index, disp;
10694 HOST_WIDE_INT scale;
10695 addr_space_t seg;
10696
10697 if (ix86_decompose_address (addr, &parts) <= 0)
10698 /* Decomposition failed. */
10699 return false;
10700
10701 base = parts.base;
10702 index = parts.index;
10703 disp = parts.disp;
10704 scale = parts.scale;
10705 seg = parts.seg;
10706
10707 /* Validate base register. */
10708 if (base)
10709 {
10710 rtx reg = ix86_validate_address_register (base);
10711
10712 if (reg == NULL_RTX)
10713 return false;
10714
10715 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10716 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10717 /* Base is not valid. */
10718 return false;
10719 }
10720
10721 /* Validate index register. */
10722 if (index)
10723 {
10724 rtx reg = ix86_validate_address_register (index);
10725
10726 if (reg == NULL_RTX)
10727 return false;
10728
10729 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10730 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10731 /* Index is not valid. */
10732 return false;
10733 }
10734
10735 /* Index and base should have the same mode. */
10736 if (base && index
10737 && GET_MODE (base) != GET_MODE (index))
10738 return false;
10739
10740 /* Address override works only on the (%reg) part of %fs:(%reg). */
10741 if (seg != ADDR_SPACE_GENERIC
10742 && ((base && GET_MODE (base) != word_mode)
10743 || (index && GET_MODE (index) != word_mode)))
10744 return false;
10745
10746 /* Validate scale factor. */
10747 if (scale != 1)
10748 {
10749 if (!index)
10750 /* Scale without index. */
10751 return false;
10752
10753 if (scale != 2 && scale != 4 && scale != 8)
10754 /* Scale is not a valid multiplier. */
10755 return false;
10756 }
10757
10758 /* Validate displacement. */
10759 if (disp)
10760 {
10761 if (ix86_endbr_immediate_operand (disp, VOIDmode))
10762 return false;
10763
10764 if (GET_CODE (disp) == CONST
10765 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10766 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10767 switch (XINT (XEXP (disp, 0), 1))
10768 {
10769 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
10770 when used. While the ABI also specifies 32bit relocations, we
10771 don't produce them at all and use IP-relative addressing instead.
10772 Allow GOT in 32bit mode for both PIC and non-PIC if the symbol
10773 should be loaded via the GOT. */
10774 case UNSPEC_GOT:
10775 if (!TARGET_64BIT
10776 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10777 goto is_legitimate_pic;
10778 /* FALLTHRU */
10779 case UNSPEC_GOTOFF:
10780 gcc_assert (flag_pic);
10781 if (!TARGET_64BIT)
10782 goto is_legitimate_pic;
10783
10784 /* 64bit address unspec. */
10785 return false;
10786
10787 case UNSPEC_GOTPCREL:
10788 if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
10789 goto is_legitimate_pic;
10790 /* FALLTHRU */
10791 case UNSPEC_PCREL:
10792 gcc_assert (flag_pic);
10793 goto is_legitimate_pic;
10794
10795 case UNSPEC_GOTTPOFF:
10796 case UNSPEC_GOTNTPOFF:
10797 case UNSPEC_INDNTPOFF:
10798 case UNSPEC_NTPOFF:
10799 case UNSPEC_DTPOFF:
10800 break;
10801
10802 default:
10803 /* Invalid address unspec. */
10804 return false;
10805 }
10806
10807 else if (SYMBOLIC_CONST (disp)
10808 && (flag_pic
10809 || (TARGET_MACHO
10810 #if TARGET_MACHO
10811 && MACHOPIC_INDIRECT
10812 && !machopic_operand_p (disp)
10813 #endif
10814 )))
10815 {
10816
10817 is_legitimate_pic:
10818 if (TARGET_64BIT && (index || base))
10819 {
10820 /* foo@dtpoff(%rX) is ok. */
10821 if (GET_CODE (disp) != CONST
10822 || GET_CODE (XEXP (disp, 0)) != PLUS
10823 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10824 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10825 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10826 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10827 /* Non-constant pic memory reference. */
10828 return false;
10829 }
10830 else if ((!TARGET_MACHO || flag_pic)
10831 && ! legitimate_pic_address_disp_p (disp))
10832 /* Displacement is an invalid pic construct. */
10833 return false;
10834 #if TARGET_MACHO
10835 else if (MACHO_DYNAMIC_NO_PIC_P
10836 && !ix86_legitimate_constant_p (Pmode, disp))
10837 /* displacement must be referenced via non_lazy_pointer */
10838 return false;
10839 #endif
10840
10841 /* This code used to verify that a symbolic pic displacement
10842 includes the pic_offset_table_rtx register.
10843
10844 While this is a good idea, unfortunately these constructs may
10845 be created by "adds using lea" optimization for incorrect
10846 code like:
10847
10848 int a;
10849 int foo(int i)
10850 {
10851 return *(&a+i);
10852 }
10853
10854 This code is nonsensical, but results in addressing the
10855 GOT table with pic_offset_table_rtx as base. We can't
10856 easily refuse it, since it gets matched by the
10857 "addsi3" pattern, which later gets split to lea when the
10858 output register differs from the input. While this
10859 could be handled by a separate addsi pattern for this case
10860 that never results in lea, disabling this test seems to be
10861 the easier and correct fix for the crash. */
10862 }
10863 else if (GET_CODE (disp) != LABEL_REF
10864 && !CONST_INT_P (disp)
10865 && (GET_CODE (disp) != CONST
10866 || !ix86_legitimate_constant_p (Pmode, disp))
10867 && (GET_CODE (disp) != SYMBOL_REF
10868 || !ix86_legitimate_constant_p (Pmode, disp)))
10869 /* Displacement is not constant. */
10870 return false;
10871 else if (TARGET_64BIT
10872 && !x86_64_immediate_operand (disp, VOIDmode))
10873 /* Displacement is out of range. */
10874 return false;
10875 /* In x32 mode, constant addresses are sign extended to 64bit, so
10876 we have to prevent addresses from 0x80000000 to 0xffffffff. */
10877 else if (TARGET_X32 && !(index || base)
10878 && CONST_INT_P (disp)
10879 && val_signbit_known_set_p (SImode, INTVAL (disp)))
10880 return false;
10881 }
10882
10883 /* Everything looks valid. */
10884 return true;
10885 }
10886
10887 /* Determine if a given RTX is a valid constant address. */
10888
10889 bool
10890 constant_address_p (rtx x)
10891 {
10892 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10893 }
10894 \f
10895 /* Return a unique alias set for the GOT. */
10896
10897 alias_set_type
10898 ix86_GOT_alias_set (void)
10899 {
10900 static alias_set_type set = -1;
10901 if (set == -1)
10902 set = new_alias_set ();
10903 return set;
10904 }
10905
10906 /* Return a legitimate reference for ORIG (an address) using the
10907 register REG. If REG is 0, a new pseudo is generated.
10908
10909 There are two types of references that must be handled:
10910
10911 1. Global data references must load the address from the GOT, via
10912 the PIC reg. An insn is emitted to do this load, and the reg is
10913 returned.
10914
10915 2. Static data references, constant pool addresses, and code labels
10916 compute the address as an offset from the GOT, whose base is in
10917 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10918 differentiate them from global data objects. The returned
10919 address is the PIC reg + an unspec constant.
10920
10921 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10922 reg also appears in the address. */
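/* For example, on 32-bit ELF targets a reference to global data typically
   expands to a GOT load such as "movl foo@GOT(%ebx), %eax", while a
   reference to local static data typically becomes a GOT-relative offset
   such as "leal bar@GOTOFF(%ebx), %eax".  */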
10923
10924 rtx
10925 legitimize_pic_address (rtx orig, rtx reg)
10926 {
10927 rtx addr = orig;
10928 rtx new_rtx = orig;
10929
10930 #if TARGET_MACHO
10931 if (TARGET_MACHO && !TARGET_64BIT)
10932 {
10933 if (reg == 0)
10934 reg = gen_reg_rtx (Pmode);
10935 /* Use the generic Mach-O PIC machinery. */
10936 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10937 }
10938 #endif
10939
10940 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10941 {
10942 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10943 if (tmp)
10944 return tmp;
10945 }
10946
10947 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10948 new_rtx = addr;
10949 else if ((!TARGET_64BIT
10950 || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
10951 && !TARGET_PECOFF
10952 && gotoff_operand (addr, Pmode))
10953 {
10954 /* This symbol may be referenced via a displacement
10955 from the PIC base address (@GOTOFF). */
10956 if (GET_CODE (addr) == CONST)
10957 addr = XEXP (addr, 0);
10958
10959 if (GET_CODE (addr) == PLUS)
10960 {
10961 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10962 UNSPEC_GOTOFF);
10963 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10964 }
10965 else
10966 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10967
10968 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10969
10970 if (TARGET_64BIT)
10971 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
10972
10973 if (reg != 0)
10974 {
10975 gcc_assert (REG_P (reg));
10976 new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
10977 new_rtx, reg, 1, OPTAB_DIRECT);
10978 }
10979 else
10980 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10981 }
10982 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10983 /* We can't use @GOTOFF for text labels
10984 on VxWorks, see gotoff_operand. */
10985 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10986 {
10987 rtx tmp = legitimize_pe_coff_symbol (addr, true);
10988 if (tmp)
10989 return tmp;
10990
10991 /* For x64 PE-COFF there is no GOT table,
10992 so we use the address directly. */
10993 if (TARGET_64BIT && TARGET_PECOFF)
10994 {
10995 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
10996 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10997 }
10998 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10999 {
11000 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
11001 UNSPEC_GOTPCREL);
11002 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11003 new_rtx = gen_const_mem (Pmode, new_rtx);
11004 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11005 }
11006 else
11007 {
11008 /* This symbol must be referenced via a load
11009 from the Global Offset Table (@GOT). */
11010 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11011 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11012 if (TARGET_64BIT)
11013 new_rtx = force_reg (Pmode, new_rtx);
11014 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11015 new_rtx = gen_const_mem (Pmode, new_rtx);
11016 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11017 }
11018
11019 new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
11020 }
11021 else
11022 {
11023 if (CONST_INT_P (addr)
11024 && !x86_64_immediate_operand (addr, VOIDmode))
11025 new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
11026 else if (GET_CODE (addr) == CONST)
11027 {
11028 addr = XEXP (addr, 0);
11029
11030 /* We must match stuff we generate before. Assume the only
11031 unspecs that can get here are ours. Not that we could do
11032 anything with them anyway.... */
11033 if (GET_CODE (addr) == UNSPEC
11034 || (GET_CODE (addr) == PLUS
11035 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11036 return orig;
11037 gcc_assert (GET_CODE (addr) == PLUS);
11038 }
11039
11040 if (GET_CODE (addr) == PLUS)
11041 {
11042 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11043
11044 /* Check first to see if this is a constant
11045 offset from a @GOTOFF symbol reference. */
11046 if (!TARGET_PECOFF
11047 && gotoff_operand (op0, Pmode)
11048 && CONST_INT_P (op1))
11049 {
11050 if (!TARGET_64BIT)
11051 {
11052 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11053 UNSPEC_GOTOFF);
11054 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11055 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11056
11057 if (reg != 0)
11058 {
11059 gcc_assert (REG_P (reg));
11060 new_rtx = expand_simple_binop (Pmode, PLUS,
11061 pic_offset_table_rtx,
11062 new_rtx, reg, 1,
11063 OPTAB_DIRECT);
11064 }
11065 else
11066 new_rtx
11067 = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11068 }
11069 else
11070 {
11071 if (INTVAL (op1) < -16*1024*1024
11072 || INTVAL (op1) >= 16*1024*1024)
11073 {
11074 if (!x86_64_immediate_operand (op1, Pmode))
11075 op1 = force_reg (Pmode, op1);
11076
11077 new_rtx
11078 = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11079 }
11080 }
11081 }
11082 else
11083 {
11084 rtx base = legitimize_pic_address (op0, reg);
11085 machine_mode mode = GET_MODE (base);
11086 new_rtx
11087 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
11088
11089 if (CONST_INT_P (new_rtx))
11090 {
11091 if (INTVAL (new_rtx) < -16*1024*1024
11092 || INTVAL (new_rtx) >= 16*1024*1024)
11093 {
11094 if (!x86_64_immediate_operand (new_rtx, mode))
11095 new_rtx = force_reg (mode, new_rtx);
11096
11097 new_rtx
11098 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
11099 }
11100 else
11101 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
11102 }
11103 else
11104 {
11105 /* For %rip addressing, we have to use
11106 just disp32, with neither base nor index. */
11107 if (TARGET_64BIT
11108 && (GET_CODE (base) == SYMBOL_REF
11109 || GET_CODE (base) == LABEL_REF))
11110 base = force_reg (mode, base);
11111 if (GET_CODE (new_rtx) == PLUS
11112 && CONSTANT_P (XEXP (new_rtx, 1)))
11113 {
11114 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
11115 new_rtx = XEXP (new_rtx, 1);
11116 }
11117 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
11118 }
11119 }
11120 }
11121 }
11122 return new_rtx;
11123 }
11124 \f
11125 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11126
11127 static rtx
11128 get_thread_pointer (machine_mode tp_mode, bool to_reg)
11129 {
11130 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11131
11132 if (GET_MODE (tp) != tp_mode)
11133 {
11134 gcc_assert (GET_MODE (tp) == SImode);
11135 gcc_assert (tp_mode == DImode);
11136
11137 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
11138 }
11139
11140 if (to_reg)
11141 tp = copy_to_mode_reg (tp_mode, tp);
11142
11143 return tp;
11144 }
11145
11146 /* Construct the SYMBOL_REF for the tls_get_addr function. */
11147
11148 static GTY(()) rtx ix86_tls_symbol;
11149
11150 static rtx
11151 ix86_tls_get_addr (void)
11152 {
11153 if (!ix86_tls_symbol)
11154 {
11155 const char *sym
11156 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
11157 ? "___tls_get_addr" : "__tls_get_addr");
11158
11159 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
11160 }
11161
11162 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
11163 {
11164 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
11165 UNSPEC_PLTOFF);
11166 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
11167 gen_rtx_CONST (Pmode, unspec));
11168 }
11169
11170 return ix86_tls_symbol;
11171 }
11172
11173 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
11174
11175 static GTY(()) rtx ix86_tls_module_base_symbol;
11176
11177 rtx
11178 ix86_tls_module_base (void)
11179 {
11180 if (!ix86_tls_module_base_symbol)
11181 {
11182 ix86_tls_module_base_symbol
11183 = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
11184
11185 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
11186 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
11187 }
11188
11189 return ix86_tls_module_base_symbol;
11190 }
11191
11192 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11193 false if we expect this to be used for a memory address and true if
11194 we expect to load the address into a register. */
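/* Roughly, the models handled below correspond to these access patterns on
   x86-64: local-exec uses a %fs-relative offset (sym@tpoff), initial-exec
   loads the offset from the GOT (sym@gottpoff(%rip)) and adds it to the
   thread pointer, and the dynamic models either call __tls_get_addr or use
   the GNU2/TLSDESC sequences when TARGET_GNU2_TLS.  */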
11195
11196 rtx
11197 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
11198 {
11199 rtx dest, base, off;
11200 rtx pic = NULL_RTX, tp = NULL_RTX;
11201 machine_mode tp_mode = Pmode;
11202 int type;
11203
11204 /* Fall back to the global dynamic model if the toolchain cannot support
11205 local dynamic. */
11206 if (TARGET_SUN_TLS && !TARGET_64BIT
11207 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
11208 && model == TLS_MODEL_LOCAL_DYNAMIC)
11209 model = TLS_MODEL_GLOBAL_DYNAMIC;
11210
11211 switch (model)
11212 {
11213 case TLS_MODEL_GLOBAL_DYNAMIC:
11214 if (!TARGET_64BIT)
11215 {
11216 if (flag_pic && !TARGET_PECOFF)
11217 pic = pic_offset_table_rtx;
11218 else
11219 {
11220 pic = gen_reg_rtx (Pmode);
11221 emit_insn (gen_set_got (pic));
11222 }
11223 }
11224
11225 if (TARGET_GNU2_TLS)
11226 {
11227 dest = gen_reg_rtx (ptr_mode);
11228 if (TARGET_64BIT)
11229 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
11230 else
11231 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
11232
11233 tp = get_thread_pointer (ptr_mode, true);
11234 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11235 if (GET_MODE (dest) != Pmode)
11236 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11237 dest = force_reg (Pmode, dest);
11238
11239 if (GET_MODE (x) != Pmode)
11240 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11241
11242 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11243 }
11244 else
11245 {
11246 rtx caddr = ix86_tls_get_addr ();
11247
11248 dest = gen_reg_rtx (Pmode);
11249 if (TARGET_64BIT)
11250 {
11251 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11252 rtx_insn *insns;
11253
11254 start_sequence ();
11255 emit_call_insn
11256 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr));
11257 insns = get_insns ();
11258 end_sequence ();
11259
11260 if (GET_MODE (x) != Pmode)
11261 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11262
11263 RTL_CONST_CALL_P (insns) = 1;
11264 emit_libcall_block (insns, dest, rax, x);
11265 }
11266 else
11267 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
11268 }
11269 break;
11270
11271 case TLS_MODEL_LOCAL_DYNAMIC:
11272 if (!TARGET_64BIT)
11273 {
11274 if (flag_pic)
11275 pic = pic_offset_table_rtx;
11276 else
11277 {
11278 pic = gen_reg_rtx (Pmode);
11279 emit_insn (gen_set_got (pic));
11280 }
11281 }
11282
11283 if (TARGET_GNU2_TLS)
11284 {
11285 rtx tmp = ix86_tls_module_base ();
11286
11287 base = gen_reg_rtx (ptr_mode);
11288 if (TARGET_64BIT)
11289 emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
11290 else
11291 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
11292
11293 tp = get_thread_pointer (ptr_mode, true);
11294 if (GET_MODE (base) != Pmode)
11295 base = gen_rtx_ZERO_EXTEND (Pmode, base);
11296 base = force_reg (Pmode, base);
11297 }
11298 else
11299 {
11300 rtx caddr = ix86_tls_get_addr ();
11301
11302 base = gen_reg_rtx (Pmode);
11303 if (TARGET_64BIT)
11304 {
11305 rtx rax = gen_rtx_REG (Pmode, AX_REG);
11306 rtx_insn *insns;
11307 rtx eqv;
11308
11309 start_sequence ();
11310 emit_call_insn
11311 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr));
11312 insns = get_insns ();
11313 end_sequence ();
11314
11315 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
11316 share the LD_BASE result with other LD model accesses. */
11317 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11318 UNSPEC_TLS_LD_BASE);
11319
11320 RTL_CONST_CALL_P (insns) = 1;
11321 emit_libcall_block (insns, base, rax, eqv);
11322 }
11323 else
11324 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
11325 }
11326
11327 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11328 off = gen_rtx_CONST (Pmode, off);
11329
11330 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11331
11332 if (TARGET_GNU2_TLS)
11333 {
11334 if (GET_MODE (tp) != Pmode)
11335 {
11336 dest = lowpart_subreg (ptr_mode, dest, Pmode);
11337 dest = gen_rtx_PLUS (ptr_mode, tp, dest);
11338 dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
11339 }
11340 else
11341 dest = gen_rtx_PLUS (Pmode, tp, dest);
11342 dest = force_reg (Pmode, dest);
11343
11344 if (GET_MODE (x) != Pmode)
11345 x = gen_rtx_ZERO_EXTEND (Pmode, x);
11346
11347 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
11348 }
11349 break;
11350
11351 case TLS_MODEL_INITIAL_EXEC:
11352 if (TARGET_64BIT)
11353 {
11354 if (TARGET_SUN_TLS && !TARGET_X32)
11355 {
11356 /* The Sun linker took the AMD64 TLS spec literally
11357 and can only handle %rax as the destination of the
11358 initial-exec code sequence. */
11359
11360 dest = gen_reg_rtx (DImode);
11361 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
11362 return dest;
11363 }
11364
11365 /* Generate DImode references to avoid %fs:(%reg32)
11366 problems and the linker IE->LE relaxation bug. */
11367 tp_mode = DImode;
11368 pic = NULL;
11369 type = UNSPEC_GOTNTPOFF;
11370 }
11371 else if (flag_pic)
11372 {
11373 pic = pic_offset_table_rtx;
11374 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11375 }
11376 else if (!TARGET_ANY_GNU_TLS)
11377 {
11378 pic = gen_reg_rtx (Pmode);
11379 emit_insn (gen_set_got (pic));
11380 type = UNSPEC_GOTTPOFF;
11381 }
11382 else
11383 {
11384 pic = NULL;
11385 type = UNSPEC_INDNTPOFF;
11386 }
11387
11388 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
11389 off = gen_rtx_CONST (tp_mode, off);
11390 if (pic)
11391 off = gen_rtx_PLUS (tp_mode, pic, off);
11392 off = gen_const_mem (tp_mode, off);
11393 set_mem_alias_set (off, ix86_GOT_alias_set ());
11394
11395 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11396 {
11397 base = get_thread_pointer (tp_mode,
11398 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11399 off = force_reg (tp_mode, off);
11400 dest = gen_rtx_PLUS (tp_mode, base, off);
11401 if (tp_mode != Pmode)
11402 dest = convert_to_mode (Pmode, dest, 1);
11403 }
11404 else
11405 {
11406 base = get_thread_pointer (Pmode, true);
11407 dest = gen_reg_rtx (Pmode);
11408 emit_insn (gen_sub3_insn (dest, base, off));
11409 }
11410 break;
11411
11412 case TLS_MODEL_LOCAL_EXEC:
11413 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11414 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11415 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11416 off = gen_rtx_CONST (Pmode, off);
11417
11418 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11419 {
11420 base = get_thread_pointer (Pmode,
11421 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11422 return gen_rtx_PLUS (Pmode, base, off);
11423 }
11424 else
11425 {
11426 base = get_thread_pointer (Pmode, true);
11427 dest = gen_reg_rtx (Pmode);
11428 emit_insn (gen_sub3_insn (dest, base, off));
11429 }
11430 break;
11431
11432 default:
11433 gcc_unreachable ();
11434 }
11435
11436 return dest;
11437 }
11438
11439 /* Return true if OP refers to a TLS address. */
11440 bool
11441 ix86_tls_address_pattern_p (rtx op)
11442 {
11443 subrtx_var_iterator::array_type array;
11444 FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
11445 {
11446 rtx op = *iter;
11447 if (MEM_P (op))
11448 {
11449 rtx *x = &XEXP (op, 0);
11450 while (GET_CODE (*x) == PLUS)
11451 {
11452 int i;
11453 for (i = 0; i < 2; i++)
11454 {
11455 rtx u = XEXP (*x, i);
11456 if (GET_CODE (u) == ZERO_EXTEND)
11457 u = XEXP (u, 0);
11458 if (GET_CODE (u) == UNSPEC
11459 && XINT (u, 1) == UNSPEC_TP)
11460 return true;
11461 }
11462 x = &XEXP (*x, 0);
11463 }
11464
11465 iter.skip_subrtxes ();
11466 }
11467 }
11468
11469 return false;
11470 }
11471
11472 /* Rewrite *LOC so that it refers to a default TLS address space. */
11473 void
11474 ix86_rewrite_tls_address_1 (rtx *loc)
11475 {
11476 subrtx_ptr_iterator::array_type array;
11477 FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
11478 {
11479 rtx *loc = *iter;
11480 if (MEM_P (*loc))
11481 {
11482 rtx addr = XEXP (*loc, 0);
11483 rtx *x = &addr;
11484 while (GET_CODE (*x) == PLUS)
11485 {
11486 int i;
11487 for (i = 0; i < 2; i++)
11488 {
11489 rtx u = XEXP (*x, i);
11490 if (GET_CODE (u) == ZERO_EXTEND)
11491 u = XEXP (u, 0);
11492 if (GET_CODE (u) == UNSPEC
11493 && XINT (u, 1) == UNSPEC_TP)
11494 {
11495 addr_space_t as = DEFAULT_TLS_SEG_REG;
11496
11497 *x = XEXP (*x, 1 - i);
11498
11499 *loc = replace_equiv_address_nv (*loc, addr, true);
11500 set_mem_addr_space (*loc, as);
11501 return;
11502 }
11503 }
11504 x = &XEXP (*x, 0);
11505 }
11506
11507 iter.skip_subrtxes ();
11508 }
11509 }
11510 }
11511
11512 /* Rewrite an instruction pattern involving a TLS address
11513 so that it refers to a default TLS address space. */
11514 rtx
11515 ix86_rewrite_tls_address (rtx pattern)
11516 {
11517 pattern = copy_insn (pattern);
11518 ix86_rewrite_tls_address_1 (&pattern);
11519 return pattern;
11520 }
11521
11522 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11523 to symbol DECL if BEIMPORT is true. Otherwise create or return the
11524 unique refptr-DECL symbol corresponding to symbol DECL. */
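/* For example, a dllimport'ed symbol foo is referenced through the import
   pointer "*__imp_foo" (or "*__imp__foo" when user labels carry a leading
   underscore), while the refptr case uses "*.refptr.foo" or "*refptr.foo"
   under the same convention; see get_dllimport_decl below.  */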
11525
11526 struct dllimport_hasher : ggc_cache_ptr_hash<tree_map>
11527 {
11528 static inline hashval_t hash (tree_map *m) { return m->hash; }
11529 static inline bool
11530 equal (tree_map *a, tree_map *b)
11531 {
11532 return a->base.from == b->base.from;
11533 }
11534
11535 static int
11536 keep_cache_entry (tree_map *&m)
11537 {
11538 return ggc_marked_p (m->base.from);
11539 }
11540 };
11541
11542 static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
11543
11544 static tree
11545 get_dllimport_decl (tree decl, bool beimport)
11546 {
11547 struct tree_map *h, in;
11548 const char *name;
11549 const char *prefix;
11550 size_t namelen, prefixlen;
11551 char *imp_name;
11552 tree to;
11553 rtx rtl;
11554
11555 if (!dllimport_map)
11556 dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
11557
11558 in.hash = htab_hash_pointer (decl);
11559 in.base.from = decl;
11560 tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
11561 h = *loc;
11562 if (h)
11563 return h->to;
11564
11565 *loc = h = ggc_alloc<tree_map> ();
11566 h->hash = in.hash;
11567 h->base.from = decl;
11568 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11569 VAR_DECL, NULL, ptr_type_node);
11570 DECL_ARTIFICIAL (to) = 1;
11571 DECL_IGNORED_P (to) = 1;
11572 DECL_EXTERNAL (to) = 1;
11573 TREE_READONLY (to) = 1;
11574
11575 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11576 name = targetm.strip_name_encoding (name);
11577 if (beimport)
11578 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11579 ? "*__imp_" : "*__imp__";
11580 else
11581 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
11582 namelen = strlen (name);
11583 prefixlen = strlen (prefix);
11584 imp_name = (char *) alloca (namelen + prefixlen + 1);
11585 memcpy (imp_name, prefix, prefixlen);
11586 memcpy (imp_name + prefixlen, name, namelen + 1);
11587
11588 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11589 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11590 SET_SYMBOL_REF_DECL (rtl, to);
11591 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
11592 if (!beimport)
11593 {
11594 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
11595 #ifdef SUB_TARGET_RECORD_STUB
11596 SUB_TARGET_RECORD_STUB (name);
11597 #endif
11598 }
11599
11600 rtl = gen_const_mem (Pmode, rtl);
11601 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11602
11603 SET_DECL_RTL (to, rtl);
11604 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11605
11606 return to;
11607 }
11608
11609 /* Expand SYMBOL into its corresponding far-address symbol.
11610 WANT_REG is true if we require the result be a register. */
11611
11612 static rtx
11613 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
11614 {
11615 tree imp_decl;
11616 rtx x;
11617
11618 gcc_assert (SYMBOL_REF_DECL (symbol));
11619 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
11620
11621 x = DECL_RTL (imp_decl);
11622 if (want_reg)
11623 x = force_reg (Pmode, x);
11624 return x;
11625 }
11626
11627 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11628 true if we require the result be a register. */
11629
11630 static rtx
11631 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11632 {
11633 tree imp_decl;
11634 rtx x;
11635
11636 gcc_assert (SYMBOL_REF_DECL (symbol));
11637 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
11638
11639 x = DECL_RTL (imp_decl);
11640 if (want_reg)
11641 x = force_reg (Pmode, x);
11642 return x;
11643 }
11644
11645 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
11646 is true if we require the result to be a register. */
11647
11648 rtx
11649 legitimize_pe_coff_symbol (rtx addr, bool inreg)
11650 {
11651 if (!TARGET_PECOFF)
11652 return NULL_RTX;
11653
11654 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11655 {
11656 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11657 return legitimize_dllimport_symbol (addr, inreg);
11658 if (GET_CODE (addr) == CONST
11659 && GET_CODE (XEXP (addr, 0)) == PLUS
11660 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11661 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11662 {
11663 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
11664 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11665 }
11666 }
11667
11668 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
11669 return NULL_RTX;
11670 if (GET_CODE (addr) == SYMBOL_REF
11671 && !is_imported_p (addr)
11672 && SYMBOL_REF_EXTERNAL_P (addr)
11673 && SYMBOL_REF_DECL (addr))
11674 return legitimize_pe_coff_extern_decl (addr, inreg);
11675
11676 if (GET_CODE (addr) == CONST
11677 && GET_CODE (XEXP (addr, 0)) == PLUS
11678 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11679 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
11680 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
11681 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
11682 {
11683 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
11684 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11685 }
11686 return NULL_RTX;
11687 }
11688
11689 /* Try machine-dependent ways of modifying an illegitimate address
11690 to be legitimate. If we find one, return the new, valid address.
11691 This macro is used in only one place: `memory_address' in explow.c.
11692
11693 OLDX is the address as it was before break_out_memory_refs was called.
11694 In some cases it is useful to look at this to decide what needs to be done.
11695
11696 It is always safe for this macro to do nothing. It exists to recognize
11697 opportunities to optimize the output.
11698
11699 For the 80386, we handle X+REG by loading X into a register R and
11700 using R+REG. R will go in a general reg and indexing will be used.
11701 However, if REG is a broken-out memory address or multiplication,
11702 nothing needs to be done because REG can certainly go in a general reg.
11703
11704 When -fpic is used, special handling is needed for symbolic references.
11705 See comments by legitimize_pic_address in i386.c for details. */
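/* As a small illustration of the canonicalizations below, an address such as
   (plus (ashift (reg) (const_int 2)) (reg)) is rewritten into
   (plus (mult (reg) (const_int 4)) (reg)) so that it matches the scaled-index
   addressing forms the hardware actually provides.  */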
11706
11707 static rtx
11708 ix86_legitimize_address (rtx x, rtx, machine_mode mode)
11709 {
11710 bool changed = false;
11711 unsigned log;
11712
11713 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11714 if (log)
11715 return legitimize_tls_address (x, (enum tls_model) log, false);
11716 if (GET_CODE (x) == CONST
11717 && GET_CODE (XEXP (x, 0)) == PLUS
11718 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11719 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11720 {
11721 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11722 (enum tls_model) log, false);
11723 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11724 }
11725
11726 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11727 {
11728 rtx tmp = legitimize_pe_coff_symbol (x, true);
11729 if (tmp)
11730 return tmp;
11731 }
11732
11733 if (flag_pic && SYMBOLIC_CONST (x))
11734 return legitimize_pic_address (x, 0);
11735
11736 #if TARGET_MACHO
11737 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
11738 return machopic_indirect_data_reference (x, 0);
11739 #endif
11740
11741 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11742 if (GET_CODE (x) == ASHIFT
11743 && CONST_INT_P (XEXP (x, 1))
11744 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11745 {
11746 changed = true;
11747 log = INTVAL (XEXP (x, 1));
11748 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11749 GEN_INT (1 << log));
11750 }
11751
11752 if (GET_CODE (x) == PLUS)
11753 {
11754 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11755
11756 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11757 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11758 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11759 {
11760 changed = true;
11761 log = INTVAL (XEXP (XEXP (x, 0), 1));
11762 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11763 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11764 GEN_INT (1 << log));
11765 }
11766
11767 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11768 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11769 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11770 {
11771 changed = true;
11772 log = INTVAL (XEXP (XEXP (x, 1), 1));
11773 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11774 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11775 GEN_INT (1 << log));
11776 }
11777
11778 /* Put multiply first if it isn't already. */
11779 if (GET_CODE (XEXP (x, 1)) == MULT)
11780 {
11781 std::swap (XEXP (x, 0), XEXP (x, 1));
11782 changed = true;
11783 }
11784
11785 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11786 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11787 created by virtual register instantiation, register elimination, and
11788 similar optimizations. */
11789 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11790 {
11791 changed = true;
11792 x = gen_rtx_PLUS (Pmode,
11793 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11794 XEXP (XEXP (x, 1), 0)),
11795 XEXP (XEXP (x, 1), 1));
11796 }
11797
11798 /* Canonicalize
11799 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11800 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11801 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11802 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11803 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11804 && CONSTANT_P (XEXP (x, 1)))
11805 {
11806 rtx constant;
11807 rtx other = NULL_RTX;
11808
11809 if (CONST_INT_P (XEXP (x, 1)))
11810 {
11811 constant = XEXP (x, 1);
11812 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11813 }
11814 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11815 {
11816 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11817 other = XEXP (x, 1);
11818 }
11819 else
11820 constant = 0;
11821
11822 if (constant)
11823 {
11824 changed = true;
11825 x = gen_rtx_PLUS (Pmode,
11826 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11827 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11828 plus_constant (Pmode, other,
11829 INTVAL (constant)));
11830 }
11831 }
11832
11833 if (changed && ix86_legitimate_address_p (mode, x, false))
11834 return x;
11835
11836 if (GET_CODE (XEXP (x, 0)) == MULT)
11837 {
11838 changed = true;
11839 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
11840 }
11841
11842 if (GET_CODE (XEXP (x, 1)) == MULT)
11843 {
11844 changed = true;
11845 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
11846 }
11847
11848 if (changed
11849 && REG_P (XEXP (x, 1))
11850 && REG_P (XEXP (x, 0)))
11851 return x;
11852
11853 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11854 {
11855 changed = true;
11856 x = legitimize_pic_address (x, 0);
11857 }
11858
11859 if (changed && ix86_legitimate_address_p (mode, x, false))
11860 return x;
11861
11862 if (REG_P (XEXP (x, 0)))
11863 {
11864 rtx temp = gen_reg_rtx (Pmode);
11865 rtx val = force_operand (XEXP (x, 1), temp);
11866 if (val != temp)
11867 {
11868 val = convert_to_mode (Pmode, val, 1);
11869 emit_move_insn (temp, val);
11870 }
11871
11872 XEXP (x, 1) = temp;
11873 return x;
11874 }
11875
11876 else if (REG_P (XEXP (x, 1)))
11877 {
11878 rtx temp = gen_reg_rtx (Pmode);
11879 rtx val = force_operand (XEXP (x, 0), temp);
11880 if (val != temp)
11881 {
11882 val = convert_to_mode (Pmode, val, 1);
11883 emit_move_insn (temp, val);
11884 }
11885
11886 XEXP (x, 0) = temp;
11887 return x;
11888 }
11889 }
11890
11891 return x;
11892 }
11893 \f
11894 /* Print an integer constant expression in assembler syntax. Addition
11895 and subtraction are the only arithmetic that may appear in these
11896 expressions. FILE is the stdio stream to write to, X is the rtx, and
11897 CODE is the operand print code from the output string. */
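/* For instance, a (const (unspec [foo] UNSPEC_GOTOFF)) operand prints as
   "foo@GOTOFF", and with operand code 'P' a non-local SYMBOL_REF is printed
   with an "@PLT" suffix on ELF targets.  */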
11898
11899 static void
11900 output_pic_addr_const (FILE *file, rtx x, int code)
11901 {
11902 char buf[256];
11903
11904 switch (GET_CODE (x))
11905 {
11906 case PC:
11907 gcc_assert (flag_pic);
11908 putc ('.', file);
11909 break;
11910
11911 case SYMBOL_REF:
11912 if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
11913 output_addr_const (file, x);
11914 else
11915 {
11916 const char *name = XSTR (x, 0);
11917
11918 /* Mark the decl as referenced so that cgraph will
11919 output the function. */
11920 if (SYMBOL_REF_DECL (x))
11921 mark_decl_referenced (SYMBOL_REF_DECL (x));
11922
11923 #if TARGET_MACHO
11924 if (MACHOPIC_INDIRECT
11925 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11926 name = machopic_indirection_name (x, /*stub_p=*/true);
11927 #endif
11928 assemble_name (file, name);
11929 }
11930 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
11931 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11932 fputs ("@PLT", file);
11933 break;
11934
11935 case LABEL_REF:
11936 x = XEXP (x, 0);
11937 /* FALLTHRU */
11938 case CODE_LABEL:
11939 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11940 assemble_name (asm_out_file, buf);
11941 break;
11942
11943 case CONST_INT:
11944 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11945 break;
11946
11947 case CONST:
11948 /* This used to output parentheses around the expression,
11949 but that does not work on the 386 (either ATT or BSD assembler). */
11950 output_pic_addr_const (file, XEXP (x, 0), code);
11951 break;
11952
11953 case CONST_DOUBLE:
11954 /* We can't handle floating point constants;
11955 TARGET_PRINT_OPERAND must handle them. */
11956 output_operand_lossage ("floating constant misused");
11957 break;
11958
11959 case PLUS:
11960 /* Some assemblers need integer constants to appear first. */
11961 if (CONST_INT_P (XEXP (x, 0)))
11962 {
11963 output_pic_addr_const (file, XEXP (x, 0), code);
11964 putc ('+', file);
11965 output_pic_addr_const (file, XEXP (x, 1), code);
11966 }
11967 else
11968 {
11969 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11970 output_pic_addr_const (file, XEXP (x, 1), code);
11971 putc ('+', file);
11972 output_pic_addr_const (file, XEXP (x, 0), code);
11973 }
11974 break;
11975
11976 case MINUS:
11977 if (!TARGET_MACHO)
11978 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11979 output_pic_addr_const (file, XEXP (x, 0), code);
11980 putc ('-', file);
11981 output_pic_addr_const (file, XEXP (x, 1), code);
11982 if (!TARGET_MACHO)
11983 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11984 break;
11985
11986 case UNSPEC:
11987 gcc_assert (XVECLEN (x, 0) == 1);
11988 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11989 switch (XINT (x, 1))
11990 {
11991 case UNSPEC_GOT:
11992 fputs ("@GOT", file);
11993 break;
11994 case UNSPEC_GOTOFF:
11995 fputs ("@GOTOFF", file);
11996 break;
11997 case UNSPEC_PLTOFF:
11998 fputs ("@PLTOFF", file);
11999 break;
12000 case UNSPEC_PCREL:
12001 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12002 "(%rip)" : "[rip]", file);
12003 break;
12004 case UNSPEC_GOTPCREL:
12005 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12006 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12007 break;
12008 case UNSPEC_GOTTPOFF:
12009 /* FIXME: This might be @TPOFF in Sun ld too. */
12010 fputs ("@gottpoff", file);
12011 break;
12012 case UNSPEC_TPOFF:
12013 fputs ("@tpoff", file);
12014 break;
12015 case UNSPEC_NTPOFF:
12016 if (TARGET_64BIT)
12017 fputs ("@tpoff", file);
12018 else
12019 fputs ("@ntpoff", file);
12020 break;
12021 case UNSPEC_DTPOFF:
12022 fputs ("@dtpoff", file);
12023 break;
12024 case UNSPEC_GOTNTPOFF:
12025 if (TARGET_64BIT)
12026 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12027 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12028 else
12029 fputs ("@gotntpoff", file);
12030 break;
12031 case UNSPEC_INDNTPOFF:
12032 fputs ("@indntpoff", file);
12033 break;
12034 #if TARGET_MACHO
12035 case UNSPEC_MACHOPIC_OFFSET:
12036 putc ('-', file);
12037 machopic_output_function_base_name (file);
12038 break;
12039 #endif
12040 default:
12041 output_operand_lossage ("invalid UNSPEC as operand");
12042 break;
12043 }
12044 break;
12045
12046 default:
12047 output_operand_lossage ("invalid expression as operand");
12048 }
12049 }
12050
12051 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12052 We need to emit DTP-relative relocations. */
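/* E.g. a 4-byte request typically emits ".long foo@dtpoff"; the 8-byte case
   additionally appends ", 0" for the upper half.  */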
12053
12054 static void ATTRIBUTE_UNUSED
12055 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12056 {
12057 fputs (ASM_LONG, file);
12058 output_addr_const (file, x);
12059 fputs ("@dtpoff", file);
12060 switch (size)
12061 {
12062 case 4:
12063 break;
12064 case 8:
12065 fputs (", 0", file);
12066 break;
12067 default:
12068 gcc_unreachable ();
12069 }
12070 }
12071
12072 /* Return true if X is a representation of the PIC register. This copes
12073 with calls from ix86_find_base_term, where the register might have
12074 been replaced by a cselib value. */
12075
12076 static bool
12077 ix86_pic_register_p (rtx x)
12078 {
12079 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12080 return (pic_offset_table_rtx
12081 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12082 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
12083 return true;
12084 else if (!REG_P (x))
12085 return false;
12086 else if (pic_offset_table_rtx)
12087 {
12088 if (REGNO (x) == REGNO (pic_offset_table_rtx))
12089 return true;
12090 if (HARD_REGISTER_P (x)
12091 && !HARD_REGISTER_P (pic_offset_table_rtx)
12092 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
12093 return true;
12094 return false;
12095 }
12096 else
12097 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12098 }
12099
12100 /* Helper function for ix86_delegitimize_address.
12101 Attempt to delegitimize TLS local-exec accesses. */
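/* For example, a segment-relative local-exec access such as %fs:foo@tpoff
   is turned back into a plain reference to foo, preserving any base, index
   and constant offset that appeared in the original address.  */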
12102
12103 static rtx
12104 ix86_delegitimize_tls_address (rtx orig_x)
12105 {
12106 rtx x = orig_x, unspec;
12107 struct ix86_address addr;
12108
12109 if (!TARGET_TLS_DIRECT_SEG_REFS)
12110 return orig_x;
12111 if (MEM_P (x))
12112 x = XEXP (x, 0);
12113 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12114 return orig_x;
12115 if (ix86_decompose_address (x, &addr) == 0
12116 || addr.seg != DEFAULT_TLS_SEG_REG
12117 || addr.disp == NULL_RTX
12118 || GET_CODE (addr.disp) != CONST)
12119 return orig_x;
12120 unspec = XEXP (addr.disp, 0);
12121 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12122 unspec = XEXP (unspec, 0);
12123 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12124 return orig_x;
12125 x = XVECEXP (unspec, 0, 0);
12126 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12127 if (unspec != XEXP (addr.disp, 0))
12128 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12129 if (addr.index)
12130 {
12131 rtx idx = addr.index;
12132 if (addr.scale != 1)
12133 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12134 x = gen_rtx_PLUS (Pmode, idx, x);
12135 }
12136 if (addr.base)
12137 x = gen_rtx_PLUS (Pmode, addr.base, x);
12138 if (MEM_P (orig_x))
12139 x = replace_equiv_address_nv (orig_x, x);
12140 return x;
12141 }
12142
12143 /* In the name of slightly smaller debug output, and to cater to
12144 general assembler lossage, recognize PIC+GOTOFF and turn it back
12145 into a direct symbol reference.
12146
12147 On Darwin, this is necessary to avoid a crash, because Darwin
12148 has a different PIC label for each routine but the DWARF debugging
12149 information is not associated with any particular routine, so it's
12150 necessary to remove references to the PIC label from RTL stored by
12151 the DWARF output code.
12152
12153 This helper is used in the normal ix86_delegitimize_address
12154 entrypoint (e.g. used in the target delegitimization hook) and
12155 in ix86_find_base_term. As a compile-time memory optimization, we
12156 avoid allocating rtxes that would not change anything in the outcome
12157 for the callers (find_base_value and find_base_term). */
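/* A typical case: (%ebx + foo@GOTOFF) is rewritten back to plain foo, and
   (%ebx + %eax*4 + foo@GOTOFF) becomes (%eax*4 + foo), i.e. the PIC
   register is dropped from the expression.  */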
12158
12159 static inline rtx
12160 ix86_delegitimize_address_1 (rtx x, bool base_term_p)
12161 {
12162 rtx orig_x = delegitimize_mem_from_attrs (x);
12163 /* addend is NULL or some rtx if x is something+GOTOFF where
12164 something doesn't include the PIC register. */
12165 rtx addend = NULL_RTX;
12166 /* reg_addend is NULL or a multiple of some register. */
12167 rtx reg_addend = NULL_RTX;
12168 /* const_addend is NULL or a const_int. */
12169 rtx const_addend = NULL_RTX;
12170 /* This is the result, or NULL. */
12171 rtx result = NULL_RTX;
12172
12173 x = orig_x;
12174
12175 if (MEM_P (x))
12176 x = XEXP (x, 0);
12177
12178 if (TARGET_64BIT)
12179 {
12180 if (GET_CODE (x) == CONST
12181 && GET_CODE (XEXP (x, 0)) == PLUS
12182 && GET_MODE (XEXP (x, 0)) == Pmode
12183 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
12184 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
12185 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
12186 {
12187 /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
12188 base. A CONST can't be arg_pointer_rtx based. */
12189 if (base_term_p && MEM_P (orig_x))
12190 return orig_x;
12191 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
12192 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
12193 if (MEM_P (orig_x))
12194 x = replace_equiv_address_nv (orig_x, x);
12195 return x;
12196 }
12197
12198 if (GET_CODE (x) == CONST
12199 && GET_CODE (XEXP (x, 0)) == UNSPEC
12200 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
12201 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
12202 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
12203 {
12204 x = XVECEXP (XEXP (x, 0), 0, 0);
12205 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
12206 {
12207 x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
12208 if (x == NULL_RTX)
12209 return orig_x;
12210 }
12211 return x;
12212 }
12213
12214 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
12215 return ix86_delegitimize_tls_address (orig_x);
12216
12217 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
12218 and -mcmodel=medium -fpic. */
12219 }
12220
12221 if (GET_CODE (x) != PLUS
12222 || GET_CODE (XEXP (x, 1)) != CONST)
12223 return ix86_delegitimize_tls_address (orig_x);
12224
12225 if (ix86_pic_register_p (XEXP (x, 0)))
12226 /* %ebx + GOT/GOTOFF */
12227 ;
12228 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12229 {
12230 /* %ebx + %reg * scale + GOT/GOTOFF */
12231 reg_addend = XEXP (x, 0);
12232 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12233 reg_addend = XEXP (reg_addend, 1);
12234 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12235 reg_addend = XEXP (reg_addend, 0);
12236 else
12237 {
12238 reg_addend = NULL_RTX;
12239 addend = XEXP (x, 0);
12240 }
12241 }
12242 else
12243 addend = XEXP (x, 0);
12244
12245 x = XEXP (XEXP (x, 1), 0);
12246 if (GET_CODE (x) == PLUS
12247 && CONST_INT_P (XEXP (x, 1)))
12248 {
12249 const_addend = XEXP (x, 1);
12250 x = XEXP (x, 0);
12251 }
12252
12253 if (GET_CODE (x) == UNSPEC
12254 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12255 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
12256 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
12257 && !MEM_P (orig_x) && !addend)))
12258 result = XVECEXP (x, 0, 0);
12259
12260 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
12261 && !MEM_P (orig_x))
12262 result = XVECEXP (x, 0, 0);
12263
12264 if (! result)
12265 return ix86_delegitimize_tls_address (orig_x);
12266
12267 /* For (PLUS something CONST_INT) both find_base_{value,term} just
12268 recurse on the first operand. */
12269 if (const_addend && !base_term_p)
12270 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12271 if (reg_addend)
12272 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12273 if (addend)
12274 {
12275 /* If the rest of original X doesn't involve the PIC register, add
12276 addend and subtract pic_offset_table_rtx. This can happen e.g.
12277 for code like:
12278 leal (%ebx, %ecx, 4), %ecx
12279 ...
12280 movl foo@GOTOFF(%ecx), %edx
12281 in which case we return (%ecx - %ebx) + foo
12282 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
12283 and reload has completed. Don't do the latter for debug,
12284 as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */
12285 if (pic_offset_table_rtx
12286 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
12287 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12288 pic_offset_table_rtx),
12289 result);
12290 else if (base_term_p
12291 && pic_offset_table_rtx
12292 && !TARGET_MACHO
12293 && !TARGET_VXWORKS_RTP)
12294 {
12295 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
12296 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
12297 result = gen_rtx_PLUS (Pmode, tmp, result);
12298 }
12299 else
12300 return orig_x;
12301 }
12302 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12303 {
12304 result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
12305 if (result == NULL_RTX)
12306 return orig_x;
12307 }
12308 return result;
12309 }
12310
12311 /* The normal instantiation of the above template. */
12312
12313 static rtx
12314 ix86_delegitimize_address (rtx x)
12315 {
12316 return ix86_delegitimize_address_1 (x, false);
12317 }
12318
12319 /* If X is a machine specific address (i.e. a symbol or label being
12320 referenced as a displacement from the GOT implemented using an
12321 UNSPEC), then return the base term. Otherwise return X. */
12322
12323 rtx
12324 ix86_find_base_term (rtx x)
12325 {
12326 rtx term;
12327
12328 if (TARGET_64BIT)
12329 {
12330 if (GET_CODE (x) != CONST)
12331 return x;
12332 term = XEXP (x, 0);
12333 if (GET_CODE (term) == PLUS
12334 && CONST_INT_P (XEXP (term, 1)))
12335 term = XEXP (term, 0);
12336 if (GET_CODE (term) != UNSPEC
12337 || (XINT (term, 1) != UNSPEC_GOTPCREL
12338 && XINT (term, 1) != UNSPEC_PCREL))
12339 return x;
12340
12341 return XVECEXP (term, 0, 0);
12342 }
12343
12344 return ix86_delegitimize_address_1 (x, true);
12345 }
12346
12347 /* Return true if X shouldn't be emitted into the debug info.
12348 Disallow UNSPECs other than @gotoff - we can't easily emit the
12349 _GLOBAL_OFFSET_TABLE_ symbol into the .debug_info section, so we
12350 don't delegitimize it, but instead assemble it as @gotoff.
12351 Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REFs - the assembler magically
12352 assembles those as the _GLOBAL_OFFSET_TABLE_-. expression. */
12353
12354 static bool
12355 ix86_const_not_ok_for_debug_p (rtx x)
12356 {
12357 if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
12358 return true;
12359
12360 if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
12361 return true;
12362
12363 return false;
12364 }
12365 \f
12366 static void
12367 put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
12368 bool fp, FILE *file)
12369 {
12370 const char *suffix;
12371
12372 if (mode == CCFPmode)
12373 {
12374 code = ix86_fp_compare_code_to_integer (code);
12375 mode = CCmode;
12376 }
12377 if (reverse)
12378 code = reverse_condition (code);
12379
12380 switch (code)
12381 {
12382 case EQ:
12383 gcc_assert (mode != CCGZmode);
12384 switch (mode)
12385 {
12386 case E_CCAmode:
12387 suffix = "a";
12388 break;
12389 case E_CCCmode:
12390 suffix = "c";
12391 break;
12392 case E_CCOmode:
12393 suffix = "o";
12394 break;
12395 case E_CCPmode:
12396 suffix = "p";
12397 break;
12398 case E_CCSmode:
12399 suffix = "s";
12400 break;
12401 default:
12402 suffix = "e";
12403 break;
12404 }
12405 break;
12406 case NE:
12407 gcc_assert (mode != CCGZmode);
12408 switch (mode)
12409 {
12410 case E_CCAmode:
12411 suffix = "na";
12412 break;
12413 case E_CCCmode:
12414 suffix = "nc";
12415 break;
12416 case E_CCOmode:
12417 suffix = "no";
12418 break;
12419 case E_CCPmode:
12420 suffix = "np";
12421 break;
12422 case E_CCSmode:
12423 suffix = "ns";
12424 break;
12425 default:
12426 suffix = "ne";
12427 break;
12428 }
12429 break;
12430 case GT:
12431 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12432 suffix = "g";
12433 break;
12434 case GTU:
12435 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12436 Those same assemblers have the same but opposite lossage on cmov. */
12437 if (mode == CCmode)
12438 suffix = fp ? "nbe" : "a";
12439 else
12440 gcc_unreachable ();
12441 break;
12442 case LT:
12443 switch (mode)
12444 {
12445 case E_CCNOmode:
12446 case E_CCGOCmode:
12447 suffix = "s";
12448 break;
12449
12450 case E_CCmode:
12451 case E_CCGCmode:
12452 case E_CCGZmode:
12453 suffix = "l";
12454 break;
12455
12456 default:
12457 gcc_unreachable ();
12458 }
12459 break;
12460 case LTU:
12461 if (mode == CCmode || mode == CCGZmode)
12462 suffix = "b";
12463 else if (mode == CCCmode)
12464 suffix = fp ? "b" : "c";
12465 else
12466 gcc_unreachable ();
12467 break;
12468 case GE:
12469 switch (mode)
12470 {
12471 case E_CCNOmode:
12472 case E_CCGOCmode:
12473 suffix = "ns";
12474 break;
12475
12476 case E_CCmode:
12477 case E_CCGCmode:
12478 case E_CCGZmode:
12479 suffix = "ge";
12480 break;
12481
12482 default:
12483 gcc_unreachable ();
12484 }
12485 break;
12486 case GEU:
12487 if (mode == CCmode || mode == CCGZmode)
12488 suffix = "nb";
12489 else if (mode == CCCmode)
12490 suffix = fp ? "nb" : "nc";
12491 else
12492 gcc_unreachable ();
12493 break;
12494 case LE:
12495 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12496 suffix = "le";
12497 break;
12498 case LEU:
12499 if (mode == CCmode)
12500 suffix = "be";
12501 else
12502 gcc_unreachable ();
12503 break;
12504 case UNORDERED:
12505 suffix = fp ? "u" : "p";
12506 break;
12507 case ORDERED:
12508 suffix = fp ? "nu" : "np";
12509 break;
12510 default:
12511 gcc_unreachable ();
12512 }
12513 fputs (suffix, file);
12514 }
12515
12516 /* Print the name of register X to FILE based on its machine mode and number.
12517 If CODE is 'w', pretend the mode is HImode.
12518 If CODE is 'b', pretend the mode is QImode.
12519 If CODE is 'k', pretend the mode is SImode.
12520 If CODE is 'q', pretend the mode is DImode.
12521 If CODE is 'x', pretend the mode is V4SFmode.
12522 If CODE is 't', pretend the mode is V8SFmode.
12523 If CODE is 'g', pretend the mode is V16SFmode.
12524 If CODE is 'h', pretend the reg is the 'high' byte register.
12525 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12526 If CODE is 'd', duplicate the operand for AVX instruction.
12527 If CODE is 'V', print naked full integer register name without %.
12528 */
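/* For example, with X being the AX register: code 'q' prints "rax" (on
   64-bit targets), 'k' prints "eax", 'w' prints "ax", 'b' prints "al" and
   'h' prints "ah", each preceded by '%' in AT&T syntax.  */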
12529
12530 void
12531 print_reg (rtx x, int code, FILE *file)
12532 {
12533 const char *reg;
12534 int msize;
12535 unsigned int regno;
12536 bool duplicated;
12537
12538 if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
12539 putc ('%', file);
12540
12541 if (x == pc_rtx)
12542 {
12543 gcc_assert (TARGET_64BIT);
12544 fputs ("rip", file);
12545 return;
12546 }
12547
12548 if (code == 'y' && STACK_TOP_P (x))
12549 {
12550 fputs ("st(0)", file);
12551 return;
12552 }
12553
12554 if (code == 'w')
12555 msize = 2;
12556 else if (code == 'b')
12557 msize = 1;
12558 else if (code == 'k')
12559 msize = 4;
12560 else if (code == 'q')
12561 msize = 8;
12562 else if (code == 'h')
12563 msize = 0;
12564 else if (code == 'x')
12565 msize = 16;
12566 else if (code == 't')
12567 msize = 32;
12568 else if (code == 'g')
12569 msize = 64;
12570 else
12571 msize = GET_MODE_SIZE (GET_MODE (x));
12572
12573 regno = REGNO (x);
12574
12575 if (regno == ARG_POINTER_REGNUM
12576 || regno == FRAME_POINTER_REGNUM
12577 || regno == FPSR_REG)
12578 {
12579 output_operand_lossage
12580 ("invalid use of register '%s'", reg_names[regno]);
12581 return;
12582 }
12583 else if (regno == FLAGS_REG)
12584 {
12585 output_operand_lossage ("invalid use of asm flag output");
12586 return;
12587 }
12588
12589 if (code == 'V')
12590 {
12591 if (GENERAL_REGNO_P (regno))
12592 msize = GET_MODE_SIZE (word_mode);
12593 else
12594 error ("%<V%> modifier on non-integer register");
12595 }
12596
12597 duplicated = code == 'd' && TARGET_AVX;
12598
12599 switch (msize)
12600 {
12601 case 16:
12602 case 12:
12603 case 8:
12604 if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
12605 warning (0, "unsupported size for integer register");
12606 /* FALLTHRU */
12607 case 4:
12608 if (LEGACY_INT_REGNO_P (regno))
12609 putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
12610 /* FALLTHRU */
12611 case 2:
12612 normal:
12613 reg = hi_reg_name[regno];
12614 break;
12615 case 1:
12616 if (regno >= ARRAY_SIZE (qi_reg_name))
12617 goto normal;
12618 if (!ANY_QI_REGNO_P (regno))
12619 error ("unsupported size for integer register");
12620 reg = qi_reg_name[regno];
12621 break;
12622 case 0:
12623 if (regno >= ARRAY_SIZE (qi_high_reg_name))
12624 goto normal;
12625 reg = qi_high_reg_name[regno];
12626 break;
12627 case 32:
12628 case 64:
12629 if (SSE_REGNO_P (regno))
12630 {
12631 gcc_assert (!duplicated);
12632 putc (msize == 32 ? 'y' : 'z', file);
12633 reg = hi_reg_name[regno] + 1;
12634 break;
12635 }
12636 goto normal;
12637 default:
12638 gcc_unreachable ();
12639 }
12640
12641 fputs (reg, file);
12642
12643 /* Irritatingly, AMD extended registers use a
12644 different naming convention: "r%d[bwd]". */
12645 if (REX_INT_REGNO_P (regno))
12646 {
12647 gcc_assert (TARGET_64BIT);
12648 switch (msize)
12649 {
12650 case 0:
12651 error ("extended registers have no high halves");
12652 break;
12653 case 1:
12654 putc ('b', file);
12655 break;
12656 case 2:
12657 putc ('w', file);
12658 break;
12659 case 4:
12660 putc ('d', file);
12661 break;
12662 case 8:
12663 /* no suffix */
12664 break;
12665 default:
12666 error ("unsupported operand size for extended register");
12667 break;
12668 }
12669 return;
12670 }
12671
12672 if (duplicated)
12673 {
12674 if (ASSEMBLER_DIALECT == ASM_ATT)
12675 fprintf (file, ", %%%s", reg);
12676 else
12677 fprintf (file, ", %s", reg);
12678 }
12679 }
12680
12681 /* Meaning of CODE:
12682 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12683 C -- print opcode suffix for set/cmov insn.
12684 c -- like C, but print reversed condition
12685 F,f -- likewise, but for floating-point.
12686 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12687 otherwise nothing
12688 R -- print embedded rounding and sae.
12689 r -- print only sae.
12690 z -- print the opcode suffix for the size of the current operand.
12691 Z -- likewise, with special suffixes for x87 instructions.
12692 * -- print a star (in certain assembler syntax)
12693 A -- print an absolute memory reference.
12694 E -- print address with DImode register names if TARGET_64BIT.
12695 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12696 s -- print a shift double count, followed by the assembler's argument
12697 delimiter.
12698 b -- print the QImode name of the register for the indicated operand.
12699 %b0 would print %al if operands[0] is reg 0.
12700 w -- likewise, print the HImode name of the register.
12701 k -- likewise, print the SImode name of the register.
12702 q -- likewise, print the DImode name of the register.
12703 x -- likewise, print the V4SFmode name of the register.
12704 t -- likewise, print the V8SFmode name of the register.
12705 g -- likewise, print the V16SFmode name of the register.
12706 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12707 y -- print "st(0)" instead of "st" as a register.
12708 d -- print duplicated register operand for AVX instruction.
12709 D -- print condition for SSE cmp instruction.
12710 P -- if PIC, print an @PLT suffix.
12711 p -- print raw symbol name.
12712 X -- don't print any sort of PIC '@' suffix for a symbol.
12713 & -- print some in-use local-dynamic symbol name.
12714 H -- print a memory address offset by 8; used for sse high-parts
12715 Y -- print condition for XOP pcom* instruction.
12716 V -- print naked full integer register name without %.
12717 + -- print a branch hint as 'cs' or 'ds' prefix
12718 ; -- print a semicolon (after prefixes due to bug in older gas).
12719 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
12720 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
12721 M -- print addr32 prefix for TARGET_X32 with VSIB address.
12722 ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
12723 N -- print maskz if it's constant 0 operand.
12724 */
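/* For instance, writing "add%z0\t{%1, %0|%0, %1}" in an insn template
   (an illustrative template, not a specific pattern from i386.md) makes the
   'z' code choose the suffix, so the AT&T output is "addb", "addw", "addl"
   or "addq" according to the mode of operand 0.  */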
12725
12726 void
12727 ix86_print_operand (FILE *file, rtx x, int code)
12728 {
12729 if (code)
12730 {
12731 switch (code)
12732 {
12733 case 'A':
12734 switch (ASSEMBLER_DIALECT)
12735 {
12736 case ASM_ATT:
12737 putc ('*', file);
12738 break;
12739
12740 case ASM_INTEL:
12741 /* Intel syntax. For absolute addresses, registers should not
12742 be surrounded by brackets. */
12743 if (!REG_P (x))
12744 {
12745 putc ('[', file);
12746 ix86_print_operand (file, x, 0);
12747 putc (']', file);
12748 return;
12749 }
12750 break;
12751
12752 default:
12753 gcc_unreachable ();
12754 }
12755
12756 ix86_print_operand (file, x, 0);
12757 return;
12758
12759 case 'E':
12760 /* Wrap address in an UNSPEC to declare special handling. */
12761 if (TARGET_64BIT)
12762 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
12763
12764 output_address (VOIDmode, x);
12765 return;
12766
12767 case 'L':
12768 if (ASSEMBLER_DIALECT == ASM_ATT)
12769 putc ('l', file);
12770 return;
12771
12772 case 'W':
12773 if (ASSEMBLER_DIALECT == ASM_ATT)
12774 putc ('w', file);
12775 return;
12776
12777 case 'B':
12778 if (ASSEMBLER_DIALECT == ASM_ATT)
12779 putc ('b', file);
12780 return;
12781
12782 case 'Q':
12783 if (ASSEMBLER_DIALECT == ASM_ATT)
12784 putc ('l', file);
12785 return;
12786
12787 case 'S':
12788 if (ASSEMBLER_DIALECT == ASM_ATT)
12789 putc ('s', file);
12790 return;
12791
12792 case 'T':
12793 if (ASSEMBLER_DIALECT == ASM_ATT)
12794 putc ('t', file);
12795 return;
12796
12797 case 'O':
12798 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12799 if (ASSEMBLER_DIALECT != ASM_ATT)
12800 return;
12801
12802 switch (GET_MODE_SIZE (GET_MODE (x)))
12803 {
12804 case 2:
12805 putc ('w', file);
12806 break;
12807
12808 case 4:
12809 putc ('l', file);
12810 break;
12811
12812 case 8:
12813 putc ('q', file);
12814 break;
12815
12816 default:
12817 output_operand_lossage ("invalid operand size for operand "
12818 "code 'O'");
12819 return;
12820 }
12821
12822 putc ('.', file);
12823 #endif
12824 return;
12825
12826 case 'z':
12827 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12828 {
12829 /* Opcodes don't get size suffixes if using Intel syntax. */
12830 if (ASSEMBLER_DIALECT == ASM_INTEL)
12831 return;
12832
12833 switch (GET_MODE_SIZE (GET_MODE (x)))
12834 {
12835 case 1:
12836 putc ('b', file);
12837 return;
12838
12839 case 2:
12840 putc ('w', file);
12841 return;
12842
12843 case 4:
12844 putc ('l', file);
12845 return;
12846
12847 case 8:
12848 putc ('q', file);
12849 return;
12850
12851 default:
12852 output_operand_lossage ("invalid operand size for operand "
12853 "code 'z'");
12854 return;
12855 }
12856 }
12857
12858 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12859 warning (0, "non-integer operand used with operand code %<z%>");
12860 /* FALLTHRU */
12861
12862 case 'Z':
12863 /* 387 opcodes don't get size suffixes if using Intel syntax. */
12864 if (ASSEMBLER_DIALECT == ASM_INTEL)
12865 return;
12866
12867 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12868 {
12869 switch (GET_MODE_SIZE (GET_MODE (x)))
12870 {
12871 case 2:
12872 #ifdef HAVE_AS_IX86_FILDS
12873 putc ('s', file);
12874 #endif
12875 return;
12876
12877 case 4:
12878 putc ('l', file);
12879 return;
12880
12881 case 8:
12882 #ifdef HAVE_AS_IX86_FILDQ
12883 putc ('q', file);
12884 #else
12885 fputs ("ll", file);
12886 #endif
12887 return;
12888
12889 default:
12890 break;
12891 }
12892 }
12893 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12894 {
12895 /* 387 opcodes don't get size suffixes
12896 if the operands are registers. */
12897 if (STACK_REG_P (x))
12898 return;
12899
12900 switch (GET_MODE_SIZE (GET_MODE (x)))
12901 {
12902 case 4:
12903 putc ('s', file);
12904 return;
12905
12906 case 8:
12907 putc ('l', file);
12908 return;
12909
12910 case 12:
12911 case 16:
12912 putc ('t', file);
12913 return;
12914
12915 default:
12916 break;
12917 }
12918 }
12919 else
12920 {
12921 output_operand_lossage ("invalid operand type used with "
12922 "operand code 'Z'");
12923 return;
12924 }
12925
12926 output_operand_lossage ("invalid operand size for operand code 'Z'");
12927 return;
12928
12929 case 'd':
12930 case 'b':
12931 case 'w':
12932 case 'k':
12933 case 'q':
12934 case 'h':
12935 case 't':
12936 case 'g':
12937 case 'y':
12938 case 'x':
12939 case 'X':
12940 case 'P':
12941 case 'p':
12942 case 'V':
12943 break;
12944
12945 case 's':
12946 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12947 {
12948 ix86_print_operand (file, x, 0);
12949 fputs (", ", file);
12950 }
12951 return;
12952
12953 case 'Y':
12954 switch (GET_CODE (x))
12955 {
12956 case NE:
12957 fputs ("neq", file);
12958 break;
12959 case EQ:
12960 fputs ("eq", file);
12961 break;
12962 case GE:
12963 case GEU:
12964 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12965 break;
12966 case GT:
12967 case GTU:
12968 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12969 break;
12970 case LE:
12971 case LEU:
12972 fputs ("le", file);
12973 break;
12974 case LT:
12975 case LTU:
12976 fputs ("lt", file);
12977 break;
12978 case UNORDERED:
12979 fputs ("unord", file);
12980 break;
12981 case ORDERED:
12982 fputs ("ord", file);
12983 break;
12984 case UNEQ:
12985 fputs ("ueq", file);
12986 break;
12987 case UNGE:
12988 fputs ("nlt", file);
12989 break;
12990 case UNGT:
12991 fputs ("nle", file);
12992 break;
12993 case UNLE:
12994 fputs ("ule", file);
12995 break;
12996 case UNLT:
12997 fputs ("ult", file);
12998 break;
12999 case LTGT:
13000 fputs ("une", file);
13001 break;
13002 default:
13003 output_operand_lossage ("operand is not a condition code, "
13004 "invalid operand code 'Y'");
13005 return;
13006 }
13007 return;
13008
13009 case 'D':
13010 /* Little bit of braindamage here. The SSE compare instructions
13011 use completely different names for the comparisons than the
13012 fp conditional moves do. */
13013 switch (GET_CODE (x))
13014 {
13015 case UNEQ:
13016 if (TARGET_AVX)
13017 {
13018 fputs ("eq_us", file);
13019 break;
13020 }
13021 /* FALLTHRU */
13022 case EQ:
13023 fputs ("eq", file);
13024 break;
13025 case UNLT:
13026 if (TARGET_AVX)
13027 {
13028 fputs ("nge", file);
13029 break;
13030 }
13031 /* FALLTHRU */
13032 case LT:
13033 fputs ("lt", file);
13034 break;
13035 case UNLE:
13036 if (TARGET_AVX)
13037 {
13038 fputs ("ngt", file);
13039 break;
13040 }
13041 /* FALLTHRU */
13042 case LE:
13043 fputs ("le", file);
13044 break;
13045 case UNORDERED:
13046 fputs ("unord", file);
13047 break;
13048 case LTGT:
13049 if (TARGET_AVX)
13050 {
13051 fputs ("neq_oq", file);
13052 break;
13053 }
13054 /* FALLTHRU */
13055 case NE:
13056 fputs ("neq", file);
13057 break;
13058 case GE:
13059 if (TARGET_AVX)
13060 {
13061 fputs ("ge", file);
13062 break;
13063 }
13064 /* FALLTHRU */
13065 case UNGE:
13066 fputs ("nlt", file);
13067 break;
13068 case GT:
13069 if (TARGET_AVX)
13070 {
13071 fputs ("gt", file);
13072 break;
13073 }
13074 /* FALLTHRU */
13075 case UNGT:
13076 fputs ("nle", file);
13077 break;
13078 case ORDERED:
13079 fputs ("ord", file);
13080 break;
13081 default:
13082 output_operand_lossage ("operand is not a condition code, "
13083 "invalid operand code 'D'");
13084 return;
13085 }
13086 return;
13087
13088 case 'F':
13089 case 'f':
13090 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13091 if (ASSEMBLER_DIALECT == ASM_ATT)
13092 putc ('.', file);
13093 gcc_fallthrough ();
13094 #endif
13095
13096 case 'C':
13097 case 'c':
13098 if (!COMPARISON_P (x))
13099 {
13100 output_operand_lossage ("operand is not a condition code, "
13101 "invalid operand code '%c'", code);
13102 return;
13103 }
13104 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
13105 code == 'c' || code == 'f',
13106 code == 'F' || code == 'f',
13107 file);
13108 return;
13109
13110 case 'H':
13111 if (!offsettable_memref_p (x))
13112 {
13113 output_operand_lossage ("operand is not an offsettable memory "
13114 "reference, invalid operand code 'H'");
13115 return;
13116 }
13117 /* It doesn't actually matter what mode we use here, as we're
13118 only going to use this for printing. */
13119 x = adjust_address_nv (x, DImode, 8);
13120 /* Output 'qword ptr' for intel assembler dialect. */
13121 if (ASSEMBLER_DIALECT == ASM_INTEL)
13122 code = 'q';
13123 break;
13124
13125 case 'K':
13126 if (!CONST_INT_P (x))
13127 {
13128 output_operand_lossage ("operand is not an integer, invalid "
13129 "operand code 'K'");
13130 return;
13131 }
13132
13133 if (INTVAL (x) & IX86_HLE_ACQUIRE)
13134 #ifdef HAVE_AS_IX86_HLE
13135 fputs ("xacquire ", file);
13136 #else
13137 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
13138 #endif
13139 else if (INTVAL (x) & IX86_HLE_RELEASE)
13140 #ifdef HAVE_AS_IX86_HLE
13141 fputs ("xrelease ", file);
13142 #else
13143 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
13144 #endif
13145 /* We do not want to print the value of the operand. */
13146 return;
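	/* Without assembler support the prefixes are emitted as raw bytes:
	   0xf2 and 0xf3 are the REPNE/REP prefix encodings, which double as
	   XACQUIRE and XRELEASE respectively on HLE-capable processors.  */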
13147
13148 case 'N':
13149 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
13150 fputs ("{z}", file);
13151 return;
13152
13153 case 'r':
13154 if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
13155 {
13156 output_operand_lossage ("operand is not a specific integer, "
13157 "invalid operand code 'r'");
13158 return;
13159 }
13160
13161 if (ASSEMBLER_DIALECT == ASM_INTEL)
13162 fputs (", ", file);
13163
13164 fputs ("{sae}", file);
13165
13166 if (ASSEMBLER_DIALECT == ASM_ATT)
13167 fputs (", ", file);
13168
13169 return;
13170
13171 case 'R':
13172 if (!CONST_INT_P (x))
13173 {
13174 output_operand_lossage ("operand is not an integer, invalid "
13175 "operand code 'R'");
13176 return;
13177 }
13178
13179 if (ASSEMBLER_DIALECT == ASM_INTEL)
13180 fputs (", ", file);
13181
13182 switch (INTVAL (x))
13183 {
13184 case ROUND_NEAREST_INT | ROUND_SAE:
13185 fputs ("{rn-sae}", file);
13186 break;
13187 case ROUND_NEG_INF | ROUND_SAE:
13188 fputs ("{rd-sae}", file);
13189 break;
13190 case ROUND_POS_INF | ROUND_SAE:
13191 fputs ("{ru-sae}", file);
13192 break;
13193 case ROUND_ZERO | ROUND_SAE:
13194 fputs ("{rz-sae}", file);
13195 break;
13196 default:
13197 output_operand_lossage ("operand is not a specific integer, "
13198 "invalid operand code 'R'");
13199 }
13200
13201 if (ASSEMBLER_DIALECT == ASM_ATT)
13202 fputs (", ", file);
13203
13204 return;
13205
13206 case '*':
13207 if (ASSEMBLER_DIALECT == ASM_ATT)
13208 putc ('*', file);
13209 return;
13210
13211 case '&':
13212 {
13213 const char *name = get_some_local_dynamic_name ();
13214 if (name == NULL)
13215 output_operand_lossage ("'%%&' used without any "
13216 "local dynamic TLS references");
13217 else
13218 assemble_name (file, name);
13219 return;
13220 }
13221
13222 case '+':
13223 {
13224 rtx x;
13225
13226 if (!optimize
13227 || optimize_function_for_size_p (cfun)
13228 || !TARGET_BRANCH_PREDICTION_HINTS)
13229 return;
13230
13231 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13232 if (x)
13233 {
13234 int pred_val = profile_probability::from_reg_br_prob_note
13235 (XINT (x, 0)).to_reg_br_prob_base ();
13236
13237 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13238 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13239 {
13240 bool taken = pred_val > REG_BR_PROB_BASE / 2;
13241 bool cputaken
13242 = final_forward_branch_p (current_output_insn) == 0;
13243
13244 /* Emit hints only in case the default branch prediction
13245 heuristics would fail. */
13246 if (taken != cputaken)
13247 {
13248 /* We use 3e (DS) prefix for taken branches and
13249 2e (CS) prefix for not taken branches. */
13250 if (taken)
13251 fputs ("ds ; ", file);
13252 else
13253 fputs ("cs ; ", file);
13254 }
13255 }
13256 }
13257 return;
13258 }
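/* As a rough example (label name hypothetical): a conditional jump the
   compiler predicts taken, but that the static forward/backward rule
   would predict not taken, comes out as "ds ; jne .L5", the 0x3e
   segment-override byte serving as a taken hint.  */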
13259
13260 case ';':
13261 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13262 putc (';', file);
13263 #endif
13264 return;
13265
13266 case '~':
13267 putc (TARGET_AVX2 ? 'i' : 'f', file);
13268 return;
13269
13270 case 'M':
13271 if (TARGET_X32)
13272 {
13273 /* NB: 32-bit indices in VSIB address are sign-extended
13274 to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
13275 sign-extended to 0xfffffffff7fa3010 which is invalid
13276 address. Add addr32 prefix if there is no base
13277 register nor symbol. */
13278 bool ok;
13279 struct ix86_address parts;
13280 ok = ix86_decompose_address (x, &parts);
13281 gcc_assert (ok && parts.index == NULL_RTX);
13282 if (parts.base == NULL_RTX
13283 && (parts.disp == NULL_RTX
13284 || !symbolic_operand (parts.disp,
13285 GET_MODE (parts.disp))))
13286 fputs ("addr32 ", file);
13287 }
13288 return;
13289
13290 case '^':
13291 if (TARGET_64BIT && Pmode != word_mode)
13292 fputs ("addr32 ", file);
13293 return;
13294
13295 case '!':
13296 if (ix86_notrack_prefixed_insn_p (current_output_insn))
13297 fputs ("notrack ", file);
13298 return;
13299
13300 default:
13301 output_operand_lossage ("invalid operand code '%c'", code);
13302 }
13303 }
13304
13305 if (REG_P (x))
13306 print_reg (x, code, file);
13307
13308 else if (MEM_P (x))
13309 {
13310 rtx addr = XEXP (x, 0);
13311
13312 /* No `byte ptr' prefix for call instructions ... */
13313 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
13314 {
13315 machine_mode mode = GET_MODE (x);
13316 const char *size;
13317
13318 /* Check for explicit size override codes. */
13319 if (code == 'b')
13320 size = "BYTE";
13321 else if (code == 'w')
13322 size = "WORD";
13323 else if (code == 'k')
13324 size = "DWORD";
13325 else if (code == 'q')
13326 size = "QWORD";
13327 else if (code == 'x')
13328 size = "XMMWORD";
13329 else if (code == 't')
13330 size = "YMMWORD";
13331 else if (code == 'g')
13332 size = "ZMMWORD";
13333 else if (mode == BLKmode)
13334 /* ... or BLKmode operands, when not overridden. */
13335 size = NULL;
13336 else
13337 switch (GET_MODE_SIZE (mode))
13338 {
13339 case 1: size = "BYTE"; break;
13340 case 2: size = "WORD"; break;
13341 case 4: size = "DWORD"; break;
13342 case 8: size = "QWORD"; break;
13343 case 12: size = "TBYTE"; break;
13344 case 16:
13345 if (mode == XFmode)
13346 size = "TBYTE";
13347 else
13348 size = "XMMWORD";
13349 break;
13350 case 32: size = "YMMWORD"; break;
13351 case 64: size = "ZMMWORD"; break;
13352 default:
13353 gcc_unreachable ();
13354 }
13355 if (size)
13356 {
13357 fputs (size, file);
13358 fputs (" PTR ", file);
13359 }
13360 }
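/* For example (operands illustrative), an SImode memory destination in
   Intel syntax is printed as "mov DWORD PTR [esp+4], 1"; the override
   codes above let a template force a different width keyword.  */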
13361
13362 if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
13363 output_operand_lossage ("invalid constraints for operand");
13364 else
13365 ix86_print_operand_address_as
13366 (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
13367 }
13368
13369 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
13370 {
13371 long l;
13372
13373 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13374
13375 if (ASSEMBLER_DIALECT == ASM_ATT)
13376 putc ('$', file);
13377 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13378 if (code == 'q')
13379 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
13380 (unsigned long long) (int) l);
13381 else
13382 fprintf (file, "0x%08x", (unsigned int) l);
13383 }
13384
13385 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
13386 {
13387 long l[2];
13388
13389 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
13390
13391 if (ASSEMBLER_DIALECT == ASM_ATT)
13392 putc ('$', file);
13393 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
13394 }
13395
13396 /* These float cases don't actually occur as immediate operands. */
13397 else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
13398 {
13399 char dstr[30];
13400
13401 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13402 fputs (dstr, file);
13403 }
13404
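/* Embedded-broadcast operands are printed as the memory reference
   followed by a {1toN} qualifier, e.g. (illustrative instruction only)
   "vaddpd (%rax){1to8}, %zmm1, %zmm0" in AT&T syntax.  */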
13405 /* Print bcst_mem_operand. */
13406 else if (GET_CODE (x) == VEC_DUPLICATE)
13407 {
13408 machine_mode vmode = GET_MODE (x);
13409 /* Must be bcst_memory_operand. */
13410 gcc_assert (bcst_mem_operand (x, vmode));
13411
13412 rtx mem = XEXP (x, 0);
13413 ix86_print_operand (file, mem, 0);
13414
13415 switch (vmode)
13416 {
13417 case E_V2DImode:
13418 case E_V2DFmode:
13419 fputs ("{1to2}", file);
13420 break;
13421 case E_V4SImode:
13422 case E_V4SFmode:
13423 case E_V4DImode:
13424 case E_V4DFmode:
13425 fputs ("{1to4}", file);
13426 break;
13427 case E_V8SImode:
13428 case E_V8SFmode:
13429 case E_V8DFmode:
13430 case E_V8DImode:
13431 fputs ("{1to8}", file);
13432 break;
13433 case E_V16SFmode:
13434 case E_V16SImode:
13435 fputs ("{1to16}", file);
13436 break;
13437 default:
13438 gcc_unreachable ();
13439 }
13440 }
13441
13442 else
13443 {
13444 /* We have patterns that allow zero sets of memory, for instance.
13445 In 64-bit mode, we should probably support all 8-byte vectors,
13446 since we can in fact encode that into an immediate. */
13447 if (GET_CODE (x) == CONST_VECTOR)
13448 {
13449 if (x != CONST0_RTX (GET_MODE (x)))
13450 output_operand_lossage ("invalid vector immediate");
13451 x = const0_rtx;
13452 }
13453
13454 if (code != 'P' && code != 'p')
13455 {
13456 if (CONST_INT_P (x))
13457 {
13458 if (ASSEMBLER_DIALECT == ASM_ATT)
13459 putc ('$', file);
13460 }
13461 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13462 || GET_CODE (x) == LABEL_REF)
13463 {
13464 if (ASSEMBLER_DIALECT == ASM_ATT)
13465 putc ('$', file);
13466 else
13467 fputs ("OFFSET FLAT:", file);
13468 }
13469 }
13470 if (CONST_INT_P (x))
13471 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13472 else if (flag_pic || MACHOPIC_INDIRECT)
13473 output_pic_addr_const (file, x, code);
13474 else
13475 output_addr_const (file, x);
13476 }
13477 }
13478
13479 static bool
13480 ix86_print_operand_punct_valid_p (unsigned char code)
13481 {
13482 return (code == '*' || code == '+' || code == '&' || code == ';'
13483 || code == '~' || code == '^' || code == '!');
13484 }
13485 \f
13486 /* Print a memory operand whose address is ADDR. */
13487
13488 static void
13489 ix86_print_operand_address_as (FILE *file, rtx addr,
13490 addr_space_t as, bool no_rip)
13491 {
13492 struct ix86_address parts;
13493 rtx base, index, disp;
13494 int scale;
13495 int ok;
13496 bool vsib = false;
13497 int code = 0;
13498
13499 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
13500 {
13501 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13502 gcc_assert (parts.index == NULL_RTX);
13503 parts.index = XVECEXP (addr, 0, 1);
13504 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
13505 addr = XVECEXP (addr, 0, 0);
13506 vsib = true;
13507 }
13508 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
13509 {
13510 gcc_assert (TARGET_64BIT);
13511 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
13512 code = 'q';
13513 }
13514 else
13515 ok = ix86_decompose_address (addr, &parts);
13516
13517 gcc_assert (ok);
13518
13519 base = parts.base;
13520 index = parts.index;
13521 disp = parts.disp;
13522 scale = parts.scale;
13523
13524 if (ADDR_SPACE_GENERIC_P (as))
13525 as = parts.seg;
13526 else
13527 gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
13528
13529 if (!ADDR_SPACE_GENERIC_P (as))
13530 {
13531 if (ASSEMBLER_DIALECT == ASM_ATT)
13532 putc ('%', file);
13533
13534 switch (as)
13535 {
13536 case ADDR_SPACE_SEG_FS:
13537 fputs ("fs:", file);
13538 break;
13539 case ADDR_SPACE_SEG_GS:
13540 fputs ("gs:", file);
13541 break;
13542 default:
13543 gcc_unreachable ();
13544 }
13545 }
13546
13547 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13548 if (TARGET_64BIT && !base && !index && !no_rip)
13549 {
13550 rtx symbol = disp;
13551
13552 if (GET_CODE (disp) == CONST
13553 && GET_CODE (XEXP (disp, 0)) == PLUS
13554 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13555 symbol = XEXP (XEXP (disp, 0), 0);
13556
13557 if (GET_CODE (symbol) == LABEL_REF
13558 || (GET_CODE (symbol) == SYMBOL_REF
13559 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13560 base = pc_rtx;
13561 }
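  /* E.g. a load of a global symbol is then printed as
     "movl sym(%rip), %eax" (AT&T syntax; instruction and names purely
     illustrative) instead of using a 32-bit absolute displacement.  */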
13562
13563 if (!base && !index)
13564 {
13565 /* A displacement-only address requires special attention. */
13566 if (CONST_INT_P (disp))
13567 {
13568 if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
13569 fputs ("ds:", file);
13570 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13571 }
13572 /* Load the external function address via the GOT slot to avoid PLT. */
13573 else if (GET_CODE (disp) == CONST
13574 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13575 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
13576 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
13577 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
13578 output_pic_addr_const (file, disp, 0);
13579 else if (flag_pic)
13580 output_pic_addr_const (file, disp, 0);
13581 else
13582 output_addr_const (file, disp);
13583 }
13584 else
13585 {
13586 /* Print SImode register names to force addr32 prefix. */
13587 if (SImode_address_operand (addr, VOIDmode))
13588 {
13589 if (flag_checking)
13590 {
13591 gcc_assert (TARGET_64BIT);
13592 switch (GET_CODE (addr))
13593 {
13594 case SUBREG:
13595 gcc_assert (GET_MODE (addr) == SImode);
13596 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
13597 break;
13598 case ZERO_EXTEND:
13599 case AND:
13600 gcc_assert (GET_MODE (addr) == DImode);
13601 break;
13602 default:
13603 gcc_unreachable ();
13604 }
13605 }
13606 gcc_assert (!code);
13607 code = 'k';
13608 }
13609 else if (code == 0
13610 && TARGET_X32
13611 && disp
13612 && CONST_INT_P (disp)
13613 && INTVAL (disp) < -16*1024*1024)
13614 {
13615 /* X32 runs in 64-bit mode, where displacement, DISP, in
13616 address DISP(%r64), is encoded as 32-bit immediate sign-
13617 extended from 32-bit to 64-bit. For -0x40000300(%r64),
13618 address is %r64 + 0xffffffffbffffd00. When %r64 <
13619 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
13620 which is invalid for x32. The correct address is %r64
13621 - 0x40000300 == 0xf7ffdd64. To properly encode
13622 -0x40000300(%r64) for x32, we zero-extend negative
13623 displacement by forcing addr32 prefix which truncates
13624 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
13625 zero-extend all negative displacements, including -1(%rsp).
13626 However, for small negative displacements, sign-extension
13627 won't cause overflow. We only zero-extend negative
13628 displacements if they are < -16*1024*1024, which is also used
13629 to check legitimate address displacements for PIC. */
13630 code = 'k';
13631 }
13632
13633 /* Since the upper 32 bits of RSP are always zero for x32,
13634 we can encode %esp as %rsp to avoid 0x67 prefix if
13635 there is no index register. */
13636 if (TARGET_X32 && Pmode == SImode
13637 && !index && base && REG_P (base) && REGNO (base) == SP_REG)
13638 code = 'q';
13639
13640 if (ASSEMBLER_DIALECT == ASM_ATT)
13641 {
13642 if (disp)
13643 {
13644 if (flag_pic)
13645 output_pic_addr_const (file, disp, 0);
13646 else if (GET_CODE (disp) == LABEL_REF)
13647 output_asm_label (disp);
13648 else
13649 output_addr_const (file, disp);
13650 }
13651
13652 putc ('(', file);
13653 if (base)
13654 print_reg (base, code, file);
13655 if (index)
13656 {
13657 putc (',', file);
13658 print_reg (index, vsib ? 0 : code, file);
13659 if (scale != 1 || vsib)
13660 fprintf (file, ",%d", scale);
13661 }
13662 putc (')', file);
13663 }
13664 else
13665 {
13666 rtx offset = NULL_RTX;
13667
13668 if (disp)
13669 {
13670 /* Pull out the offset of a symbol; print any symbol itself. */
13671 if (GET_CODE (disp) == CONST
13672 && GET_CODE (XEXP (disp, 0)) == PLUS
13673 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13674 {
13675 offset = XEXP (XEXP (disp, 0), 1);
13676 disp = gen_rtx_CONST (VOIDmode,
13677 XEXP (XEXP (disp, 0), 0));
13678 }
13679
13680 if (flag_pic)
13681 output_pic_addr_const (file, disp, 0);
13682 else if (GET_CODE (disp) == LABEL_REF)
13683 output_asm_label (disp);
13684 else if (CONST_INT_P (disp))
13685 offset = disp;
13686 else
13687 output_addr_const (file, disp);
13688 }
13689
13690 putc ('[', file);
13691 if (base)
13692 {
13693 print_reg (base, code, file);
13694 if (offset)
13695 {
13696 if (INTVAL (offset) >= 0)
13697 putc ('+', file);
13698 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13699 }
13700 }
13701 else if (offset)
13702 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13703 else
13704 putc ('0', file);
13705
13706 if (index)
13707 {
13708 putc ('+', file);
13709 print_reg (index, vsib ? 0 : code, file);
13710 if (scale != 1 || vsib)
13711 fprintf (file, "*%d", scale);
13712 }
13713 putc (']', file);
13714 }
13715 }
13716 }
13717
13718 static void
13719 ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
13720 {
13721 ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
13722 }
13723
13724 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13725
13726 static bool
13727 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13728 {
13729 rtx op;
13730
13731 if (GET_CODE (x) != UNSPEC)
13732 return false;
13733
13734 op = XVECEXP (x, 0, 0);
13735 switch (XINT (x, 1))
13736 {
13737 case UNSPEC_GOTOFF:
13738 output_addr_const (file, op);
13739 fputs ("@gotoff", file);
13740 break;
13741 case UNSPEC_GOTTPOFF:
13742 output_addr_const (file, op);
13743 /* FIXME: This might be @TPOFF in Sun ld. */
13744 fputs ("@gottpoff", file);
13745 break;
13746 case UNSPEC_TPOFF:
13747 output_addr_const (file, op);
13748 fputs ("@tpoff", file);
13749 break;
13750 case UNSPEC_NTPOFF:
13751 output_addr_const (file, op);
13752 if (TARGET_64BIT)
13753 fputs ("@tpoff", file);
13754 else
13755 fputs ("@ntpoff", file);
13756 break;
13757 case UNSPEC_DTPOFF:
13758 output_addr_const (file, op);
13759 fputs ("@dtpoff", file);
13760 break;
13761 case UNSPEC_GOTNTPOFF:
13762 output_addr_const (file, op);
13763 if (TARGET_64BIT)
13764 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13765 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13766 else
13767 fputs ("@gotntpoff", file);
13768 break;
13769 case UNSPEC_INDNTPOFF:
13770 output_addr_const (file, op);
13771 fputs ("@indntpoff", file);
13772 break;
13773 #if TARGET_MACHO
13774 case UNSPEC_MACHOPIC_OFFSET:
13775 output_addr_const (file, op);
13776 putc ('-', file);
13777 machopic_output_function_base_name (file);
13778 break;
13779 #endif
13780
13781 default:
13782 return false;
13783 }
13784
13785 return true;
13786 }
13787 \f
13788 \f
13789 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13790 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13791 is the expression of the binary operation. The output may either be
13792 emitted here, or returned to the caller, like all output_* functions.
13793
13794 There is no guarantee that the operands are the same mode, as they
13795 might be within FLOAT or FLOAT_EXTEND expressions. */
13796
13797 #ifndef SYSV386_COMPAT
13798 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13799 wants to fix the assemblers because that causes incompatibility
13800 with gcc. No-one wants to fix gcc because that causes
13801 incompatibility with assemblers... You can use the option of
13802 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13803 #define SYSV386_COMPAT 1
13804 #endif
13805
13806 const char *
13807 output_387_binary_op (rtx_insn *insn, rtx *operands)
13808 {
13809 static char buf[40];
13810 const char *p;
13811 bool is_sse
13812 = (SSE_REG_P (operands[0])
13813 || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
13814
13815 if (is_sse)
13816 p = "%v";
13817 else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13818 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13819 p = "fi";
13820 else
13821 p = "f";
13822
13823 strcpy (buf, p);
13824
13825 switch (GET_CODE (operands[3]))
13826 {
13827 case PLUS:
13828 p = "add"; break;
13829 case MINUS:
13830 p = "sub"; break;
13831 case MULT:
13832 p = "mul"; break;
13833 case DIV:
13834 p = "div"; break;
13835 default:
13836 gcc_unreachable ();
13837 }
13838
13839 strcat (buf, p);
13840
13841 if (is_sse)
13842 {
13843 p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd";
13844 strcat (buf, p);
13845
13846 if (TARGET_AVX)
13847 p = "\t{%2, %1, %0|%0, %1, %2}";
13848 else
13849 p = "\t{%2, %0|%0, %2}";
13850
13851 strcat (buf, p);
13852 return buf;
13853 }
13854
13855 /* Even if we do not want to check the inputs, this documents the input
13856 constraints, which helps in understanding the following code. */
13857 if (flag_checking)
13858 {
13859 if (STACK_REG_P (operands[0])
13860 && ((REG_P (operands[1])
13861 && REGNO (operands[0]) == REGNO (operands[1])
13862 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13863 || (REG_P (operands[2])
13864 && REGNO (operands[0]) == REGNO (operands[2])
13865 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13866 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13867 ; /* ok */
13868 else
13869 gcc_unreachable ();
13870 }
13871
13872 switch (GET_CODE (operands[3]))
13873 {
13874 case MULT:
13875 case PLUS:
13876 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13877 std::swap (operands[1], operands[2]);
13878
13879 /* We now know operands[0] == operands[1]. */
13880
13881 if (MEM_P (operands[2]))
13882 {
13883 p = "%Z2\t%2";
13884 break;
13885 }
13886
13887 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13888 {
13889 if (STACK_TOP_P (operands[0]))
13890 /* How is it that we are storing to a dead operand[2]?
13891 Well, presumably operands[1] is dead too. We can't
13892 store the result to st(0) as st(0) gets popped on this
13893 instruction. Instead store to operands[2] (which I
13894 think has to be st(1)). st(1) will be popped later.
13895 gcc <= 2.8.1 didn't have this check and generated
13896 assembly code that the Unixware assembler rejected. */
13897 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13898 else
13899 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13900 break;
13901 }
13902
13903 if (STACK_TOP_P (operands[0]))
13904 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13905 else
13906 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13907 break;
13908
13909 case MINUS:
13910 case DIV:
13911 if (MEM_P (operands[1]))
13912 {
13913 p = "r%Z1\t%1";
13914 break;
13915 }
13916
13917 if (MEM_P (operands[2]))
13918 {
13919 p = "%Z2\t%2";
13920 break;
13921 }
13922
13923 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13924 {
13925 #if SYSV386_COMPAT
13926 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13927 derived assemblers, confusingly reverse the direction of
13928 the operation for fsub{r} and fdiv{r} when the
13929 destination register is not st(0). The Intel assembler
13930 doesn't have this brain damage. Read !SYSV386_COMPAT to
13931 figure out what the hardware really does. */
13932 if (STACK_TOP_P (operands[0]))
13933 p = "{p\t%0, %2|rp\t%2, %0}";
13934 else
13935 p = "{rp\t%2, %0|p\t%0, %2}";
13936 #else
13937 if (STACK_TOP_P (operands[0]))
13938 /* As above for fmul/fadd, we can't store to st(0). */
13939 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13940 else
13941 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13942 #endif
13943 break;
13944 }
13945
13946 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13947 {
13948 #if SYSV386_COMPAT
13949 if (STACK_TOP_P (operands[0]))
13950 p = "{rp\t%0, %1|p\t%1, %0}";
13951 else
13952 p = "{p\t%1, %0|rp\t%0, %1}";
13953 #else
13954 if (STACK_TOP_P (operands[0]))
13955 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13956 else
13957 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13958 #endif
13959 break;
13960 }
13961
13962 if (STACK_TOP_P (operands[0]))
13963 {
13964 if (STACK_TOP_P (operands[1]))
13965 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13966 else
13967 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13968 break;
13969 }
13970 else if (STACK_TOP_P (operands[1]))
13971 {
13972 #if SYSV386_COMPAT
13973 p = "{\t%1, %0|r\t%0, %1}";
13974 #else
13975 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13976 #endif
13977 }
13978 else
13979 {
13980 #if SYSV386_COMPAT
13981 p = "{r\t%2, %0|\t%0, %2}";
13982 #else
13983 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13984 #endif
13985 }
13986 break;
13987
13988 default:
13989 gcc_unreachable ();
13990 }
13991
13992 strcat (buf, p);
13993 return buf;
13994 }
13995
13996 /* Return needed mode for entity in optimize_mode_switching pass. */
13997
13998 static int
13999 ix86_dirflag_mode_needed (rtx_insn *insn)
14000 {
14001 if (CALL_P (insn))
14002 {
14003 if (cfun->machine->func_type == TYPE_NORMAL)
14004 return X86_DIRFLAG_ANY;
14005 else
14006 /* No need to emit CLD in interrupt handler for TARGET_CLD. */
14007 return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
14008 }
14009
14010 if (recog_memoized (insn) < 0)
14011 return X86_DIRFLAG_ANY;
14012
14013 if (get_attr_type (insn) == TYPE_STR)
14014 {
14015 /* Emit cld instruction if stringops are used in the function. */
14016 if (cfun->machine->func_type == TYPE_NORMAL)
14017 return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
14018 else
14019 return X86_DIRFLAG_RESET;
14020 }
14021
14022 return X86_DIRFLAG_ANY;
14023 }
14024
14025 /* Check if a 256bit or 512bit AVX register is referenced inside EXP. */
14026
14027 static bool
14028 ix86_check_avx_upper_register (const_rtx exp)
14029 {
14030 return (SSE_REG_P (exp)
14031 && !EXT_REX_SSE_REG_P (exp)
14032 && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
14033 }
14034
14035 /* Return needed mode for entity in optimize_mode_switching pass. */
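/* Background: on many implementations, executing legacy SSE code while
   the upper halves of the YMM/ZMM registers are "dirty" incurs a
   transition penalty, so the mode-switching pass tracks a CLEAN/DIRTY
   state and inserts vzeroupper where a possibly-dirty state could be
   observed (e.g. around calls).  */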
14036
14037 static int
14038 ix86_avx_u128_mode_needed (rtx_insn *insn)
14039 {
14040 if (CALL_P (insn))
14041 {
14042 rtx link;
14043
14044 /* Needed mode is set to AVX_U128_CLEAN if there are
14045 no 256bit or 512bit modes used in function arguments. */
14046 for (link = CALL_INSN_FUNCTION_USAGE (insn);
14047 link;
14048 link = XEXP (link, 1))
14049 {
14050 if (GET_CODE (XEXP (link, 0)) == USE)
14051 {
14052 rtx arg = XEXP (XEXP (link, 0), 0);
14053
14054 if (ix86_check_avx_upper_register (arg))
14055 return AVX_U128_DIRTY;
14056 }
14057 }
14058
14059 /* If the function is known to preserve some SSE registers,
14060 RA and previous passes can legitimately rely on that for
14061 modes wider than 256 bits. It's only safe to issue a
14062 vzeroupper if all SSE registers are clobbered. */
14063 const function_abi &abi = insn_callee_abi (insn);
14064 if (!hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
14065 abi.mode_clobbers (V4DImode)))
14066 return AVX_U128_ANY;
14067
14068 return AVX_U128_CLEAN;
14069 }
14070
14071 /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
14072 Hardware changes state only when a 256bit register is written to,
14073 but we need to prevent the compiler from moving the optimal insertion
14074 point above an eventual read from a 256bit or 512bit register. */
14075 subrtx_iterator::array_type array;
14076 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14077 if (ix86_check_avx_upper_register (*iter))
14078 return AVX_U128_DIRTY;
14079
14080 return AVX_U128_ANY;
14081 }
14082
14083 /* Return mode that i387 must be switched into
14084 prior to the execution of insn. */
14085
14086 static int
14087 ix86_i387_mode_needed (int entity, rtx_insn *insn)
14088 {
14089 enum attr_i387_cw mode;
14090
14091 /* The mode UNINITIALIZED is used to store the control word after a
14092 function call or ASM pattern. The mode ANY specifies that the function
14093 has no requirements on the control word and makes no changes in the
14094 bits we are interested in. */
14095
14096 if (CALL_P (insn)
14097 || (NONJUMP_INSN_P (insn)
14098 && (asm_noperands (PATTERN (insn)) >= 0
14099 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
14100 return I387_CW_UNINITIALIZED;
14101
14102 if (recog_memoized (insn) < 0)
14103 return I387_CW_ANY;
14104
14105 mode = get_attr_i387_cw (insn);
14106
14107 switch (entity)
14108 {
14109 case I387_ROUNDEVEN:
14110 if (mode == I387_CW_ROUNDEVEN)
14111 return mode;
14112 break;
14113
14114 case I387_TRUNC:
14115 if (mode == I387_CW_TRUNC)
14116 return mode;
14117 break;
14118
14119 case I387_FLOOR:
14120 if (mode == I387_CW_FLOOR)
14121 return mode;
14122 break;
14123
14124 case I387_CEIL:
14125 if (mode == I387_CW_CEIL)
14126 return mode;
14127 break;
14128
14129 default:
14130 gcc_unreachable ();
14131 }
14132
14133 return I387_CW_ANY;
14134 }
14135
14136 /* Return mode that entity must be switched into
14137 prior to the execution of insn. */
14138
14139 static int
14140 ix86_mode_needed (int entity, rtx_insn *insn)
14141 {
14142 switch (entity)
14143 {
14144 case X86_DIRFLAG:
14145 return ix86_dirflag_mode_needed (insn);
14146 case AVX_U128:
14147 return ix86_avx_u128_mode_needed (insn);
14148 case I387_ROUNDEVEN:
14149 case I387_TRUNC:
14150 case I387_FLOOR:
14151 case I387_CEIL:
14152 return ix86_i387_mode_needed (entity, insn);
14153 default:
14154 gcc_unreachable ();
14155 }
14156 return 0;
14157 }
14158
14159 /* Check if a 256bit or 512bit AVX register is referenced in stores. */
14160
14161 static void
14162 ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
14163 {
14164 if (ix86_check_avx_upper_register (dest))
14165 {
14166 bool *used = (bool *) data;
14167 *used = true;
14168 }
14169 }
14170
14171 /* Calculate the mode of the upper 128 bits of AVX registers after the insn. */
14172
14173 static int
14174 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
14175 {
14176 rtx pat = PATTERN (insn);
14177
14178 if (vzeroupper_pattern (pat, VOIDmode)
14179 || vzeroall_pattern (pat, VOIDmode))
14180 return AVX_U128_CLEAN;
14181
14182 /* We know that the state is clean after a CALL insn if there is no
14183 256bit or 512bit register used for the function return value. */
14184 if (CALL_P (insn))
14185 {
14186 bool avx_upper_reg_found = false;
14187 note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
14188
14189 return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
14190 }
14191
14192 /* Otherwise, return current mode. Remember that if insn
14193 references AVX 256bit or 512bit registers, the mode was already
14194 changed to DIRTY from MODE_NEEDED. */
14195 return mode;
14196 }
14197
14198 /* Return the mode that an insn results in. */
14199
14200 static int
14201 ix86_mode_after (int entity, int mode, rtx_insn *insn)
14202 {
14203 switch (entity)
14204 {
14205 case X86_DIRFLAG:
14206 return mode;
14207 case AVX_U128:
14208 return ix86_avx_u128_mode_after (mode, insn);
14209 case I387_ROUNDEVEN:
14210 case I387_TRUNC:
14211 case I387_FLOOR:
14212 case I387_CEIL:
14213 return mode;
14214 default:
14215 gcc_unreachable ();
14216 }
14217 }
14218
14219 static int
14220 ix86_dirflag_mode_entry (void)
14221 {
14222 /* For TARGET_CLD or in the interrupt handler we can't assume
14223 direction flag state at function entry. */
14224 if (TARGET_CLD
14225 || cfun->machine->func_type != TYPE_NORMAL)
14226 return X86_DIRFLAG_ANY;
14227
14228 return X86_DIRFLAG_RESET;
14229 }
14230
14231 static int
14232 ix86_avx_u128_mode_entry (void)
14233 {
14234 tree arg;
14235
14236 /* Entry mode is set to AVX_U128_DIRTY if there are
14237 256bit or 512bit modes used in function arguments. */
14238 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
14239 arg = TREE_CHAIN (arg))
14240 {
14241 rtx incoming = DECL_INCOMING_RTL (arg);
14242
14243 if (incoming && ix86_check_avx_upper_register (incoming))
14244 return AVX_U128_DIRTY;
14245 }
14246
14247 return AVX_U128_CLEAN;
14248 }
14249
14250 /* Return a mode that ENTITY is assumed to be
14251 switched to at function entry. */
14252
14253 static int
14254 ix86_mode_entry (int entity)
14255 {
14256 switch (entity)
14257 {
14258 case X86_DIRFLAG:
14259 return ix86_dirflag_mode_entry ();
14260 case AVX_U128:
14261 return ix86_avx_u128_mode_entry ();
14262 case I387_ROUNDEVEN:
14263 case I387_TRUNC:
14264 case I387_FLOOR:
14265 case I387_CEIL:
14266 return I387_CW_ANY;
14267 default:
14268 gcc_unreachable ();
14269 }
14270 }
14271
14272 static int
14273 ix86_avx_u128_mode_exit (void)
14274 {
14275 rtx reg = crtl->return_rtx;
14276
14277 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
14278 or 512bit modes used in the function return register. */
14279 if (reg && ix86_check_avx_upper_register (reg))
14280 return AVX_U128_DIRTY;
14281
14282 /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
14283 modes used in function arguments, otherwise return AVX_U128_CLEAN.
14284 */
14285 return ix86_avx_u128_mode_entry ();
14286 }
14287
14288 /* Return a mode that ENTITY is assumed to be
14289 switched to at function exit. */
14290
14291 static int
14292 ix86_mode_exit (int entity)
14293 {
14294 switch (entity)
14295 {
14296 case X86_DIRFLAG:
14297 return X86_DIRFLAG_ANY;
14298 case AVX_U128:
14299 return ix86_avx_u128_mode_exit ();
14300 case I387_ROUNDEVEN:
14301 case I387_TRUNC:
14302 case I387_FLOOR:
14303 case I387_CEIL:
14304 return I387_CW_ANY;
14305 default:
14306 gcc_unreachable ();
14307 }
14308 }
14309
14310 static int
14311 ix86_mode_priority (int, int n)
14312 {
14313 return n;
14314 }
14315
14316 /* Output code to initialize control word copies used by trunc?f?i and
14317 rounding patterns. CURRENT_MODE is set to current control word,
14318 while NEW_MODE is set to new control word. */
14319
14320 static void
14321 emit_i387_cw_initialization (int mode)
14322 {
14323 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14324 rtx new_mode;
14325
14326 enum ix86_stack_slot slot;
14327
14328 rtx reg = gen_reg_rtx (HImode);
14329
14330 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14331 emit_move_insn (reg, copy_rtx (stored_mode));
14332
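  /* The rounding-control field occupies bits 11:10 of the x87 control
     word: 00 = round to nearest (even), 01 = round down, 10 = round up,
     11 = round toward zero.  The masks below clear and/or set exactly
     that field.  */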
14333 switch (mode)
14334 {
14335 case I387_CW_ROUNDEVEN:
14336 /* round to nearest */
14337 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14338 slot = SLOT_CW_ROUNDEVEN;
14339 break;
14340
14341 case I387_CW_TRUNC:
14342 /* round toward zero (truncate) */
14343 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14344 slot = SLOT_CW_TRUNC;
14345 break;
14346
14347 case I387_CW_FLOOR:
14348 /* round down toward -oo */
14349 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14350 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14351 slot = SLOT_CW_FLOOR;
14352 break;
14353
14354 case I387_CW_CEIL:
14355 /* round up toward +oo */
14356 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14357 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14358 slot = SLOT_CW_CEIL;
14359 break;
14360
14361 default:
14362 gcc_unreachable ();
14363 }
14364
14365 gcc_assert (slot < MAX_386_STACK_LOCALS);
14366
14367 new_mode = assign_386_stack_local (HImode, slot);
14368 emit_move_insn (new_mode, reg);
14369 }
14370
14371 /* Generate one or more insns to set ENTITY to MODE. */
14372
14373 static void
14374 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
14375 HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
14376 {
14377 switch (entity)
14378 {
14379 case X86_DIRFLAG:
14380 if (mode == X86_DIRFLAG_RESET)
14381 emit_insn (gen_cld ());
14382 break;
14383 case AVX_U128:
14384 if (mode == AVX_U128_CLEAN)
14385 emit_insn (gen_avx_vzeroupper ());
14386 break;
14387 case I387_ROUNDEVEN:
14388 case I387_TRUNC:
14389 case I387_FLOOR:
14390 case I387_CEIL:
14391 if (mode != I387_CW_ANY
14392 && mode != I387_CW_UNINITIALIZED)
14393 emit_i387_cw_initialization (mode);
14394 break;
14395 default:
14396 gcc_unreachable ();
14397 }
14398 }
14399
14400 /* Output code for INSN to convert a float to a signed int. OPERANDS
14401 are the insn operands. The output may be [HSD]Imode and the input
14402 operand may be [SDX]Fmode. */
14403
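/* A minimal sketch of the emitted sequence when a non-default rounding
   mode is needed (the exact suffix and popping variant depend on the
   operand mode and on whether the stack top dies):

	fldcw	%3
	fistpl	%0
	fldcw	%2  */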
14404 const char *
14405 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
14406 {
14407 bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14408 bool dimode_p = GET_MODE (operands[0]) == DImode;
14409 int round_mode = get_attr_i387_cw (insn);
14410
14411 static char buf[40];
14412 const char *p;
14413
14414 /* Jump through a hoop or two for DImode, since the hardware has no
14415 non-popping instruction. We used to do this a different way, but
14416 that was somewhat fragile and broke with post-reload splitters. */
14417 if ((dimode_p || fisttp) && !stack_top_dies)
14418 output_asm_insn ("fld\t%y1", operands);
14419
14420 gcc_assert (STACK_TOP_P (operands[1]));
14421 gcc_assert (MEM_P (operands[0]));
14422 gcc_assert (GET_MODE (operands[1]) != TFmode);
14423
14424 if (fisttp)
14425 return "fisttp%Z0\t%0";
14426
14427 strcpy (buf, "fist");
14428
14429 if (round_mode != I387_CW_ANY)
14430 output_asm_insn ("fldcw\t%3", operands);
14431
14432 p = "p%Z0\t%0";
14433 strcat (buf, p + !(stack_top_dies || dimode_p));
14434
14435 output_asm_insn (buf, operands);
14436
14437 if (round_mode != I387_CW_ANY)
14438 output_asm_insn ("fldcw\t%2", operands);
14439
14440 return "";
14441 }
14442
14443 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14444 have the values zero or one, indicates the ffreep insn's operand
14445 from the OPERANDS array. */
14446
14447 static const char *
14448 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14449 {
14450 if (TARGET_USE_FFREEP)
14451 #ifdef HAVE_AS_IX86_FFREEP
14452 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14453 #else
14454 {
14455 static char retval[32];
14456 int regno = REGNO (operands[opno]);
14457
14458 gcc_assert (STACK_REGNO_P (regno));
14459
14460 regno -= FIRST_STACK_REG;
14461
14462 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
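      /* "ffreep %st(i)" encodes as the two bytes 0xdf, 0xc0+i; since the
	 .short directive is little-endian on x86, emitting 0xc<i>df
	 produces exactly that byte sequence.  */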
14463 return retval;
14464 }
14465 #endif
14466
14467 return opno ? "fstp\t%y1" : "fstp\t%y0";
14468 }
14469
14470
14471 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14472 should be used. UNORDERED_P is true when fucom should be used. */
14473
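/* Rough examples (operand spellings illustrative): with EFLAGS_P the
   compare is a single fcomi/fucomi-family instruction such as
   "fucomip %st(1), %st"; otherwise a classic compare such as "fucompp"
   is emitted here and the returned "fnstsw %0" fetches the status word
   (typically into %ax).  */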
14474 const char *
14475 output_fp_compare (rtx_insn *insn, rtx *operands,
14476 bool eflags_p, bool unordered_p)
14477 {
14478 rtx *xops = eflags_p ? &operands[0] : &operands[1];
14479 bool stack_top_dies;
14480
14481 static char buf[40];
14482 const char *p;
14483
14484 gcc_assert (STACK_TOP_P (xops[0]));
14485
14486 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
14487
14488 if (eflags_p)
14489 {
14490 p = unordered_p ? "fucomi" : "fcomi";
14491 strcpy (buf, p);
14492
14493 p = "p\t{%y1, %0|%0, %y1}";
14494 strcat (buf, p + !stack_top_dies);
14495
14496 return buf;
14497 }
14498
14499 if (STACK_REG_P (xops[1])
14500 && stack_top_dies
14501 && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
14502 {
14503 gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
14504
14505 /* If both the top of the 387 stack die, and the other operand
14506 is also a stack register that dies, then this must be a
14507 `fcompp' float compare. */
14508 p = unordered_p ? "fucompp" : "fcompp";
14509 strcpy (buf, p);
14510 }
14511 else if (const0_operand (xops[1], VOIDmode))
14512 {
14513 gcc_assert (!unordered_p);
14514 strcpy (buf, "ftst");
14515 }
14516 else
14517 {
14518 if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
14519 {
14520 gcc_assert (!unordered_p);
14521 p = "ficom";
14522 }
14523 else
14524 p = unordered_p ? "fucom" : "fcom";
14525
14526 strcpy (buf, p);
14527
14528 p = "p%Z2\t%y2";
14529 strcat (buf, p + !stack_top_dies);
14530 }
14531
14532 output_asm_insn (buf, operands);
14533 return "fnstsw\t%0";
14534 }
14535
14536 void
14537 ix86_output_addr_vec_elt (FILE *file, int value)
14538 {
14539 const char *directive = ASM_LONG;
14540
14541 #ifdef ASM_QUAD
14542 if (TARGET_LP64)
14543 directive = ASM_QUAD;
14544 #else
14545 gcc_assert (!TARGET_64BIT);
14546 #endif
14547
14548 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14549 }
14550
14551 void
14552 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14553 {
14554 const char *directive = ASM_LONG;
14555
14556 #ifdef ASM_QUAD
14557 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14558 directive = ASM_QUAD;
14559 #else
14560 gcc_assert (!TARGET_64BIT);
14561 #endif
14562 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14563 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14564 fprintf (file, "%s%s%d-%s%d\n",
14565 directive, LPREFIX, value, LPREFIX, rel);
14566 #if TARGET_MACHO
14567 else if (TARGET_MACHO)
14568 {
14569 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14570 machopic_output_function_base_name (file);
14571 putc ('\n', file);
14572 }
14573 #endif
14574 else if (HAVE_AS_GOTOFF_IN_DATA)
14575 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14576 else
14577 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14578 GOT_SYMBOL_NAME, LPREFIX, value);
14579 }
14580 \f
14581 #define LEA_MAX_STALL (3)
14582 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
14583
14584 /* Increase given DISTANCE in half-cycles according to
14585 dependencies between PREV and NEXT instructions.
14586 Add 1 half-cycle if there is no dependency and
14587 go to the next cycle if there is some dependency. */
14588
14589 static unsigned int
14590 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
14591 {
14592 df_ref def, use;
14593
14594 if (!prev || !next)
14595 return distance + (distance & 1) + 2;
14596
14597 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
14598 return distance + 1;
14599
14600 FOR_EACH_INSN_USE (use, next)
14601 FOR_EACH_INSN_DEF (def, prev)
14602 if (!DF_REF_IS_ARTIFICIAL (def)
14603 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
14604 return distance + (distance & 1) + 2;
14605
14606 return distance + 1;
14607 }
14608
14609 /* Function checks if instruction INSN defines register number
14610 REGNO1 or REGNO2. */
14611
14612 bool
14613 insn_defines_reg (unsigned int regno1, unsigned int regno2,
14614 rtx_insn *insn)
14615 {
14616 df_ref def;
14617
14618 FOR_EACH_INSN_DEF (def, insn)
14619 if (DF_REF_REG_DEF_P (def)
14620 && !DF_REF_IS_ARTIFICIAL (def)
14621 && (regno1 == DF_REF_REGNO (def)
14622 || regno2 == DF_REF_REGNO (def)))
14623 return true;
14624
14625 return false;
14626 }
14627
14628 /* Function checks if instruction INSN uses register number
14629 REGNO as a part of address expression. */
14630
14631 static bool
14632 insn_uses_reg_mem (unsigned int regno, rtx insn)
14633 {
14634 df_ref use;
14635
14636 FOR_EACH_INSN_USE (use, insn)
14637 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
14638 return true;
14639
14640 return false;
14641 }
14642
14643 /* Search backward for non-agu definition of register number REGNO1
14644 or register number REGNO2 in basic block starting from instruction
14645 START up to head of basic block or instruction INSN.
14646
14647 Function puts true value into *FOUND var if definition was found
14648 and false otherwise.
14649
14650 Distance in half-cycles between START and found instruction or head
14651 of BB is added to DISTANCE and returned. */
14652
14653 static int
14654 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
14655 rtx_insn *insn, int distance,
14656 rtx_insn *start, bool *found)
14657 {
14658 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
14659 rtx_insn *prev = start;
14660 rtx_insn *next = NULL;
14661
14662 *found = false;
14663
14664 while (prev
14665 && prev != insn
14666 && distance < LEA_SEARCH_THRESHOLD)
14667 {
14668 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
14669 {
14670 distance = increase_distance (prev, next, distance);
14671 if (insn_defines_reg (regno1, regno2, prev))
14672 {
14673 if (recog_memoized (prev) < 0
14674 || get_attr_type (prev) != TYPE_LEA)
14675 {
14676 *found = true;
14677 return distance;
14678 }
14679 }
14680
14681 next = prev;
14682 }
14683 if (prev == BB_HEAD (bb))
14684 break;
14685
14686 prev = PREV_INSN (prev);
14687 }
14688
14689 return distance;
14690 }
14691
14692 /* Search backward for non-agu definition of register number REGNO1
14693 or register number REGNO2 in INSN's basic block until
14694 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14695 2. Reach neighbor BBs boundary, or
14696 3. Reach agu definition.
14697 Returns the distance between the non-agu definition point and INSN.
14698 If no definition point, returns -1. */
14699
14700 static int
14701 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14702 rtx_insn *insn)
14703 {
14704 basic_block bb = BLOCK_FOR_INSN (insn);
14705 int distance = 0;
14706 bool found = false;
14707
14708 if (insn != BB_HEAD (bb))
14709 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
14710 distance, PREV_INSN (insn),
14711 &found);
14712
14713 if (!found && distance < LEA_SEARCH_THRESHOLD)
14714 {
14715 edge e;
14716 edge_iterator ei;
14717 bool simple_loop = false;
14718
14719 FOR_EACH_EDGE (e, ei, bb->preds)
14720 if (e->src == bb)
14721 {
14722 simple_loop = true;
14723 break;
14724 }
14725
14726 if (simple_loop)
14727 distance = distance_non_agu_define_in_bb (regno1, regno2,
14728 insn, distance,
14729 BB_END (bb), &found);
14730 else
14731 {
14732 int shortest_dist = -1;
14733 bool found_in_bb = false;
14734
14735 FOR_EACH_EDGE (e, ei, bb->preds)
14736 {
14737 int bb_dist
14738 = distance_non_agu_define_in_bb (regno1, regno2,
14739 insn, distance,
14740 BB_END (e->src),
14741 &found_in_bb);
14742 if (found_in_bb)
14743 {
14744 if (shortest_dist < 0)
14745 shortest_dist = bb_dist;
14746 else if (bb_dist > 0)
14747 shortest_dist = MIN (bb_dist, shortest_dist);
14748
14749 found = true;
14750 }
14751 }
14752
14753 distance = shortest_dist;
14754 }
14755 }
14756
14757 /* get_attr_type may modify recog data. We want to make sure
14758 that recog data is valid for instruction INSN, on which
14759 distance_non_agu_define is called. INSN is unchanged here. */
14760 extract_insn_cached (insn);
14761
14762 if (!found)
14763 return -1;
14764
14765 return distance >> 1;
14766 }
14767
14768 /* Return the distance in half-cycles between INSN and the next
14769 insn that uses register number REGNO in a memory address, added
14770 to DISTANCE. Return -1 if REGNO is set.
14771
14772 Put true value into *FOUND if register usage was found and
14773 false otherwise.
14774 Put true value into *REDEFINED if register redefinition was
14775 found and false otherwise. */
14776
14777 static int
14778 distance_agu_use_in_bb (unsigned int regno,
14779 rtx_insn *insn, int distance, rtx_insn *start,
14780 bool *found, bool *redefined)
14781 {
14782 basic_block bb = NULL;
14783 rtx_insn *next = start;
14784 rtx_insn *prev = NULL;
14785
14786 *found = false;
14787 *redefined = false;
14788
14789 if (start != NULL_RTX)
14790 {
14791 bb = BLOCK_FOR_INSN (start);
14792 if (start != BB_HEAD (bb))
14793 /* If insn and start belong to the same bb, set prev to insn,
14794 so the call to increase_distance will increase the distance
14795 between insns by 1. */
14796 prev = insn;
14797 }
14798
14799 while (next
14800 && next != insn
14801 && distance < LEA_SEARCH_THRESHOLD)
14802 {
14803 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
14804 {
14805 distance = increase_distance(prev, next, distance);
14806 if (insn_uses_reg_mem (regno, next))
14807 {
14808 /* Return DISTANCE if OP0 is used in memory
14809 address in NEXT. */
14810 *found = true;
14811 return distance;
14812 }
14813
14814 if (insn_defines_reg (regno, INVALID_REGNUM, next))
14815 {
14816 /* Return -1 if OP0 is set in NEXT. */
14817 *redefined = true;
14818 return -1;
14819 }
14820
14821 prev = next;
14822 }
14823
14824 if (next == BB_END (bb))
14825 break;
14826
14827 next = NEXT_INSN (next);
14828 }
14829
14830 return distance;
14831 }
14832
14833 /* Return the distance between INSN and the next insn that uses
14834 register number REGNO0 in a memory address. Return -1 if no such
14835 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
14836
14837 static int
14838 distance_agu_use (unsigned int regno0, rtx_insn *insn)
14839 {
14840 basic_block bb = BLOCK_FOR_INSN (insn);
14841 int distance = 0;
14842 bool found = false;
14843 bool redefined = false;
14844
14845 if (insn != BB_END (bb))
14846 distance = distance_agu_use_in_bb (regno0, insn, distance,
14847 NEXT_INSN (insn),
14848 &found, &redefined);
14849
14850 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
14851 {
14852 edge e;
14853 edge_iterator ei;
14854 bool simple_loop = false;
14855
14856 FOR_EACH_EDGE (e, ei, bb->succs)
14857 if (e->dest == bb)
14858 {
14859 simple_loop = true;
14860 break;
14861 }
14862
14863 if (simple_loop)
14864 distance = distance_agu_use_in_bb (regno0, insn,
14865 distance, BB_HEAD (bb),
14866 &found, &redefined);
14867 else
14868 {
14869 int shortest_dist = -1;
14870 bool found_in_bb = false;
14871 bool redefined_in_bb = false;
14872
14873 FOR_EACH_EDGE (e, ei, bb->succs)
14874 {
14875 int bb_dist
14876 = distance_agu_use_in_bb (regno0, insn,
14877 distance, BB_HEAD (e->dest),
14878 &found_in_bb, &redefined_in_bb);
14879 if (found_in_bb)
14880 {
14881 if (shortest_dist < 0)
14882 shortest_dist = bb_dist;
14883 else if (bb_dist > 0)
14884 shortest_dist = MIN (bb_dist, shortest_dist);
14885
14886 found = true;
14887 }
14888 }
14889
14890 distance = shortest_dist;
14891 }
14892 }
14893
14894 if (!found || redefined)
14895 return -1;
14896
14897 return distance >> 1;
14898 }
14899
14900 /* Define this macro to tune LEA priority vs ADD; it takes effect when
14901 there is a dilemma of choosing LEA or ADD.
14902 Negative value: ADD is preferred over LEA
14903 Zero: Neutral
14904 Positive value: LEA is preferred over ADD. */
14905 #define IX86_LEA_PRIORITY 0
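/* For illustration (register choice hypothetical): when the heuristics
   below decide against lea, a three-operand add like

	leal	(%rdi,%rsi), %eax

   is split by ix86_avoid_lea_for_add into

	movl	%edi, %eax
	addl	%esi, %eax

   trading the possible AGU stall for one extra instruction.  */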
14906
14907 /* Return true if use of lea INSN has a performance advantage
14908 over a sequence of instructions. The instruction sequence has
14909 SPLIT_COST cycles higher latency than the lea latency. */
14910
14911 static bool
14912 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
14913 unsigned int regno2, int split_cost, bool has_scale)
14914 {
14915 int dist_define, dist_use;
14916
14917 /* For targets other than Bonnell (!TARGET_BONNELL), using a 2-source
14918 or 3-source LEA for a non-destructive destination, or for the
14919 ability to use SCALE, is reason enough to use the LEA. */
14920 if (!TARGET_BONNELL)
14921 {
14922 if (has_scale)
14923 return true;
14924 if (split_cost < 1)
14925 return false;
14926 if (regno0 == regno1 || regno0 == regno2)
14927 return false;
14928 return true;
14929 }
14930
14931 rtx_insn *rinsn = recog_data.insn;
14932
14933 dist_define = distance_non_agu_define (regno1, regno2, insn);
14934 dist_use = distance_agu_use (regno0, insn);
14935
14936 /* distance_non_agu_define can call extract_insn_cached. If this function
14937 is called from define_split conditions, that can break insn splitting,
14938 because split_insns works by clearing recog_data.insn and then modifying
14939 the recog_data.operand array to match the various split conditions. */
14940 if (recog_data.insn != rinsn)
14941 recog_data.insn = NULL;
14942
14943 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
14944 {
14945 /* If there is no non-AGU operand definition, no AGU
14946 operand usage and the split cost is 0, then both the lea
14947 and non-lea variants have the same priority. Currently
14948 we prefer lea for 64-bit code and non-lea for 32-bit
14949 code. */
14950 if (dist_use < 0 && split_cost == 0)
14951 return TARGET_64BIT || IX86_LEA_PRIORITY;
14952 else
14953 return true;
14954 }
14955
14956 /* With a longer definition distance, lea is preferable.
14957 Here we adjust it to take into account the splitting cost
14958 and lea priority. */
14959 dist_define += split_cost + IX86_LEA_PRIORITY;
14960
14961 /* If there is no use in a memory address, then we just check
14962 that the split cost exceeds the AGU stall. */
14963 if (dist_use < 0)
14964 return dist_define > LEA_MAX_STALL;
14965
14966 /* If this insn has both backward non-agu dependence and forward
14967 agu dependence, the one with the shorter distance takes effect. */
14968 return dist_define >= dist_use;
14969 }
14970
14971 /* Return true if it is legal to clobber flags by INSN and
14972 false otherwise. */
14973
14974 static bool
14975 ix86_ok_to_clobber_flags (rtx_insn *insn)
14976 {
14977 basic_block bb = BLOCK_FOR_INSN (insn);
14978 df_ref use;
14979 bitmap live;
14980
14981 while (insn)
14982 {
14983 if (NONDEBUG_INSN_P (insn))
14984 {
14985 FOR_EACH_INSN_USE (use, insn)
14986 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
14987 return false;
14988
14989 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
14990 return true;
14991 }
14992
14993 if (insn == BB_END (bb))
14994 break;
14995
14996 insn = NEXT_INSN (insn);
14997 }
14998
14999 live = df_get_live_out (bb);
15000 return !REGNO_REG_SET_P (live, FLAGS_REG);
15001 }
15002
15003 /* Return true if we need to split op0 = op1 + op2 into a sequence of
15004 move and add to avoid AGU stalls. */
15005
15006 bool
15007 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
15008 {
15009 unsigned int regno0, regno1, regno2;
15010
15011 /* Check if we need to optimize. */
15012 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15013 return false;
15014
15015 /* Check it is correct to split here. */
15016 if (!ix86_ok_to_clobber_flags (insn))
15017 return false;
15018
15019 regno0 = true_regnum (operands[0]);
15020 regno1 = true_regnum (operands[1]);
15021 regno2 = true_regnum (operands[2]);
15022
15023 /* We only need to split adds with a non-destructive
15024 destination operand. */
15025 if (regno0 == regno1 || regno0 == regno2)
15026 return false;
15027 else
15028 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
15029 }
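
/* A sketch of the transformation gated by ix86_avoid_lea_for_add above
   (hypothetical registers): instead of emitting the three-operand add

	lea	(%rsi,%rdx), %rdi

   the insn is split into

	mov	%rsi, %rdi
	add	%rdx, %rdi

   whenever ix86_lea_outperforms, called with SPLIT_COST 1, decides that
   the lea does not win over the mov/add pair.  */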
15030
15031 /* Return true if we should emit lea instruction instead of mov
15032 instruction. */
15033
15034 bool
15035 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
15036 {
15037 unsigned int regno0, regno1;
15038
15039 /* Check if we need to optimize. */
15040 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15041 return false;
15042
15043 /* Use lea for reg to reg moves only. */
15044 if (!REG_P (operands[0]) || !REG_P (operands[1]))
15045 return false;
15046
15047 regno0 = true_regnum (operands[0]);
15048 regno1 = true_regnum (operands[1]);
15049
15050 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
15051 }
15052
15053 /* Return true if we need to split lea into a sequence of
15054 instructions to avoid AGU stalls. */
15055
15056 bool
15057 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
15058 {
15059 unsigned int regno0, regno1, regno2;
15060 int split_cost;
15061 struct ix86_address parts;
15062 int ok;
15063
15064 /* The "at least two components" test below might not catch simple
15065 move or zero extension insns if parts.base is non-NULL and parts.disp
15066 is const0_rtx as the only components in the address, e.g. if the
15067 register is %rbp or %r13. As this test is much cheaper and moves or
15068 zero extensions are the common case, do this check first. */
15069 if (REG_P (operands[1])
15070 || (SImode_address_operand (operands[1], VOIDmode)
15071 && REG_P (XEXP (operands[1], 0))))
15072 return false;
15073
15074 /* Check if it is OK to split here. */
15075 if (!ix86_ok_to_clobber_flags (insn))
15076 return false;
15077
15078 ok = ix86_decompose_address (operands[1], &parts);
15079 gcc_assert (ok);
15080
15081 /* There should be at least two components in the address. */
15082 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
15083 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
15084 return false;
15085
15086 /* We should not split into add if a non-legitimate PIC
15087 operand is used as the displacement. */
15088 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
15089 return false;
15090
15091 regno0 = true_regnum (operands[0]);
15092 regno1 = INVALID_REGNUM;
15093 regno2 = INVALID_REGNUM;
15094
15095 if (parts.base)
15096 regno1 = true_regnum (parts.base);
15097 if (parts.index)
15098 regno2 = true_regnum (parts.index);
15099
15100 /* Use add for a = a + b and a = b + a since it is faster and shorter
15101 than lea for most processors. For processors like BONNELL, if
15102 the destination register of LEA holds an actual address which will
15103 be used soon, LEA is better; otherwise ADD is better. */
15104 if (!TARGET_BONNELL
15105 && parts.scale == 1
15106 && (!parts.disp || parts.disp == const0_rtx)
15107 && (regno0 == regno1 || regno0 == regno2))
15108 return true;
15109
15110 /* Check if we need to optimize. */
15111 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
15112 return false;
15113
15114 split_cost = 0;
15115
15116 /* Compute how many cycles we will add to the execution time
15117 if we split the lea into a sequence of instructions. */
15118 if (parts.base || parts.index)
15119 {
15120 /* Have to use a mov instruction if the non-destructive
15121 destination form is used. */
15122 if (regno1 != regno0 && regno2 != regno0)
15123 split_cost += 1;
15124
15125 /* Have to add index to base if both exist. */
15126 if (parts.base && parts.index)
15127 split_cost += 1;
15128
15129 /* Have to use shift and adds if scale is 2 or greater. */
15130 if (parts.scale > 1)
15131 {
15132 if (regno0 != regno1)
15133 split_cost += 1;
15134 else if (regno2 == regno0)
15135 split_cost += 4;
15136 else
15137 split_cost += parts.scale;
15138 }
15139
15140 /* Have to use an add instruction with an immediate if
15141 disp is nonzero. */
15142 if (parts.disp && parts.disp != const0_rtx)
15143 split_cost += 1;
15144
15145 /* Subtract the price of lea. */
15146 split_cost -= 1;
15147 }
15148
15149 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
15150 parts.scale > 1);
15151 }
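
/* A worked example of the SPLIT_COST computation above (hypothetical
   operands): for

	lea	0x8(%rbx,%rcx,4), %rax

   the cost is 1 (mov for the non-destructive destination)
   + 1 (adding the index to the base) + 1 (scale handling, since
   regno0 != regno1) + 1 (adding the displacement) - 1 (the lea itself)
   = 3, which ix86_lea_outperforms then weighs against the AGU
   define/use distances.  */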
15152
15153 /* Return true if it is OK to optimize an ADD operation to a LEA
15154 operation to avoid flag register consumption. For most processors,
15155 ADD is faster than LEA. For processors like BONNELL, if the
15156 destination register of LEA holds an actual address which will be
15157 used soon, LEA is better; otherwise ADD is better. */
15158
15159 bool
15160 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
15161 {
15162 unsigned int regno0 = true_regnum (operands[0]);
15163 unsigned int regno1 = true_regnum (operands[1]);
15164 unsigned int regno2 = true_regnum (operands[2]);
15165
15166 /* If a = b + c, (a != b && a != c), we must use the lea form. */
15167 if (regno0 != regno1 && regno0 != regno2)
15168 return true;
15169
15170 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15171 return false;
15172
15173 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
15174 }
15175
15176 /* Return true if the destination reg of SET_BODY is the shift count of
15177 USE_BODY. */
15178
15179 static bool
15180 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15181 {
15182 rtx set_dest;
15183 rtx shift_rtx;
15184 int i;
15185
15186 /* Retrieve destination of SET_BODY. */
15187 switch (GET_CODE (set_body))
15188 {
15189 case SET:
15190 set_dest = SET_DEST (set_body);
15191 if (!set_dest || !REG_P (set_dest))
15192 return false;
15193 break;
15194 case PARALLEL:
15195 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15196 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15197 use_body))
15198 return true;
15199 /* FALLTHROUGH */
15200 default:
15201 return false;
15202 }
15203
15204 /* Retrieve shift count of USE_BODY. */
15205 switch (GET_CODE (use_body))
15206 {
15207 case SET:
15208 shift_rtx = XEXP (use_body, 1);
15209 break;
15210 case PARALLEL:
15211 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15212 if (ix86_dep_by_shift_count_body (set_body,
15213 XVECEXP (use_body, 0, i)))
15214 return true;
15215 /* FALLTHROUGH */
15216 default:
15217 return false;
15218 }
15219
15220 if (shift_rtx
15221 && (GET_CODE (shift_rtx) == ASHIFT
15222 || GET_CODE (shift_rtx) == LSHIFTRT
15223 || GET_CODE (shift_rtx) == ASHIFTRT
15224 || GET_CODE (shift_rtx) == ROTATE
15225 || GET_CODE (shift_rtx) == ROTATERT))
15226 {
15227 rtx shift_count = XEXP (shift_rtx, 1);
15228
15229 /* Return true if shift count is dest of SET_BODY. */
15230 if (REG_P (shift_count))
15231 {
15232 /* Add this check since this code can be invoked before register
15233 allocation by the pre-reload scheduler. */
15234 if (reload_completed
15235 && true_regnum (set_dest) == true_regnum (shift_count))
15236 return true;
15237 else if (REGNO (set_dest) == REGNO (shift_count))
15238 return true;
15239 }
15240 }
15241
15242 return false;
15243 }
15244
15245 /* Return true if the destination reg of SET_INSN is the shift count of
15246 USE_INSN. */
15247
15248 bool
15249 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15250 {
15251 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15252 PATTERN (use_insn));
15253 }
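
/* For example (hypothetical insns), with SET_INSN

	(set (reg:QI cl) (reg:QI dil))

   and USE_INSN

	(set (reg:DI ax) (ashift:DI (reg:DI ax) (reg:QI cl)))

   this returns true, because the shift count of USE_INSN is the
   destination register of SET_INSN.  */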
15254
15255 /* Return TRUE or FALSE depending on whether the unary operator meets the
15256 appropriate constraints. */
15257
15258 bool
15259 ix86_unary_operator_ok (enum rtx_code,
15260 machine_mode,
15261 rtx operands[2])
15262 {
15263 /* If one of operands is memory, source and destination must match. */
15264 if ((MEM_P (operands[0])
15265 || MEM_P (operands[1]))
15266 && ! rtx_equal_p (operands[0], operands[1]))
15267 return false;
15268 return true;
15269 }
15270
15271 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15272 are ok, keeping in mind the possible movddup alternative. */
15273
15274 bool
15275 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15276 {
15277 if (MEM_P (operands[0]))
15278 return rtx_equal_p (operands[0], operands[1 + high]);
15279 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15280 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15281 return true;
15282 }
15283
15284 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15285 then replicate the value for all elements of the vector
15286 register. */
15287
15288 rtx
15289 ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
15290 {
15291 int i, n_elt;
15292 rtvec v;
15293 machine_mode scalar_mode;
15294
15295 switch (mode)
15296 {
15297 case E_V64QImode:
15298 case E_V32QImode:
15299 case E_V16QImode:
15300 case E_V32HImode:
15301 case E_V16HImode:
15302 case E_V8HImode:
15303 case E_V16SImode:
15304 case E_V8SImode:
15305 case E_V4SImode:
15306 case E_V8DImode:
15307 case E_V4DImode:
15308 case E_V2DImode:
15309 gcc_assert (vect);
15310 /* FALLTHRU */
15311 case E_V16SFmode:
15312 case E_V8SFmode:
15313 case E_V4SFmode:
15314 case E_V2SFmode:
15315 case E_V8DFmode:
15316 case E_V4DFmode:
15317 case E_V2DFmode:
15318 n_elt = GET_MODE_NUNITS (mode);
15319 v = rtvec_alloc (n_elt);
15320 scalar_mode = GET_MODE_INNER (mode);
15321
15322 RTVEC_ELT (v, 0) = value;
15323
15324 for (i = 1; i < n_elt; ++i)
15325 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
15326
15327 return gen_rtx_CONST_VECTOR (mode, v);
15328
15329 default:
15330 gcc_unreachable ();
15331 }
15332 }
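
/* Example (hypothetical arguments): ix86_build_const_vector (V4SImode,
   false, x) yields the CONST_VECTOR {x, 0, 0, 0}, while passing
   vect = true replicates the value into every element: {x, x, x, x}.  */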
15333
15334 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15335 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15336 for an SSE register. If VECT is true, then replicate the mask for
15337 all elements of the vector register. If INVERT is true, then create
15338 a mask excluding the sign bit. */
15339
15340 rtx
15341 ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
15342 {
15343 machine_mode vec_mode, imode;
15344 wide_int w;
15345 rtx mask, v;
15346
15347 switch (mode)
15348 {
15349 case E_V16SImode:
15350 case E_V16SFmode:
15351 case E_V8SImode:
15352 case E_V4SImode:
15353 case E_V8SFmode:
15354 case E_V4SFmode:
15355 case E_V2SFmode:
15356 vec_mode = mode;
15357 imode = SImode;
15358 break;
15359
15360 case E_V8DImode:
15361 case E_V4DImode:
15362 case E_V2DImode:
15363 case E_V8DFmode:
15364 case E_V4DFmode:
15365 case E_V2DFmode:
15366 vec_mode = mode;
15367 imode = DImode;
15368 break;
15369
15370 case E_TImode:
15371 case E_TFmode:
15372 vec_mode = VOIDmode;
15373 imode = TImode;
15374 break;
15375
15376 default:
15377 gcc_unreachable ();
15378 }
15379
15380 machine_mode inner_mode = GET_MODE_INNER (mode);
15381 w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
15382 GET_MODE_BITSIZE (inner_mode));
15383 if (invert)
15384 w = wi::bit_not (w);
15385
15386 /* Force this value into the low part of a fp vector constant. */
15387 mask = immed_wide_int_const (w, imode);
15388 mask = gen_lowpart (inner_mode, mask);
15389
15390 if (vec_mode == VOIDmode)
15391 return force_reg (inner_mode, mask);
15392
15393 v = ix86_build_const_vector (vec_mode, vect, mask);
15394 return force_reg (vec_mode, v);
15395 }
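
/* Example (hypothetical arguments): ix86_build_signbit_mask (V4SFmode,
   true, false) returns a register holding {0x80000000, 0x80000000,
   0x80000000, 0x80000000}, suitable for negation with xorps, while
   INVERT = true gives {0x7fffffff, ...}, suitable for an absolute value
   with andps.  */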
15396
15397 /* Return TRUE or FALSE depending on whether the first SET in INSN
15398 has source and destination with matching CC modes, and that the
15399 CC mode is at least as constrained as REQ_MODE. */
15400
15401 bool
15402 ix86_match_ccmode (rtx insn, machine_mode req_mode)
15403 {
15404 rtx set;
15405 machine_mode set_mode;
15406
15407 set = PATTERN (insn);
15408 if (GET_CODE (set) == PARALLEL)
15409 set = XVECEXP (set, 0, 0);
15410 gcc_assert (GET_CODE (set) == SET);
15411 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15412
15413 set_mode = GET_MODE (SET_DEST (set));
15414 switch (set_mode)
15415 {
15416 case E_CCNOmode:
15417 if (req_mode != CCNOmode
15418 && (req_mode != CCmode
15419 || XEXP (SET_SRC (set), 1) != const0_rtx))
15420 return false;
15421 break;
15422 case E_CCmode:
15423 if (req_mode == CCGCmode)
15424 return false;
15425 /* FALLTHRU */
15426 case E_CCGCmode:
15427 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15428 return false;
15429 /* FALLTHRU */
15430 case E_CCGOCmode:
15431 if (req_mode == CCZmode)
15432 return false;
15433 /* FALLTHRU */
15434 case E_CCZmode:
15435 break;
15436
15437 case E_CCGZmode:
15438
15439 case E_CCAmode:
15440 case E_CCCmode:
15441 case E_CCOmode:
15442 case E_CCPmode:
15443 case E_CCSmode:
15444 if (set_mode != req_mode)
15445 return false;
15446 break;
15447
15448 default:
15449 gcc_unreachable ();
15450 }
15451
15452 return GET_MODE (SET_SRC (set)) == set_mode;
15453 }
15454
15455 machine_mode
15456 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15457 {
15458 machine_mode mode = GET_MODE (op0);
15459
15460 if (SCALAR_FLOAT_MODE_P (mode))
15461 {
15462 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15463 return CCFPmode;
15464 }
15465
15466 switch (code)
15467 {
15468 /* Only zero flag is needed. */
15469 case EQ: /* ZF=0 */
15470 case NE: /* ZF!=0 */
15471 return CCZmode;
15472 /* Codes needing carry flag. */
15473 case GEU: /* CF=0 */
15474 case LTU: /* CF=1 */
15475 rtx geu;
15476 /* Detect overflow checks. They need just the carry flag. */
15477 if (GET_CODE (op0) == PLUS
15478 && (rtx_equal_p (op1, XEXP (op0, 0))
15479 || rtx_equal_p (op1, XEXP (op0, 1))))
15480 return CCCmode;
15481 /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
15482 Match LTU of op0
15483 (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
15484 and op1
15485 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
15486 where CC_CCC is either CC or CCC. */
15487 else if (code == LTU
15488 && GET_CODE (op0) == NEG
15489 && GET_CODE (geu = XEXP (op0, 0)) == GEU
15490 && REG_P (XEXP (geu, 0))
15491 && (GET_MODE (XEXP (geu, 0)) == CCCmode
15492 || GET_MODE (XEXP (geu, 0)) == CCmode)
15493 && REGNO (XEXP (geu, 0)) == FLAGS_REG
15494 && XEXP (geu, 1) == const0_rtx
15495 && GET_CODE (op1) == LTU
15496 && REG_P (XEXP (op1, 0))
15497 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
15498 && REGNO (XEXP (op1, 0)) == FLAGS_REG
15499 && XEXP (op1, 1) == const0_rtx)
15500 return CCCmode;
15501 else
15502 return CCmode;
15503 case GTU: /* CF=0 & ZF=0 */
15504 case LEU: /* CF=1 | ZF=1 */
15505 return CCmode;
15506 /* Codes possibly doable only with sign flag when
15507 comparing against zero. */
15508 case GE: /* SF=OF or SF=0 */
15509 case LT: /* SF<>OF or SF=1 */
15510 if (op1 == const0_rtx)
15511 return CCGOCmode;
15512 else
15513 /* For other cases Carry flag is not required. */
15514 return CCGCmode;
15515 /* Codes doable only with the sign flag when comparing
15516 against zero, but we lack a jump instruction for them,
15517 so we need to use relational tests against overflow,
15518 which thus needs to be zero. */
15519 case GT: /* ZF=0 & SF=OF */
15520 case LE: /* ZF=1 | SF<>OF */
15521 if (op1 == const0_rtx)
15522 return CCNOmode;
15523 else
15524 return CCGCmode;
15525 /* The strcmp pattern does (use flags) and combine may ask us for the
15526 proper mode. */
15527 case USE:
15528 return CCmode;
15529 default:
15530 gcc_unreachable ();
15531 }
15532 }
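
/* A few sample mappings implemented above (hypothetical operands):

	(eq reg op)		-> CCZmode	only ZF is needed
	(ltu (plus a b) a)	-> CCCmode	overflow check, only CF
	(ge reg (const_int 0))	-> CCGOCmode	SF/OF, carry not needed
	(gt reg reg)		-> CCGCmode	full signed comparison  */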
15533
15534 /* Return the fixed registers used for condition codes. */
15535
15536 static bool
15537 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15538 {
15539 *p1 = FLAGS_REG;
15540 *p2 = INVALID_REGNUM;
15541 return true;
15542 }
15543
15544 /* If two condition code modes are compatible, return a condition code
15545 mode which is compatible with both. Otherwise, return
15546 VOIDmode. */
15547
15548 static machine_mode
15549 ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
15550 {
15551 if (m1 == m2)
15552 return m1;
15553
15554 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15555 return VOIDmode;
15556
15557 if ((m1 == CCGCmode && m2 == CCGOCmode)
15558 || (m1 == CCGOCmode && m2 == CCGCmode))
15559 return CCGCmode;
15560
15561 if ((m1 == CCNOmode && m2 == CCGOCmode)
15562 || (m1 == CCGOCmode && m2 == CCNOmode))
15563 return CCNOmode;
15564
15565 if (m1 == CCZmode
15566 && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
15567 return m2;
15568 else if (m2 == CCZmode
15569 && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
15570 return m1;
15571
15572 switch (m1)
15573 {
15574 default:
15575 gcc_unreachable ();
15576
15577 case E_CCmode:
15578 case E_CCGCmode:
15579 case E_CCGOCmode:
15580 case E_CCNOmode:
15581 case E_CCAmode:
15582 case E_CCCmode:
15583 case E_CCOmode:
15584 case E_CCPmode:
15585 case E_CCSmode:
15586 case E_CCZmode:
15587 switch (m2)
15588 {
15589 default:
15590 return VOIDmode;
15591
15592 case E_CCmode:
15593 case E_CCGCmode:
15594 case E_CCGOCmode:
15595 case E_CCNOmode:
15596 case E_CCAmode:
15597 case E_CCCmode:
15598 case E_CCOmode:
15599 case E_CCPmode:
15600 case E_CCSmode:
15601 case E_CCZmode:
15602 return CCmode;
15603 }
15604
15605 case E_CCFPmode:
15606 /* These are only compatible with themselves, which we already
15607 checked above. */
15608 return VOIDmode;
15609 }
15610 }
15611
15612 /* Return the strategy to use for floating-point. We assume that fcomi is
15613 always preferable where available, since that is also true when looking at
15614 size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15615
15616 enum ix86_fpcmp_strategy
15617 ix86_fp_comparison_strategy (enum rtx_code)
15618 {
15619 /* Do fcomi/sahf based test when profitable. */
15620
15621 if (TARGET_CMOVE)
15622 return IX86_FPCMP_COMI;
15623
15624 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
15625 return IX86_FPCMP_SAHF;
15626
15627 return IX86_FPCMP_ARITH;
15628 }
15629
15630 /* Convert the comparison codes we use to represent FP comparisons to the
15631 integer code that will result in a proper branch. Return UNKNOWN if no
15632 such code is available. */
15633
15634 enum rtx_code
15635 ix86_fp_compare_code_to_integer (enum rtx_code code)
15636 {
15637 switch (code)
15638 {
15639 case GT:
15640 return GTU;
15641 case GE:
15642 return GEU;
15643 case ORDERED:
15644 case UNORDERED:
15645 return code;
15646 case UNEQ:
15647 return EQ;
15648 case UNLT:
15649 return LTU;
15650 case UNLE:
15651 return LEU;
15652 case LTGT:
15653 return NE;
15654 default:
15655 return UNKNOWN;
15656 }
15657 }
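
/* For instance, after a comi/ucomi style comparison the FP relation is
   reflected in CF and ZF only, so a floating-point GT branch is emitted
   with the unsigned-style JA (GTU) and GE with JAE (GEU), matching the
   mapping above.  */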
15658
15659 /* Zero-extend EXP, which may be in SImode, to a Pmode register. */
15660 rtx
15661 ix86_zero_extend_to_Pmode (rtx exp)
15662 {
15663 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
15664 }
15665
15666 /* Return true if the function being called was marked with attribute
15667 "noplt" or using -fno-plt and we are compiling for non-PIC. We need
15668 to handle the non-PIC case in the backend because there is no easy
15669 interface for the front-end to force non-PLT calls to use the GOT.
15670 This is currently used only with 64-bit or 32-bit GOT32X ELF targets
15671 to call the function marked "noplt" indirectly. */
15672
15673 static bool
15674 ix86_nopic_noplt_attribute_p (rtx call_op)
15675 {
15676 if (flag_pic || ix86_cmodel == CM_LARGE
15677 || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
15678 || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
15679 || SYMBOL_REF_LOCAL_P (call_op))
15680 return false;
15681
15682 tree symbol_decl = SYMBOL_REF_DECL (call_op);
15683
15684 if (!flag_plt
15685 || (symbol_decl != NULL_TREE
15686 && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
15687 return true;
15688
15689 return false;
15690 }
15691
15692 /* Helper to output the jmp/call. */
15693 static void
15694 ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
15695 {
15696 if (thunk_name != NULL)
15697 {
15698 fprintf (asm_out_file, "\tjmp\t");
15699 assemble_name (asm_out_file, thunk_name);
15700 putc ('\n', asm_out_file);
15701 }
15702 else
15703 output_indirect_thunk (regno);
15704 }
15705
15706 /* Output indirect branch via a call and return thunk. CALL_OP is a
15707 register which contains the branch target. XASM is the assembly
15708 template for CALL_OP. Branch is a tail call if SIBCALL_P is true.
15709 A normal call is converted to:
15710
15711 call __x86_indirect_thunk_reg
15712
15713 and a tail call is converted to:
15714
15715 jmp __x86_indirect_thunk_reg
15716 */
15717
15718 static void
15719 ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
15720 {
15721 char thunk_name_buf[32];
15722 char *thunk_name;
15723 enum indirect_thunk_prefix need_prefix
15724 = indirect_thunk_need_prefix (current_output_insn);
15725 int regno = REGNO (call_op);
15726
15727 if (cfun->machine->indirect_branch_type
15728 != indirect_branch_thunk_inline)
15729 {
15730 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15731 {
15732 int i = regno;
15733 if (i >= FIRST_REX_INT_REG)
15734 i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1);
15735 indirect_thunks_used |= 1 << i;
15736 }
15737 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15738 thunk_name = thunk_name_buf;
15739 }
15740 else
15741 thunk_name = NULL;
15742
15743 if (sibcall_p)
15744 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15745 else
15746 {
15747 if (thunk_name != NULL)
15748 {
15749 fprintf (asm_out_file, "\tcall\t");
15750 assemble_name (asm_out_file, thunk_name);
15751 putc ('\n', asm_out_file);
15752 return;
15753 }
15754
15755 char indirectlabel1[32];
15756 char indirectlabel2[32];
15757
15758 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15759 INDIRECT_LABEL,
15760 indirectlabelno++);
15761 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15762 INDIRECT_LABEL,
15763 indirectlabelno++);
15764
15765 /* Jump. */
15766 fputs ("\tjmp\t", asm_out_file);
15767 assemble_name_raw (asm_out_file, indirectlabel2);
15768 fputc ('\n', asm_out_file);
15769
15770 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15771
15772 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15773
15774 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15775
15776 /* Call. */
15777 fputs ("\tcall\t", asm_out_file);
15778 assemble_name_raw (asm_out_file, indirectlabel1);
15779 fputc ('\n', asm_out_file);
15780 }
15781 }
15782
15783 /* Output indirect branch via a call and return thunk. CALL_OP is
15784 the branch target. XASM is the assembly template for CALL_OP.
15785 Branch is a tail call if SIBCALL_P is true. A normal call is
15786 converted to:
15787
15788 jmp L2
15789 L1:
15790 push CALL_OP
15791 jmp __x86_indirect_thunk
15792 L2:
15793 call L1
15794
15795 and a tail call is converted to:
15796
15797 push CALL_OP
15798 jmp __x86_indirect_thunk
15799 */
15800
15801 static void
15802 ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
15803 bool sibcall_p)
15804 {
15805 char thunk_name_buf[32];
15806 char *thunk_name;
15807 char push_buf[64];
15808 enum indirect_thunk_prefix need_prefix
15809 = indirect_thunk_need_prefix (current_output_insn);
15810 int regno = -1;
15811
15812 if (cfun->machine->indirect_branch_type
15813 != indirect_branch_thunk_inline)
15814 {
15815 if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
15816 indirect_thunk_needed = true;
15817 indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
15818 thunk_name = thunk_name_buf;
15819 }
15820 else
15821 thunk_name = NULL;
15822
15823 snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
15824 TARGET_64BIT ? 'q' : 'l', xasm);
15825
15826 if (sibcall_p)
15827 {
15828 output_asm_insn (push_buf, &call_op);
15829 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15830 }
15831 else
15832 {
15833 char indirectlabel1[32];
15834 char indirectlabel2[32];
15835
15836 ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
15837 INDIRECT_LABEL,
15838 indirectlabelno++);
15839 ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
15840 INDIRECT_LABEL,
15841 indirectlabelno++);
15842
15843 /* Jump. */
15844 fputs ("\tjmp\t", asm_out_file);
15845 assemble_name_raw (asm_out_file, indirectlabel2);
15846 fputc ('\n', asm_out_file);
15847
15848 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
15849
15850 /* An external function may be called via GOT, instead of PLT. */
15851 if (MEM_P (call_op))
15852 {
15853 struct ix86_address parts;
15854 rtx addr = XEXP (call_op, 0);
15855 if (ix86_decompose_address (addr, &parts)
15856 && parts.base == stack_pointer_rtx)
15857 {
15858 /* Since call will adjust stack by -UNITS_PER_WORD,
15859 we must convert "disp(stack, index, scale)" to
15860 "disp+UNITS_PER_WORD(stack, index, scale)". */
15861 if (parts.index)
15862 {
15863 addr = gen_rtx_MULT (Pmode, parts.index,
15864 GEN_INT (parts.scale));
15865 addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15866 addr);
15867 }
15868 else
15869 addr = stack_pointer_rtx;
15870
15871 rtx disp;
15872 if (parts.disp != NULL_RTX)
15873 disp = plus_constant (Pmode, parts.disp,
15874 UNITS_PER_WORD);
15875 else
15876 disp = GEN_INT (UNITS_PER_WORD);
15877
15878 addr = gen_rtx_PLUS (Pmode, addr, disp);
15879 call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
15880 }
15881 }
15882
15883 output_asm_insn (push_buf, &call_op);
15884
15885 ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
15886
15887 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
15888
15889 /* Call. */
15890 fputs ("\tcall\t", asm_out_file);
15891 assemble_name_raw (asm_out_file, indirectlabel1);
15892 fputc ('\n', asm_out_file);
15893 }
15894 }
15895
15896 /* Output indirect branch via a call and return thunk. CALL_OP is
15897 the branch target. XASM is the assembly template for CALL_OP.
15898 Branch is a tail call if SIBCALL_P is true. */
15899
15900 static void
15901 ix86_output_indirect_branch (rtx call_op, const char *xasm,
15902 bool sibcall_p)
15903 {
15904 if (REG_P (call_op))
15905 ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
15906 else
15907 ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
15908 }
15909
15910 /* Output indirect jump. CALL_OP is the jump target. */
15911
15912 const char *
15913 ix86_output_indirect_jmp (rtx call_op)
15914 {
15915 if (cfun->machine->indirect_branch_type != indirect_branch_keep)
15916 {
15917 /* We can't have a red zone since "call" in the indirect thunk
15918 pushes the return address onto the stack, destroying the red zone. */
15919 if (ix86_red_zone_size != 0)
15920 gcc_unreachable ();
15921
15922 ix86_output_indirect_branch (call_op, "%0", true);
15923 return "";
15924 }
15925 else
15926 return "%!jmp\t%A0";
15927 }
15928
15929 /* Output return instrumentation for current function if needed. */
15930
15931 static void
15932 output_return_instrumentation (void)
15933 {
15934 if (ix86_instrument_return != instrument_return_none
15935 && flag_fentry
15936 && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
15937 {
15938 if (ix86_flag_record_return)
15939 fprintf (asm_out_file, "1:\n");
15940 switch (ix86_instrument_return)
15941 {
15942 case instrument_return_call:
15943 fprintf (asm_out_file, "\tcall\t__return__\n");
15944 break;
15945 case instrument_return_nop5:
15946 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
15947 fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
15948 break;
15949 case instrument_return_none:
15950 break;
15951 }
15952
15953 if (ix86_flag_record_return)
15954 {
15955 fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
15956 fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
15957 fprintf (asm_out_file, "\t.previous\n");
15958 }
15959 }
15960 }
15961
15962 /* Output the function return. Add a REP prefix to RET if LONG_P is
15963 true and the function return is kept. */
15964
15965 const char *
15966 ix86_output_function_return (bool long_p)
15967 {
15968 output_return_instrumentation ();
15969
15970 if (cfun->machine->function_return_type != indirect_branch_keep)
15971 {
15972 char thunk_name[32];
15973 enum indirect_thunk_prefix need_prefix
15974 = indirect_thunk_need_prefix (current_output_insn);
15975
15976 if (cfun->machine->function_return_type
15977 != indirect_branch_thunk_inline)
15978 {
15979 bool need_thunk = (cfun->machine->function_return_type
15980 == indirect_branch_thunk);
15981 indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
15982 true);
15983 indirect_return_needed |= need_thunk;
15984 fprintf (asm_out_file, "\tjmp\t");
15985 assemble_name (asm_out_file, thunk_name);
15986 putc ('\n', asm_out_file);
15987 }
15988 else
15989 output_indirect_thunk (INVALID_REGNUM);
15990
15991 return "";
15992 }
15993
15994 if (!long_p)
15995 return "%!ret";
15996
15997 return "rep%; ret";
15998 }
15999
16000 /* Output indirect function return. RET_OP is the function return
16001 target. */
16002
16003 const char *
16004 ix86_output_indirect_function_return (rtx ret_op)
16005 {
16006 if (cfun->machine->function_return_type != indirect_branch_keep)
16007 {
16008 char thunk_name[32];
16009 enum indirect_thunk_prefix need_prefix
16010 = indirect_thunk_need_prefix (current_output_insn);
16011 unsigned int regno = REGNO (ret_op);
16012 gcc_assert (regno == CX_REG);
16013
16014 if (cfun->machine->function_return_type
16015 != indirect_branch_thunk_inline)
16016 {
16017 bool need_thunk = (cfun->machine->function_return_type
16018 == indirect_branch_thunk);
16019 indirect_thunk_name (thunk_name, regno, need_prefix, true);
16020
16021 if (need_thunk)
16022 {
16023 indirect_return_via_cx = true;
16024 indirect_thunks_used |= 1 << CX_REG;
16025 }
16026 fprintf (asm_out_file, "\tjmp\t");
16027 assemble_name (asm_out_file, thunk_name);
16028 putc ('\n', asm_out_file);
16029 }
16030 else
16031 output_indirect_thunk (regno);
16032
16033 return "";
16034 }
16035 else
16036 return "%!jmp\t%A0";
16037 }
16038
16039 /* Output the assembly for a call instruction. */
16040
16041 const char *
16042 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
16043 {
16044 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
16045 bool output_indirect_p
16046 = (!TARGET_SEH
16047 && cfun->machine->indirect_branch_type != indirect_branch_keep);
16048 bool seh_nop_p = false;
16049 const char *xasm;
16050
16051 if (SIBLING_CALL_P (insn))
16052 {
16053 output_return_instrumentation ();
16054 if (direct_p)
16055 {
16056 if (ix86_nopic_noplt_attribute_p (call_op))
16057 {
16058 direct_p = false;
16059 if (TARGET_64BIT)
16060 {
16061 if (output_indirect_p)
16062 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16063 else
16064 xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16065 }
16066 else
16067 {
16068 if (output_indirect_p)
16069 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16070 else
16071 xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16072 }
16073 }
16074 else
16075 xasm = "%!jmp\t%P0";
16076 }
16077 /* SEH epilogue detection requires the indirect branch case
16078 to include REX.W. */
16079 else if (TARGET_SEH)
16080 xasm = "%!rex.W jmp\t%A0";
16081 else
16082 {
16083 if (output_indirect_p)
16084 xasm = "%0";
16085 else
16086 xasm = "%!jmp\t%A0";
16087 }
16088
16089 if (output_indirect_p && !direct_p)
16090 ix86_output_indirect_branch (call_op, xasm, true);
16091 else
16092 output_asm_insn (xasm, &call_op);
16093 return "";
16094 }
16095
16096 /* SEH unwinding can require an extra nop to be emitted in several
16097 circumstances. Determine if we have one of those. */
16098 if (TARGET_SEH)
16099 {
16100 rtx_insn *i;
16101
16102 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
16103 {
16104 /* Prevent a catch region from being adjacent to a jump that would
16105 be interpreted as an epilogue sequence by the unwinder. */
16106 if (JUMP_P (i) && CROSSING_JUMP_P (i))
16107 {
16108 seh_nop_p = true;
16109 break;
16110 }
16111
16112 /* If we get to another real insn, we don't need the nop. */
16113 if (INSN_P (i))
16114 break;
16115
16116 /* If we get to the epilogue note, prevent a catch region from
16117 being adjacent to the standard epilogue sequence. With non-call
16118 exceptions, we'll have done this during epilogue emission. */
16119 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
16120 && !flag_non_call_exceptions
16121 && !can_throw_internal (insn))
16122 {
16123 seh_nop_p = true;
16124 break;
16125 }
16126 }
16127
16128 /* If we didn't find a real insn following the call, prevent the
16129 unwinder from looking into the next function. */
16130 if (i == NULL)
16131 seh_nop_p = true;
16132 }
16133
16134 if (direct_p)
16135 {
16136 if (ix86_nopic_noplt_attribute_p (call_op))
16137 {
16138 direct_p = false;
16139 if (TARGET_64BIT)
16140 {
16141 if (output_indirect_p)
16142 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16143 else
16144 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
16145 }
16146 else
16147 {
16148 if (output_indirect_p)
16149 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
16150 else
16151 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
16152 }
16153 }
16154 else
16155 xasm = "%!call\t%P0";
16156 }
16157 else
16158 {
16159 if (output_indirect_p)
16160 xasm = "%0";
16161 else
16162 xasm = "%!call\t%A0";
16163 }
16164
16165 if (output_indirect_p && !direct_p)
16166 ix86_output_indirect_branch (call_op, xasm, false);
16167 else
16168 output_asm_insn (xasm, &call_op);
16169
16170 if (seh_nop_p)
16171 return "nop";
16172
16173 return "";
16174 }
16175 \f
16176 /* Return a MEM corresponding to a stack slot with mode MODE.
16177 Allocate a new slot if necessary.
16178
16179 The RTL for a function can have several slots available: N is
16180 which slot to use. */
16181
16182 rtx
16183 assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
16184 {
16185 struct stack_local_entry *s;
16186
16187 gcc_assert (n < MAX_386_STACK_LOCALS);
16188
16189 for (s = ix86_stack_locals; s; s = s->next)
16190 if (s->mode == mode && s->n == n)
16191 return validize_mem (copy_rtx (s->rtl));
16192
16193 s = ggc_alloc<stack_local_entry> ();
16194 s->n = n;
16195 s->mode = mode;
16196 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16197
16198 s->next = ix86_stack_locals;
16199 ix86_stack_locals = s;
16200 return validize_mem (copy_rtx (s->rtl));
16201 }
16202
16203 static void
16204 ix86_instantiate_decls (void)
16205 {
16206 struct stack_local_entry *s;
16207
16208 for (s = ix86_stack_locals; s; s = s->next)
16209 if (s->rtl != NULL_RTX)
16210 instantiate_decl_rtl (s->rtl);
16211 }
16212 \f
16213 /* Check whether x86 address PARTS is a pc-relative address. */
16214
16215 bool
16216 ix86_rip_relative_addr_p (struct ix86_address *parts)
16217 {
16218 rtx base, index, disp;
16219
16220 base = parts->base;
16221 index = parts->index;
16222 disp = parts->disp;
16223
16224 if (disp && !base && !index)
16225 {
16226 if (TARGET_64BIT)
16227 {
16228 rtx symbol = disp;
16229
16230 if (GET_CODE (disp) == CONST)
16231 symbol = XEXP (disp, 0);
16232 if (GET_CODE (symbol) == PLUS
16233 && CONST_INT_P (XEXP (symbol, 1)))
16234 symbol = XEXP (symbol, 0);
16235
16236 if (GET_CODE (symbol) == LABEL_REF
16237 || (GET_CODE (symbol) == SYMBOL_REF
16238 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
16239 || (GET_CODE (symbol) == UNSPEC
16240 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
16241 || XINT (symbol, 1) == UNSPEC_PCREL
16242 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
16243 return true;
16244 }
16245 }
16246 return false;
16247 }
16248
16249 /* Calculate the length of the memory address in the instruction encoding.
16250 Includes the addr32 prefix but does not include the one-byte modrm, opcode,
16251 or other prefixes. We never generate an addr32 prefix for an LEA insn. */
16252
16253 int
16254 memory_address_length (rtx addr, bool lea)
16255 {
16256 struct ix86_address parts;
16257 rtx base, index, disp;
16258 int len;
16259 int ok;
16260
16261 if (GET_CODE (addr) == PRE_DEC
16262 || GET_CODE (addr) == POST_INC
16263 || GET_CODE (addr) == PRE_MODIFY
16264 || GET_CODE (addr) == POST_MODIFY)
16265 return 0;
16266
16267 ok = ix86_decompose_address (addr, &parts);
16268 gcc_assert (ok);
16269
16270 len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
16271
16272 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
16273 if (TARGET_64BIT && !lea
16274 && (SImode_address_operand (addr, VOIDmode)
16275 || (parts.base && GET_MODE (parts.base) == SImode)
16276 || (parts.index && GET_MODE (parts.index) == SImode)))
16277 len++;
16278
16279 base = parts.base;
16280 index = parts.index;
16281 disp = parts.disp;
16282
16283 if (base && SUBREG_P (base))
16284 base = SUBREG_REG (base);
16285 if (index && SUBREG_P (index))
16286 index = SUBREG_REG (index);
16287
16288 gcc_assert (base == NULL_RTX || REG_P (base));
16289 gcc_assert (index == NULL_RTX || REG_P (index));
16290
16291 /* Rule of thumb:
16292 - esp as the base always wants an index,
16293 - ebp as the base always wants a displacement,
16294 - r12 as the base always wants an index,
16295 - r13 as the base always wants a displacement. */
16296
16297 /* Register Indirect. */
16298 if (base && !index && !disp)
16299 {
16300 /* esp (for its index) and ebp (for its displacement) need
16301 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
16302 code. */
16303 if (base == arg_pointer_rtx
16304 || base == frame_pointer_rtx
16305 || REGNO (base) == SP_REG
16306 || REGNO (base) == BP_REG
16307 || REGNO (base) == R12_REG
16308 || REGNO (base) == R13_REG)
16309 len++;
16310 }
16311
16312 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
16313 is not disp32, but disp32(%rip), so for disp32
16314 SIB byte is needed, unless print_operand_address
16315 optimizes it into disp32(%rip) or (%rip) is implied
16316 by UNSPEC. */
16317 else if (disp && !base && !index)
16318 {
16319 len += 4;
16320 if (!ix86_rip_relative_addr_p (&parts))
16321 len++;
16322 }
16323 else
16324 {
16325 /* Find the length of the displacement constant. */
16326 if (disp)
16327 {
16328 if (base && satisfies_constraint_K (disp))
16329 len += 1;
16330 else
16331 len += 4;
16332 }
16333 /* ebp always wants a displacement. Similarly r13. */
16334 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
16335 len++;
16336
16337 /* An index requires the two-byte modrm form.... */
16338 if (index
16339 /* ...like esp (or r12), which always wants an index. */
16340 || base == arg_pointer_rtx
16341 || base == frame_pointer_rtx
16342 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
16343 len++;
16344 }
16345
16346 return len;
16347 }
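
/* Worked examples (hypothetical addresses), counting only the bytes this
   function accounts for (no modrm, opcode or other prefixes):

	(%rax)			-> 0	plain register indirect
	(%rsp)			-> 1	esp/r12 bases force a SIB byte
	0x10(%rbp)		-> 1	8-bit displacement
	0x1000(%rbx,%rcx,4)	-> 5	SIB byte + 32-bit displacement
	foo(%rip)		-> 4	rip-relative disp32, no SIB  */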
16348
16349 /* Compute the default value for the "length_immediate" attribute. When
16350 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
16351 int
16352 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
16353 {
16354 int len = 0;
16355 int i;
16356 extract_insn_cached (insn);
16357 for (i = recog_data.n_operands - 1; i >= 0; --i)
16358 if (CONSTANT_P (recog_data.operand[i]))
16359 {
16360 enum attr_mode mode = get_attr_mode (insn);
16361
16362 gcc_assert (!len);
16363 if (shortform && CONST_INT_P (recog_data.operand[i]))
16364 {
16365 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
16366 switch (mode)
16367 {
16368 case MODE_QI:
16369 len = 1;
16370 continue;
16371 case MODE_HI:
16372 ival = trunc_int_for_mode (ival, HImode);
16373 break;
16374 case MODE_SI:
16375 ival = trunc_int_for_mode (ival, SImode);
16376 break;
16377 default:
16378 break;
16379 }
16380 if (IN_RANGE (ival, -128, 127))
16381 {
16382 len = 1;
16383 continue;
16384 }
16385 }
16386 switch (mode)
16387 {
16388 case MODE_QI:
16389 len = 1;
16390 break;
16391 case MODE_HI:
16392 len = 2;
16393 break;
16394 case MODE_SI:
16395 len = 4;
16396 break;
16397 /* Immediates for DImode instructions are encoded
16398 as 32-bit sign-extended values. */
16399 case MODE_DI:
16400 len = 4;
16401 break;
16402 default:
16403 fatal_insn ("unknown insn mode", insn);
16404 }
16405 }
16406 return len;
16407 }
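
/* Example (hypothetical insns): for "add $0x7f, %eax" with SHORTFORM set
   the immediate fits in a signed byte, so the result is 1; for
   "add $0x1234, %eax" the full 4 bytes of a MODE_SI immediate are
   counted.  */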
16408
16409 /* Compute default value for "length_address" attribute. */
16410 int
16411 ix86_attr_length_address_default (rtx_insn *insn)
16412 {
16413 int i;
16414
16415 if (get_attr_type (insn) == TYPE_LEA)
16416 {
16417 rtx set = PATTERN (insn), addr;
16418
16419 if (GET_CODE (set) == PARALLEL)
16420 set = XVECEXP (set, 0, 0);
16421
16422 gcc_assert (GET_CODE (set) == SET);
16423
16424 addr = SET_SRC (set);
16425
16426 return memory_address_length (addr, true);
16427 }
16428
16429 extract_insn_cached (insn);
16430 for (i = recog_data.n_operands - 1; i >= 0; --i)
16431 {
16432 rtx op = recog_data.operand[i];
16433 if (MEM_P (op))
16434 {
16435 constrain_operands_cached (insn, reload_completed);
16436 if (which_alternative != -1)
16437 {
16438 const char *constraints = recog_data.constraints[i];
16439 int alt = which_alternative;
16440
16441 while (*constraints == '=' || *constraints == '+')
16442 constraints++;
16443 while (alt-- > 0)
16444 while (*constraints++ != ',')
16445 ;
16446 /* Skip ignored operands. */
16447 if (*constraints == 'X')
16448 continue;
16449 }
16450
16451 int len = memory_address_length (XEXP (op, 0), false);
16452
16453 /* Account for segment prefix for non-default addr spaces. */
16454 if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
16455 len++;
16456
16457 return len;
16458 }
16459 }
16460 return 0;
16461 }
16462
16463 /* Compute the default value for the "length_vex" attribute. It includes
16464 the 2- or 3-byte VEX prefix and 1 opcode byte. */
16465
16466 int
16467 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
16468 bool has_vex_w)
16469 {
16470 int i;
16471
16472 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
16473 requires the 3-byte VEX prefix. */
16474 if (!has_0f_opcode || has_vex_w)
16475 return 3 + 1;
16476
16477 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
16478 if (!TARGET_64BIT)
16479 return 2 + 1;
16480
16481 extract_insn_cached (insn);
16482
16483 for (i = recog_data.n_operands - 1; i >= 0; --i)
16484 if (REG_P (recog_data.operand[i]))
16485 {
16486 /* The REX.W bit uses the 3-byte VEX prefix. */
16487 if (GET_MODE (recog_data.operand[i]) == DImode
16488 && GENERAL_REG_P (recog_data.operand[i]))
16489 return 3 + 1;
16490 }
16491 else
16492 {
16493 /* The REX.X or REX.B bits use the 3-byte VEX prefix. */
16494 if (MEM_P (recog_data.operand[i])
16495 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
16496 return 3 + 1;
16497 }
16498
16499 return 2 + 1;
16500 }
16501 \f
16502
16503 static bool
16504 ix86_class_likely_spilled_p (reg_class_t);
16505
16506 /* Return true if the lhs of INSN is a HW function argument register; set
16507 IS_SPILLED to true if it is a likely-spilled HW register. */
16508 static bool
16509 insn_is_function_arg (rtx insn, bool* is_spilled)
16510 {
16511 rtx dst;
16512
16513 if (!NONDEBUG_INSN_P (insn))
16514 return false;
16515 /* Call instructions are not movable; ignore them. */
16516 if (CALL_P (insn))
16517 return false;
16518 insn = PATTERN (insn);
16519 if (GET_CODE (insn) == PARALLEL)
16520 insn = XVECEXP (insn, 0, 0);
16521 if (GET_CODE (insn) != SET)
16522 return false;
16523 dst = SET_DEST (insn);
16524 if (REG_P (dst) && HARD_REGISTER_P (dst)
16525 && ix86_function_arg_regno_p (REGNO (dst)))
16526 {
16527 /* Is it a likely-spilled HW register? */
16528 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
16529 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
16530 *is_spilled = true;
16531 return true;
16532 }
16533 return false;
16534 }
16535
16536 /* Add output dependencies for a chain of adjacent function arguments, but
16537 only if there is a move to a likely-spilled HW register. Return the first
16538 argument if at least one dependence was added, or NULL otherwise. */
16539 static rtx_insn *
16540 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
16541 {
16542 rtx_insn *insn;
16543 rtx_insn *last = call;
16544 rtx_insn *first_arg = NULL;
16545 bool is_spilled = false;
16546
16547 head = PREV_INSN (head);
16548
16549 /* Find the argument-passing instruction nearest to the call. */
16550 while (true)
16551 {
16552 last = PREV_INSN (last);
16553 if (last == head)
16554 return NULL;
16555 if (!NONDEBUG_INSN_P (last))
16556 continue;
16557 if (insn_is_function_arg (last, &is_spilled))
16558 break;
16559 return NULL;
16560 }
16561
16562 first_arg = last;
16563 while (true)
16564 {
16565 insn = PREV_INSN (last);
16566 if (!INSN_P (insn))
16567 break;
16568 if (insn == head)
16569 break;
16570 if (!NONDEBUG_INSN_P (insn))
16571 {
16572 last = insn;
16573 continue;
16574 }
16575 if (insn_is_function_arg (insn, &is_spilled))
16576 {
16577 /* Add an output dependence between two function arguments if the chain
16578 of output arguments contains likely-spilled HW registers. */
16579 if (is_spilled)
16580 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16581 first_arg = last = insn;
16582 }
16583 else
16584 break;
16585 }
16586 if (!is_spilled)
16587 return NULL;
16588 return first_arg;
16589 }
16590
16591 /* Add an output or anti dependency from INSN to FIRST_ARG to restrict its
16592 code motion. */
16593 static void
16594 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
16595 {
16596 rtx set;
16597 rtx tmp;
16598
16599 set = single_set (insn);
16600 if (!set)
16601 return;
16602 tmp = SET_DEST (set);
16603 if (REG_P (tmp))
16604 {
16605 /* Add output dependency to the first function argument. */
16606 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
16607 return;
16608 }
16609 /* Add anti dependency. */
16610 add_dependence (first_arg, insn, REG_DEP_ANTI);
16611 }
16612
16613 /* Avoid cross-block motion of a function argument by adding a dependency
16614 from the first non-jump instruction in BB. */
16615 static void
16616 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
16617 {
16618 rtx_insn *insn = BB_END (bb);
16619
16620 while (insn)
16621 {
16622 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
16623 {
16624 rtx set = single_set (insn);
16625 if (set)
16626 {
16627 avoid_func_arg_motion (arg, insn);
16628 return;
16629 }
16630 }
16631 if (insn == BB_HEAD (bb))
16632 return;
16633 insn = PREV_INSN (insn);
16634 }
16635 }
16636
16637 /* Hook for the pre-reload scheduler - avoid motion of function arguments
16638 passed in likely-spilled HW registers. */
16639 static void
16640 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
16641 {
16642 rtx_insn *insn;
16643 rtx_insn *first_arg = NULL;
16644 if (reload_completed)
16645 return;
16646 while (head != tail && DEBUG_INSN_P (head))
16647 head = NEXT_INSN (head);
16648 for (insn = tail; insn != head; insn = PREV_INSN (insn))
16649 if (INSN_P (insn) && CALL_P (insn))
16650 {
16651 first_arg = add_parameter_dependencies (insn, head);
16652 if (first_arg)
16653 {
16654 /* Add a dependee for the first argument to predecessors, but only
16655 if the region contains more than one block. */
16656 basic_block bb = BLOCK_FOR_INSN (insn);
16657 int rgn = CONTAINING_RGN (bb->index);
16658 int nr_blks = RGN_NR_BLOCKS (rgn);
16659 /* Skip trivial regions and region head blocks that can have
16660 predecessors outside of region. */
16661 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
16662 {
16663 edge e;
16664 edge_iterator ei;
16665
16666 /* Regions are SCCs with the exception of selective
16667 scheduling with pipelining of outer blocks enabled.
16668 So also check that immediate predecessors of a non-head
16669 block are in the same region. */
16670 FOR_EACH_EDGE (e, ei, bb->preds)
16671 {
16672 /* Avoid creating loop-carried dependencies by using the
16673 topological ordering of the region. */
16674 if (rgn == CONTAINING_RGN (e->src->index)
16675 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
16676 add_dependee_for_func_arg (first_arg, e->src);
16677 }
16678 }
16679 insn = first_arg;
16680 if (insn == head)
16681 break;
16682 }
16683 }
16684 else if (first_arg)
16685 avoid_func_arg_motion (first_arg, insn);
16686 }
16687
16688 /* Hook for the pre-reload scheduler - set the priority of moves from likely-
16689 spilled HW registers to the maximum, to schedule them as soon as possible.
16690 These are moves from function argument registers at the top of the function
16691 entry and moves from function return value registers after a call. */
16692 static int
16693 ix86_adjust_priority (rtx_insn *insn, int priority)
16694 {
16695 rtx set;
16696
16697 if (reload_completed)
16698 return priority;
16699
16700 if (!NONDEBUG_INSN_P (insn))
16701 return priority;
16702
16703 set = single_set (insn);
16704 if (set)
16705 {
16706 rtx tmp = SET_SRC (set);
16707 if (REG_P (tmp)
16708 && HARD_REGISTER_P (tmp)
16709 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
16710 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
16711 return current_sched_info->sched_max_insns_priority;
16712 }
16713
16714 return priority;
16715 }
16716
16717 /* Prepare for scheduling pass. */
16718 static void
16719 ix86_sched_init_global (FILE *, int, int)
16720 {
16721 /* Install scheduling hooks for current CPU. Some of these hooks are used
16722 in time-critical parts of the scheduler, so we only set them up when
16723 they are actually used. */
16724 switch (ix86_tune)
16725 {
16726 case PROCESSOR_CORE2:
16727 case PROCESSOR_NEHALEM:
16728 case PROCESSOR_SANDYBRIDGE:
16729 case PROCESSOR_HASWELL:
16730 case PROCESSOR_GENERIC:
16731 /* Do not perform multipass scheduling for pre-reload schedule
16732 to save compile time. */
16733 if (reload_completed)
16734 {
16735 ix86_core2i7_init_hooks ();
16736 break;
16737 }
16738 /* Fall through. */
16739 default:
16740 targetm.sched.dfa_post_advance_cycle = NULL;
16741 targetm.sched.first_cycle_multipass_init = NULL;
16742 targetm.sched.first_cycle_multipass_begin = NULL;
16743 targetm.sched.first_cycle_multipass_issue = NULL;
16744 targetm.sched.first_cycle_multipass_backtrack = NULL;
16745 targetm.sched.first_cycle_multipass_end = NULL;
16746 targetm.sched.first_cycle_multipass_fini = NULL;
16747 break;
16748 }
16749 }
16750
16751 \f
16752 /* Implement TARGET_STATIC_RTX_ALIGNMENT. */
16753
16754 static HOST_WIDE_INT
16755 ix86_static_rtx_alignment (machine_mode mode)
16756 {
16757 if (mode == DFmode)
16758 return 64;
16759 if (ALIGN_MODE_128 (mode))
16760 return MAX (128, GET_MODE_ALIGNMENT (mode));
16761 return GET_MODE_ALIGNMENT (mode);
16762 }
16763
16764 /* Implement TARGET_CONSTANT_ALIGNMENT. */
16765
16766 static HOST_WIDE_INT
16767 ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
16768 {
16769 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16770 || TREE_CODE (exp) == INTEGER_CST)
16771 {
16772 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
16773 HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
16774 return MAX (mode_align, align);
16775 }
16776 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16777 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16778 return BITS_PER_WORD;
16779
16780 return align;
16781 }
16782
16783 /* Implement TARGET_EMPTY_RECORD_P. */
16784
16785 static bool
16786 ix86_is_empty_record (const_tree type)
16787 {
16788 if (!TARGET_64BIT)
16789 return false;
16790 return default_is_empty_record (type);
16791 }
16792
16793 /* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */
16794
16795 static void
16796 ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
16797 {
16798 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
16799
16800 if (!cum->warn_empty)
16801 return;
16802
16803 if (!TYPE_EMPTY_P (type))
16804 return;
16805
16806 /* Don't warn if the function isn't visible outside of the TU. */
16807 if (cum->decl && !TREE_PUBLIC (cum->decl))
16808 return;
16809
16810 const_tree ctx = get_ultimate_context (cum->decl);
16811 if (ctx != NULL_TREE
16812 && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
16813 return;
16814
16815 /* If the actual size of the type is zero, then there is no change
16816 in how objects of this size are passed. */
16817 if (int_size_in_bytes (type) == 0)
16818 return;
16819
16820 warning (OPT_Wabi, "empty class %qT parameter passing ABI "
16821 "changes in %<-fabi-version=12%> (GCC 8)", type);
16822
16823 /* Only warn once. */
16824 cum->warn_empty = false;
16825 }
16826
16827 /* This hook returns name of multilib ABI. */
16828
16829 static const char *
16830 ix86_get_multilib_abi_name (void)
16831 {
16832 if (!(TARGET_64BIT_P (ix86_isa_flags)))
16833 return "i386";
16834 else if (TARGET_X32_P (ix86_isa_flags))
16835 return "x32";
16836 else
16837 return "x86_64";
16838 }
16839
16840 /* Compute the alignment for a variable for Intel MCU psABI. TYPE is
16841 the data type, and ALIGN is the alignment that the object would
16842 ordinarily have. */
16843
16844 static int
16845 iamcu_alignment (tree type, int align)
16846 {
16847 machine_mode mode;
16848
16849 if (align < 32 || TYPE_USER_ALIGN (type))
16850 return align;
16851
16852 /* The Intel MCU psABI specifies that scalar types larger than 4 bytes are
16853 aligned to 4 bytes. */
16854 type = strip_array_types (type);
16855 if (TYPE_ATOMIC (type))
16856 return align;
16857
16858 mode = TYPE_MODE (type);
16859 switch (GET_MODE_CLASS (mode))
16860 {
16861 case MODE_INT:
16862 case MODE_COMPLEX_INT:
16863 case MODE_COMPLEX_FLOAT:
16864 case MODE_FLOAT:
16865 case MODE_DECIMAL_FLOAT:
16866 return 32;
16867 default:
16868 return align;
16869 }
16870 }
16871
16872 /* Compute the alignment for a static variable.
16873 TYPE is the data type, and ALIGN is the alignment that
16874 the object would ordinarily have. The value of this function is used
16875 instead of that alignment to align the object. */
16876
16877 int
16878 ix86_data_alignment (tree type, unsigned int align, bool opt)
16879 {
16880 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
16881 for symbols from other compilation units or symbols that don't need
16882 to bind locally. In order to preserve some ABI compatibility with
16883 those compilers, ensure we don't decrease alignment from what we
16884 used to assume. */
16885
16886 unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
16887
16888 /* A data structure equal to or greater than the size of a cache line
16889 (64 bytes in the Pentium 4 and other recent Intel processors, including
16890 processors based on the Intel Core microarchitecture) should be aligned
16891 so that its base address is a multiple of the cache line size. */
16892
16893 unsigned int max_align
16894 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
16895
16896 if (max_align < BITS_PER_WORD)
16897 max_align = BITS_PER_WORD;
16898
16899 switch (ix86_align_data_type)
16900 {
16901 case ix86_align_data_type_abi: opt = false; break;
16902 case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
16903 case ix86_align_data_type_cacheline: break;
16904 }
16905
16906 if (TARGET_IAMCU)
16907 align = iamcu_alignment (type, align);
16908
16909 if (opt
16910 && AGGREGATE_TYPE_P (type)
16911 && TYPE_SIZE (type)
16912 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
16913 {
16914 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
16915 && align < max_align_compat)
16916 align = max_align_compat;
16917 if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
16918 && align < max_align)
16919 align = max_align;
16920 }
16921
16922 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
16923 to a 16-byte boundary. */
16924 if (TARGET_64BIT)
16925 {
16926 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
16927 && TYPE_SIZE (type)
16928 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16929 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
16930 && align < 128)
16931 return 128;
16932 }
16933
16934 if (!opt)
16935 return align;
16936
16937 if (TREE_CODE (type) == ARRAY_TYPE)
16938 {
16939 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16940 return 64;
16941 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16942 return 128;
16943 }
16944 else if (TREE_CODE (type) == COMPLEX_TYPE)
16945 {
16946
16947 if (TYPE_MODE (type) == DCmode && align < 64)
16948 return 64;
16949 if ((TYPE_MODE (type) == XCmode
16950 || TYPE_MODE (type) == TCmode) && align < 128)
16951 return 128;
16952 }
16953 else if ((TREE_CODE (type) == RECORD_TYPE
16954 || TREE_CODE (type) == UNION_TYPE
16955 || TREE_CODE (type) == QUAL_UNION_TYPE)
16956 && TYPE_FIELDS (type))
16957 {
16958 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16959 return 64;
16960 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16961 return 128;
16962 }
16963 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16964 || TREE_CODE (type) == INTEGER_TYPE)
16965 {
16966 if (TYPE_MODE (type) == DFmode && align < 64)
16967 return 64;
16968 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16969 return 128;
16970 }
16971
16972 return align;
16973 }
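/* Illustrative example for ix86_data_alignment above (editor's note,
   assumes a tuning with a 64-byte prefetch block): with OPT set, a 100-byte
   global char array is raised to 64-byte (cache line) alignment by the
   max_align rule above; with OPT clear only the x86-64 ABI rule applies
   and the same array gets 16-byte alignment.  */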
16974
16975 /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT. */
16976 static void
16977 ix86_lower_local_decl_alignment (tree decl)
16978 {
16979 unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
16980 DECL_ALIGN (decl), true);
16981 if (new_align < DECL_ALIGN (decl))
16982 SET_DECL_ALIGN (decl, new_align);
16983 }
16984
16985 /* Compute the alignment for a local variable or a stack slot. EXP is
16986 the data type or decl itself, MODE is the widest mode available and
16987 ALIGN is the alignment that the object would ordinarily have. The
16988 value of this macro is used instead of that alignment to align the
16989 object. */
16990
16991 unsigned int
16992 ix86_local_alignment (tree exp, machine_mode mode,
16993 unsigned int align, bool may_lower)
16994 {
16995 tree type, decl;
16996
16997 if (exp && DECL_P (exp))
16998 {
16999 type = TREE_TYPE (exp);
17000 decl = exp;
17001 }
17002 else
17003 {
17004 type = exp;
17005 decl = NULL;
17006 }
17007
17008 /* Don't do dynamic stack realignment for long long objects with
17009 -mpreferred-stack-boundary=2. */
17010 if (may_lower
17011 && !TARGET_64BIT
17012 && align == 64
17013 && ix86_preferred_stack_boundary < 64
17014 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
17015 && (!type || (!TYPE_USER_ALIGN (type)
17016 && !TYPE_ATOMIC (strip_array_types (type))))
17017 && (!decl || !DECL_USER_ALIGN (decl)))
17018 align = 32;
17019
17020 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
17021 register in MODE. Return the larger of the XFmode and DFmode
17022 alignments. */
17023 if (!type)
17024 {
17025 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17026 align = GET_MODE_ALIGNMENT (DFmode);
17027 return align;
17028 }
17029
17030 /* Don't increase alignment for Intel MCU psABI. */
17031 if (TARGET_IAMCU)
17032 return align;
17033
17034 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
17035 to a 16-byte boundary. The exact wording is:
17036
17037 An array uses the same alignment as its elements, except that a local or
17038 global array variable of length at least 16 bytes or
17039 a C99 variable-length array variable always has alignment of at least 16 bytes.
17040
17041 This was added to allow the use of aligned SSE instructions on arrays.
17042 The rule is meant for static storage (where the compiler cannot do the
17043 analysis by itself). We follow it for automatic variables only when it
17044 is convenient: we fully control everything in the function being
17045 compiled, and functions from other units cannot rely on the alignment.
17046
17047 Exclude the va_list type. It is the common case of a local array where
17048 we cannot benefit from the alignment.
17049
17050 TODO: Probably we should optimize for size only when the variable does not escape. */
17051 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
17052 && TARGET_SSE)
17053 {
17054 if (AGGREGATE_TYPE_P (type)
17055 && (va_list_type_node == NULL_TREE
17056 || (TYPE_MAIN_VARIANT (type)
17057 != TYPE_MAIN_VARIANT (va_list_type_node)))
17058 && TYPE_SIZE (type)
17059 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17060 && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
17061 && align < 128)
17062 return 128;
17063 }
17064 if (TREE_CODE (type) == ARRAY_TYPE)
17065 {
17066 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17067 return 64;
17068 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17069 return 128;
17070 }
17071 else if (TREE_CODE (type) == COMPLEX_TYPE)
17072 {
17073 if (TYPE_MODE (type) == DCmode && align < 64)
17074 return 64;
17075 if ((TYPE_MODE (type) == XCmode
17076 || TYPE_MODE (type) == TCmode) && align < 128)
17077 return 128;
17078 }
17079 else if ((TREE_CODE (type) == RECORD_TYPE
17080 || TREE_CODE (type) == UNION_TYPE
17081 || TREE_CODE (type) == QUAL_UNION_TYPE)
17082 && TYPE_FIELDS (type))
17083 {
17084 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17085 return 64;
17086 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17087 return 128;
17088 }
17089 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17090 || TREE_CODE (type) == INTEGER_TYPE)
17091 {
17092
17093 if (TYPE_MODE (type) == DFmode && align < 64)
17094 return 64;
17095 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17096 return 128;
17097 }
17098 return align;
17099 }
17100
17101 /* Compute the minimum required alignment for dynamic stack realignment
17102 purposes for a local variable, parameter or a stack slot. EXP is
17103 the data type or decl itself, MODE is its mode and ALIGN is the
17104 alignment that the object would ordinarily have. */
17105
17106 unsigned int
17107 ix86_minimum_alignment (tree exp, machine_mode mode,
17108 unsigned int align)
17109 {
17110 tree type, decl;
17111
17112 if (exp && DECL_P (exp))
17113 {
17114 type = TREE_TYPE (exp);
17115 decl = exp;
17116 }
17117 else
17118 {
17119 type = exp;
17120 decl = NULL;
17121 }
17122
17123 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
17124 return align;
17125
17126 /* Don't do dynamic stack realignment for long long objects with
17127 -mpreferred-stack-boundary=2. */
17128 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
17129 && (!type || (!TYPE_USER_ALIGN (type)
17130 && !TYPE_ATOMIC (strip_array_types (type))))
17131 && (!decl || !DECL_USER_ALIGN (decl)))
17132 {
17133 gcc_checking_assert (!TARGET_STV);
17134 return 32;
17135 }
17136
17137 return align;
17138 }
17139 \f
17140 /* Find a location for the static chain incoming to a nested function.
17141 This is a register, unless all free registers are used by arguments. */
17142
17143 static rtx
17144 ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
17145 {
17146 unsigned regno;
17147
17148 if (TARGET_64BIT)
17149 {
17150 /* We always use R10 in 64-bit mode. */
17151 regno = R10_REG;
17152 }
17153 else
17154 {
17155 const_tree fntype, fndecl;
17156 unsigned int ccvt;
17157
17158 /* By default in 32-bit mode we use ECX to pass the static chain. */
17159 regno = CX_REG;
17160
17161 if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
17162 {
17163 fntype = TREE_TYPE (fndecl_or_type);
17164 fndecl = fndecl_or_type;
17165 }
17166 else
17167 {
17168 fntype = fndecl_or_type;
17169 fndecl = NULL;
17170 }
17171
17172 ccvt = ix86_get_callcvt (fntype);
17173 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
17174 {
17175 /* Fastcall functions use ecx/edx for arguments, which leaves
17176 us with EAX for the static chain.
17177 Thiscall functions use ecx for arguments, which also
17178 leaves us with EAX for the static chain. */
17179 regno = AX_REG;
17180 }
17181 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
17182 {
17183 /* Thiscall functions use ecx for arguments, which leaves
17184 us with EAX and EDX for the static chain.
17185 We use EAX for ABI compatibility. */
17186 regno = AX_REG;
17187 }
17188 else if (ix86_function_regparm (fntype, fndecl) == 3)
17189 {
17190 /* For regparm 3, we have no free call-clobbered registers in
17191 which to store the static chain. In order to implement this,
17192 we have the trampoline push the static chain to the stack.
17193 However, we can't push a value below the return address when
17194 we call the nested function directly, so we have to use an
17195 alternate entry point. For this we use ESI, and have the
17196 alternate entry point push ESI, so that things appear the
17197 same once we're executing the nested function. */
17198 if (incoming_p)
17199 {
17200 if (fndecl == current_function_decl
17201 && !ix86_static_chain_on_stack)
17202 {
17203 gcc_assert (!reload_completed);
17204 ix86_static_chain_on_stack = true;
17205 }
17206 return gen_frame_mem (SImode,
17207 plus_constant (Pmode,
17208 arg_pointer_rtx, -8));
17209 }
17210 regno = SI_REG;
17211 }
17212 }
17213
17214 return gen_rtx_REG (Pmode, regno);
17215 }
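/* Illustrative examples for ix86_static_chain above (editor's note): in
   64-bit mode the static chain is always R10; for a 32-bit fastcall or
   thiscall nested function it ends up in EAX; and for a regparm(3)
   function the incoming chain is read back from the stack slot below the
   return address.  */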
17216
17217 /* Emit RTL insns to initialize the variable parts of a trampoline.
17218 FNDECL is the decl of the target address; M_TRAMP is a MEM for
17219 the trampoline, and CHAIN_VALUE is an RTX for the static chain
17220 to be passed to the target function. */
17221
17222 static void
17223 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
17224 {
17225 rtx mem, fnaddr;
17226 int opcode;
17227 int offset = 0;
17228 bool need_endbr = (flag_cf_protection & CF_BRANCH);
17229
17230 fnaddr = XEXP (DECL_RTL (fndecl), 0);
17231
17232 if (TARGET_64BIT)
17233 {
17234 int size;
17235
17236 if (need_endbr)
17237 {
17238 /* Insert ENDBR64. */
17239 mem = adjust_address (m_tramp, SImode, offset);
17240 emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
17241 offset += 4;
17242 }
17243
17244 /* Load the function address into r11. Try to load the address using
17245 the shorter movl instead of movabs. We may want to support
17246 movq for kernel mode, but the kernel does not use trampolines at
17247 the moment. FNADDR is a 32-bit address and may not be in
17248 DImode when ptr_mode == SImode. Always use movl in that
17249 case. */
17250 if (ptr_mode == SImode
17251 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17252 {
17253 fnaddr = copy_addr_to_reg (fnaddr);
17254
17255 mem = adjust_address (m_tramp, HImode, offset);
17256 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
17257
17258 mem = adjust_address (m_tramp, SImode, offset + 2);
17259 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
17260 offset += 6;
17261 }
17262 else
17263 {
17264 mem = adjust_address (m_tramp, HImode, offset);
17265 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
17266
17267 mem = adjust_address (m_tramp, DImode, offset + 2);
17268 emit_move_insn (mem, fnaddr);
17269 offset += 10;
17270 }
17271
17272 /* Load static chain using movabs to r10. Use the shorter movl
17273 instead of movabs when ptr_mode == SImode. */
17274 if (ptr_mode == SImode)
17275 {
17276 opcode = 0xba41;
17277 size = 6;
17278 }
17279 else
17280 {
17281 opcode = 0xba49;
17282 size = 10;
17283 }
17284
17285 mem = adjust_address (m_tramp, HImode, offset);
17286 emit_move_insn (mem, gen_int_mode (opcode, HImode));
17287
17288 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
17289 emit_move_insn (mem, chain_value);
17290 offset += size;
17291
17292 /* Jump to r11; the last (unused) byte is a nop, only there to
17293 pad the write out to a single 32-bit store. */
17294 mem = adjust_address (m_tramp, SImode, offset);
17295 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
17296 offset += 4;
17297 }
17298 else
17299 {
17300 rtx disp, chain;
17301
17302 /* Depending on the static chain location, either load a register
17303 with a constant, or push the constant to the stack. All of the
17304 instructions are the same size. */
17305 chain = ix86_static_chain (fndecl, true);
17306 if (REG_P (chain))
17307 {
17308 switch (REGNO (chain))
17309 {
17310 case AX_REG:
17311 opcode = 0xb8; break;
17312 case CX_REG:
17313 opcode = 0xb9; break;
17314 default:
17315 gcc_unreachable ();
17316 }
17317 }
17318 else
17319 opcode = 0x68;
17320
17321 if (need_endbr)
17322 {
17323 /* Insert ENDBR32. */
17324 mem = adjust_address (m_tramp, SImode, offset);
17325 emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
17326 offset += 4;
17327 }
17328
17329 mem = adjust_address (m_tramp, QImode, offset);
17330 emit_move_insn (mem, gen_int_mode (opcode, QImode));
17331
17332 mem = adjust_address (m_tramp, SImode, offset + 1);
17333 emit_move_insn (mem, chain_value);
17334 offset += 5;
17335
17336 mem = adjust_address (m_tramp, QImode, offset);
17337 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
17338
17339 mem = adjust_address (m_tramp, SImode, offset + 1);
17340
17341 /* Compute the offset from the end of the jmp to the target function.
17342 In the case in which the trampoline stores the static chain on
17343 the stack, we need to skip the first insn, which pushes the
17344 (call-saved) static chain register; this push is 1 byte. */
17345 offset += 5;
17346 int skip = MEM_P (chain) ? 1 : 0;
17347 /* Skip ENDBR32 at the entry of the target function. */
17348 if (need_endbr
17349 && !cgraph_node::get (fndecl)->only_called_directly_p ())
17350 skip += 4;
17351 disp = expand_binop (SImode, sub_optab, fnaddr,
17352 plus_constant (Pmode, XEXP (m_tramp, 0),
17353 offset - skip),
17354 NULL_RTX, 1, OPTAB_DIRECT);
17355 emit_move_insn (mem, disp);
17356 }
17357
17358 gcc_assert (offset <= TRAMPOLINE_SIZE);
17359
17360 #ifdef HAVE_ENABLE_EXECUTE_STACK
17361 #ifdef CHECK_EXECUTE_STACK_ENABLED
17362 if (CHECK_EXECUTE_STACK_ENABLED)
17363 #endif
17364 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17365 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
17366 #endif
17367 }
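/* Editor's sketch of the 64-bit trampoline emitted above, assuming
   ptr_mode == DImode and no ENDBR (the byte values follow from the
   little-endian stores of 0xbb49, 0xba49 and 0x90e3ff49):

     49 bb <fnaddr, 8 bytes>   movabs $FNADDR, %r11
     49 ba <chain,  8 bytes>   movabs $CHAIN,  %r10
     49 ff e3                  jmp    *%r11
     90                        nop (pads the last 32-bit store)

   With -fcf-protection=branch an ENDBR64 (f3 0f 1e fa) is prepended.  */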
17368
17369 static bool
17370 ix86_allocate_stack_slots_for_args (void)
17371 {
17372 /* Naked functions should not allocate stack slots for arguments. */
17373 return !ix86_function_naked (current_function_decl);
17374 }
17375
17376 static bool
17377 ix86_warn_func_return (tree decl)
17378 {
17379 /* Naked functions are implemented entirely in assembly, including the
17380 return sequence, so suppress warnings about this. */
17381 return !ix86_function_naked (decl);
17382 }
17383 \f
17384 /* Return the shift count of a vector-by-scalar shift builtin given its
17385 second argument ARG1. */
17386 static tree
17387 ix86_vector_shift_count (tree arg1)
17388 {
17389 if (tree_fits_uhwi_p (arg1))
17390 return arg1;
17391 else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
17392 {
17393 /* The count argument is odd: it is passed in as various 128-bit
17394 (or 64-bit) vectors, and its low 64 bits are the count. */
17395 unsigned char buf[16];
17396 int len = native_encode_expr (arg1, buf, 16);
17397 if (len == 0)
17398 return NULL_TREE;
17399 tree t = native_interpret_expr (uint64_type_node, buf, len);
17400 if (t && tree_fits_uhwi_p (t))
17401 return t;
17402 }
17403 return NULL_TREE;
17404 }
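/* Illustrative example for ix86_vector_shift_count above (editor's note):
   the count of a vector-by-scalar shift builtin may be given as a 128-bit
   (or 64-bit) vector constant; only its low 64 bits matter, so a constant
   whose low 64 bits encode the value 3 is decoded here to the scalar
   shift count 3.  */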
17405
17406 static tree
17407 ix86_fold_builtin (tree fndecl, int n_args,
17408 tree *args, bool ignore ATTRIBUTE_UNUSED)
17409 {
17410 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
17411 {
17412 enum ix86_builtins fn_code
17413 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17414 enum rtx_code rcode;
17415 bool is_vshift;
17416 unsigned HOST_WIDE_INT mask;
17417
17418 switch (fn_code)
17419 {
17420 case IX86_BUILTIN_CPU_IS:
17421 case IX86_BUILTIN_CPU_SUPPORTS:
17422 gcc_assert (n_args == 1);
17423 return fold_builtin_cpu (fndecl, args);
17424
17425 case IX86_BUILTIN_NANQ:
17426 case IX86_BUILTIN_NANSQ:
17427 {
17428 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17429 const char *str = c_getstr (*args);
17430 int quiet = fn_code == IX86_BUILTIN_NANQ;
17431 REAL_VALUE_TYPE real;
17432
17433 if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
17434 return build_real (type, real);
17435 return NULL_TREE;
17436 }
17437
17438 case IX86_BUILTIN_INFQ:
17439 case IX86_BUILTIN_HUGE_VALQ:
17440 {
17441 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17442 REAL_VALUE_TYPE inf;
17443 real_inf (&inf);
17444 return build_real (type, inf);
17445 }
17446
17447 case IX86_BUILTIN_TZCNT16:
17448 case IX86_BUILTIN_CTZS:
17449 case IX86_BUILTIN_TZCNT32:
17450 case IX86_BUILTIN_TZCNT64:
17451 gcc_assert (n_args == 1);
17452 if (TREE_CODE (args[0]) == INTEGER_CST)
17453 {
17454 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17455 tree arg = args[0];
17456 if (fn_code == IX86_BUILTIN_TZCNT16
17457 || fn_code == IX86_BUILTIN_CTZS)
17458 arg = fold_convert (short_unsigned_type_node, arg);
17459 if (integer_zerop (arg))
17460 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17461 else
17462 return fold_const_call (CFN_CTZ, type, arg);
17463 }
17464 break;
17465
17466 case IX86_BUILTIN_LZCNT16:
17467 case IX86_BUILTIN_CLZS:
17468 case IX86_BUILTIN_LZCNT32:
17469 case IX86_BUILTIN_LZCNT64:
17470 gcc_assert (n_args == 1);
17471 if (TREE_CODE (args[0]) == INTEGER_CST)
17472 {
17473 tree type = TREE_TYPE (TREE_TYPE (fndecl));
17474 tree arg = args[0];
17475 if (fn_code == IX86_BUILTIN_LZCNT16
17476 || fn_code == IX86_BUILTIN_CLZS)
17477 arg = fold_convert (short_unsigned_type_node, arg);
17478 if (integer_zerop (arg))
17479 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
17480 else
17481 return fold_const_call (CFN_CLZ, type, arg);
17482 }
17483 break;
17484
17485 case IX86_BUILTIN_BEXTR32:
17486 case IX86_BUILTIN_BEXTR64:
17487 case IX86_BUILTIN_BEXTRI32:
17488 case IX86_BUILTIN_BEXTRI64:
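/* Example of the fold below (editor's note, hypothetical operands):
   __builtin_ia32_bextr_u32 (0x12345678, 0x0804) extracts 8 bits starting
   at bit 4, so it folds to (0x12345678 >> 4) & 0xff = 0x67.  */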
17489 gcc_assert (n_args == 2);
17490 if (tree_fits_uhwi_p (args[1]))
17491 {
17492 unsigned HOST_WIDE_INT res = 0;
17493 unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
17494 unsigned int start = tree_to_uhwi (args[1]);
17495 unsigned int len = (start & 0xff00) >> 8;
17496 start &= 0xff;
17497 if (start >= prec || len == 0)
17498 res = 0;
17499 else if (!tree_fits_uhwi_p (args[0]))
17500 break;
17501 else
17502 res = tree_to_uhwi (args[0]) >> start;
17503 if (len > prec)
17504 len = prec;
17505 if (len < HOST_BITS_PER_WIDE_INT)
17506 res &= (HOST_WIDE_INT_1U << len) - 1;
17507 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17508 }
17509 break;
17510
17511 case IX86_BUILTIN_BZHI32:
17512 case IX86_BUILTIN_BZHI64:
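/* Example of the fold below (editor's note, hypothetical operands):
   __builtin_ia32_bzhi_si (0x12345678, 8) clears all bits from index 8
   upward and folds to 0x78; an index of 0 folds to 0, and an index of 32
   or more returns the first operand unchanged.  */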
17513 gcc_assert (n_args == 2);
17514 if (tree_fits_uhwi_p (args[1]))
17515 {
17516 unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
17517 if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
17518 return args[0];
17519 if (idx == 0)
17520 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0);
17521 if (!tree_fits_uhwi_p (args[0]))
17522 break;
17523 unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
17524 res &= ~(HOST_WIDE_INT_M1U << idx);
17525 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17526 }
17527 break;
17528
17529 case IX86_BUILTIN_PDEP32:
17530 case IX86_BUILTIN_PDEP64:
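/* Example of the fold below (editor's note, hypothetical operands):
   pdep with SRC = 0b101 and MASK = 0b11010 deposits the low bits of SRC
   into the set positions of MASK, folding to 0b10010.  */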
17531 gcc_assert (n_args == 2);
17532 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17533 {
17534 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17535 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17536 unsigned HOST_WIDE_INT res = 0;
17537 unsigned HOST_WIDE_INT m, k = 1;
17538 for (m = 1; m; m <<= 1)
17539 if ((mask & m) != 0)
17540 {
17541 if ((src & k) != 0)
17542 res |= m;
17543 k <<= 1;
17544 }
17545 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17546 }
17547 break;
17548
17549 case IX86_BUILTIN_PEXT32:
17550 case IX86_BUILTIN_PEXT64:
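/* Example of the fold below (editor's note, hypothetical operands):
   pext with SRC = 0b10010 and MASK = 0b11010 gathers the bits of SRC
   selected by MASK into the low bits, folding to 0b101 (the inverse of
   the pdep example above).  */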
17551 gcc_assert (n_args == 2);
17552 if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
17553 {
17554 unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
17555 unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
17556 unsigned HOST_WIDE_INT res = 0;
17557 unsigned HOST_WIDE_INT m, k = 1;
17558 for (m = 1; m; m <<= 1)
17559 if ((mask & m) != 0)
17560 {
17561 if ((src & m) != 0)
17562 res |= k;
17563 k <<= 1;
17564 }
17565 return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
17566 }
17567 break;
17568
17569 case IX86_BUILTIN_MOVMSKPS:
17570 case IX86_BUILTIN_PMOVMSKB:
17571 case IX86_BUILTIN_MOVMSKPD:
17572 case IX86_BUILTIN_PMOVMSKB128:
17573 case IX86_BUILTIN_MOVMSKPD256:
17574 case IX86_BUILTIN_MOVMSKPS256:
17575 case IX86_BUILTIN_PMOVMSKB256:
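/* Example of the fold below (editor's note, hypothetical operand):
   movmskps on the constant vector { -1.0f, 2.0f, -3.0f, 4.0f } collects
   the element sign bits and folds to 0b0101 = 5.  */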
17576 gcc_assert (n_args == 1);
17577 if (TREE_CODE (args[0]) == VECTOR_CST)
17578 {
17579 HOST_WIDE_INT res = 0;
17580 for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
17581 {
17582 tree e = VECTOR_CST_ELT (args[0], i);
17583 if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
17584 {
17585 if (wi::neg_p (wi::to_wide (e)))
17586 res |= HOST_WIDE_INT_1 << i;
17587 }
17588 else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
17589 {
17590 if (TREE_REAL_CST (e).sign)
17591 res |= HOST_WIDE_INT_1 << i;
17592 }
17593 else
17594 return NULL_TREE;
17595 }
17596 return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
17597 }
17598 break;
17599
17600 case IX86_BUILTIN_PSLLD:
17601 case IX86_BUILTIN_PSLLD128:
17602 case IX86_BUILTIN_PSLLD128_MASK:
17603 case IX86_BUILTIN_PSLLD256:
17604 case IX86_BUILTIN_PSLLD256_MASK:
17605 case IX86_BUILTIN_PSLLD512:
17606 case IX86_BUILTIN_PSLLDI:
17607 case IX86_BUILTIN_PSLLDI128:
17608 case IX86_BUILTIN_PSLLDI128_MASK:
17609 case IX86_BUILTIN_PSLLDI256:
17610 case IX86_BUILTIN_PSLLDI256_MASK:
17611 case IX86_BUILTIN_PSLLDI512:
17612 case IX86_BUILTIN_PSLLQ:
17613 case IX86_BUILTIN_PSLLQ128:
17614 case IX86_BUILTIN_PSLLQ128_MASK:
17615 case IX86_BUILTIN_PSLLQ256:
17616 case IX86_BUILTIN_PSLLQ256_MASK:
17617 case IX86_BUILTIN_PSLLQ512:
17618 case IX86_BUILTIN_PSLLQI:
17619 case IX86_BUILTIN_PSLLQI128:
17620 case IX86_BUILTIN_PSLLQI128_MASK:
17621 case IX86_BUILTIN_PSLLQI256:
17622 case IX86_BUILTIN_PSLLQI256_MASK:
17623 case IX86_BUILTIN_PSLLQI512:
17624 case IX86_BUILTIN_PSLLW:
17625 case IX86_BUILTIN_PSLLW128:
17626 case IX86_BUILTIN_PSLLW128_MASK:
17627 case IX86_BUILTIN_PSLLW256:
17628 case IX86_BUILTIN_PSLLW256_MASK:
17629 case IX86_BUILTIN_PSLLW512_MASK:
17630 case IX86_BUILTIN_PSLLWI:
17631 case IX86_BUILTIN_PSLLWI128:
17632 case IX86_BUILTIN_PSLLWI128_MASK:
17633 case IX86_BUILTIN_PSLLWI256:
17634 case IX86_BUILTIN_PSLLWI256_MASK:
17635 case IX86_BUILTIN_PSLLWI512_MASK:
17636 rcode = ASHIFT;
17637 is_vshift = false;
17638 goto do_shift;
17639 case IX86_BUILTIN_PSRAD:
17640 case IX86_BUILTIN_PSRAD128:
17641 case IX86_BUILTIN_PSRAD128_MASK:
17642 case IX86_BUILTIN_PSRAD256:
17643 case IX86_BUILTIN_PSRAD256_MASK:
17644 case IX86_BUILTIN_PSRAD512:
17645 case IX86_BUILTIN_PSRADI:
17646 case IX86_BUILTIN_PSRADI128:
17647 case IX86_BUILTIN_PSRADI128_MASK:
17648 case IX86_BUILTIN_PSRADI256:
17649 case IX86_BUILTIN_PSRADI256_MASK:
17650 case IX86_BUILTIN_PSRADI512:
17651 case IX86_BUILTIN_PSRAQ128_MASK:
17652 case IX86_BUILTIN_PSRAQ256_MASK:
17653 case IX86_BUILTIN_PSRAQ512:
17654 case IX86_BUILTIN_PSRAQI128_MASK:
17655 case IX86_BUILTIN_PSRAQI256_MASK:
17656 case IX86_BUILTIN_PSRAQI512:
17657 case IX86_BUILTIN_PSRAW:
17658 case IX86_BUILTIN_PSRAW128:
17659 case IX86_BUILTIN_PSRAW128_MASK:
17660 case IX86_BUILTIN_PSRAW256:
17661 case IX86_BUILTIN_PSRAW256_MASK:
17662 case IX86_BUILTIN_PSRAW512:
17663 case IX86_BUILTIN_PSRAWI:
17664 case IX86_BUILTIN_PSRAWI128:
17665 case IX86_BUILTIN_PSRAWI128_MASK:
17666 case IX86_BUILTIN_PSRAWI256:
17667 case IX86_BUILTIN_PSRAWI256_MASK:
17668 case IX86_BUILTIN_PSRAWI512:
17669 rcode = ASHIFTRT;
17670 is_vshift = false;
17671 goto do_shift;
17672 case IX86_BUILTIN_PSRLD:
17673 case IX86_BUILTIN_PSRLD128:
17674 case IX86_BUILTIN_PSRLD128_MASK:
17675 case IX86_BUILTIN_PSRLD256:
17676 case IX86_BUILTIN_PSRLD256_MASK:
17677 case IX86_BUILTIN_PSRLD512:
17678 case IX86_BUILTIN_PSRLDI:
17679 case IX86_BUILTIN_PSRLDI128:
17680 case IX86_BUILTIN_PSRLDI128_MASK:
17681 case IX86_BUILTIN_PSRLDI256:
17682 case IX86_BUILTIN_PSRLDI256_MASK:
17683 case IX86_BUILTIN_PSRLDI512:
17684 case IX86_BUILTIN_PSRLQ:
17685 case IX86_BUILTIN_PSRLQ128:
17686 case IX86_BUILTIN_PSRLQ128_MASK:
17687 case IX86_BUILTIN_PSRLQ256:
17688 case IX86_BUILTIN_PSRLQ256_MASK:
17689 case IX86_BUILTIN_PSRLQ512:
17690 case IX86_BUILTIN_PSRLQI:
17691 case IX86_BUILTIN_PSRLQI128:
17692 case IX86_BUILTIN_PSRLQI128_MASK:
17693 case IX86_BUILTIN_PSRLQI256:
17694 case IX86_BUILTIN_PSRLQI256_MASK:
17695 case IX86_BUILTIN_PSRLQI512:
17696 case IX86_BUILTIN_PSRLW:
17697 case IX86_BUILTIN_PSRLW128:
17698 case IX86_BUILTIN_PSRLW128_MASK:
17699 case IX86_BUILTIN_PSRLW256:
17700 case IX86_BUILTIN_PSRLW256_MASK:
17701 case IX86_BUILTIN_PSRLW512:
17702 case IX86_BUILTIN_PSRLWI:
17703 case IX86_BUILTIN_PSRLWI128:
17704 case IX86_BUILTIN_PSRLWI128_MASK:
17705 case IX86_BUILTIN_PSRLWI256:
17706 case IX86_BUILTIN_PSRLWI256_MASK:
17707 case IX86_BUILTIN_PSRLWI512:
17708 rcode = LSHIFTRT;
17709 is_vshift = false;
17710 goto do_shift;
17711 case IX86_BUILTIN_PSLLVV16HI:
17712 case IX86_BUILTIN_PSLLVV16SI:
17713 case IX86_BUILTIN_PSLLVV2DI:
17714 case IX86_BUILTIN_PSLLVV2DI_MASK:
17715 case IX86_BUILTIN_PSLLVV32HI:
17716 case IX86_BUILTIN_PSLLVV4DI:
17717 case IX86_BUILTIN_PSLLVV4DI_MASK:
17718 case IX86_BUILTIN_PSLLVV4SI:
17719 case IX86_BUILTIN_PSLLVV4SI_MASK:
17720 case IX86_BUILTIN_PSLLVV8DI:
17721 case IX86_BUILTIN_PSLLVV8HI:
17722 case IX86_BUILTIN_PSLLVV8SI:
17723 case IX86_BUILTIN_PSLLVV8SI_MASK:
17724 rcode = ASHIFT;
17725 is_vshift = true;
17726 goto do_shift;
17727 case IX86_BUILTIN_PSRAVQ128:
17728 case IX86_BUILTIN_PSRAVQ256:
17729 case IX86_BUILTIN_PSRAVV16HI:
17730 case IX86_BUILTIN_PSRAVV16SI:
17731 case IX86_BUILTIN_PSRAVV32HI:
17732 case IX86_BUILTIN_PSRAVV4SI:
17733 case IX86_BUILTIN_PSRAVV4SI_MASK:
17734 case IX86_BUILTIN_PSRAVV8DI:
17735 case IX86_BUILTIN_PSRAVV8HI:
17736 case IX86_BUILTIN_PSRAVV8SI:
17737 case IX86_BUILTIN_PSRAVV8SI_MASK:
17738 rcode = ASHIFTRT;
17739 is_vshift = true;
17740 goto do_shift;
17741 case IX86_BUILTIN_PSRLVV16HI:
17742 case IX86_BUILTIN_PSRLVV16SI:
17743 case IX86_BUILTIN_PSRLVV2DI:
17744 case IX86_BUILTIN_PSRLVV2DI_MASK:
17745 case IX86_BUILTIN_PSRLVV32HI:
17746 case IX86_BUILTIN_PSRLVV4DI:
17747 case IX86_BUILTIN_PSRLVV4DI_MASK:
17748 case IX86_BUILTIN_PSRLVV4SI:
17749 case IX86_BUILTIN_PSRLVV4SI_MASK:
17750 case IX86_BUILTIN_PSRLVV8DI:
17751 case IX86_BUILTIN_PSRLVV8HI:
17752 case IX86_BUILTIN_PSRLVV8SI:
17753 case IX86_BUILTIN_PSRLVV8SI_MASK:
17754 rcode = LSHIFTRT;
17755 is_vshift = true;
17756 goto do_shift;
17757
17758 do_shift:
17759 gcc_assert (n_args >= 2);
17760 if (TREE_CODE (args[0]) != VECTOR_CST)
17761 break;
17762 mask = HOST_WIDE_INT_M1U;
17763 if (n_args > 2)
17764 {
17765 /* This is a masked shift. */
17766 if (!tree_fits_uhwi_p (args[n_args - 1])
17767 || TREE_SIDE_EFFECTS (args[n_args - 2]))
17768 break;
17769 mask = tree_to_uhwi (args[n_args - 1]);
17770 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
17771 mask |= HOST_WIDE_INT_M1U << elems;
17772 if (mask != HOST_WIDE_INT_M1U
17773 && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
17774 break;
17775 if (mask == (HOST_WIDE_INT_M1U << elems))
17776 return args[n_args - 2];
17777 }
17778 if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
17779 break;
17780 if (tree tem = (is_vshift ? integer_one_node
17781 : ix86_vector_shift_count (args[1])))
17782 {
17783 unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
17784 unsigned HOST_WIDE_INT prec
17785 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
17786 if (count == 0 && mask == HOST_WIDE_INT_M1U)
17787 return args[0];
17788 if (count >= prec)
17789 {
17790 if (rcode == ASHIFTRT)
17791 count = prec - 1;
17792 else if (mask == HOST_WIDE_INT_M1U)
17793 return build_zero_cst (TREE_TYPE (args[0]));
17794 }
17795 tree countt = NULL_TREE;
17796 if (!is_vshift)
17797 {
17798 if (count >= prec)
17799 countt = integer_zero_node;
17800 else
17801 countt = build_int_cst (integer_type_node, count);
17802 }
17803 tree_vector_builder builder;
17804 if (mask != HOST_WIDE_INT_M1U || is_vshift)
17805 builder.new_vector (TREE_TYPE (args[0]),
17806 TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
17807 1);
17808 else
17809 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
17810 false);
17811 unsigned int cnt = builder.encoded_nelts ();
17812 for (unsigned int i = 0; i < cnt; ++i)
17813 {
17814 tree elt = VECTOR_CST_ELT (args[0], i);
17815 if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
17816 return NULL_TREE;
17817 tree type = TREE_TYPE (elt);
17818 if (rcode == LSHIFTRT)
17819 elt = fold_convert (unsigned_type_for (type), elt);
17820 if (is_vshift)
17821 {
17822 countt = VECTOR_CST_ELT (args[1], i);
17823 if (TREE_CODE (countt) != INTEGER_CST
17824 || TREE_OVERFLOW (countt))
17825 return NULL_TREE;
17826 if (wi::neg_p (wi::to_wide (countt))
17827 || wi::to_widest (countt) >= prec)
17828 {
17829 if (rcode == ASHIFTRT)
17830 countt = build_int_cst (TREE_TYPE (countt),
17831 prec - 1);
17832 else
17833 {
17834 elt = build_zero_cst (TREE_TYPE (elt));
17835 countt = build_zero_cst (TREE_TYPE (countt));
17836 }
17837 }
17838 }
17839 else if (count >= prec)
17840 elt = build_zero_cst (TREE_TYPE (elt));
17841 elt = const_binop (rcode == ASHIFT
17842 ? LSHIFT_EXPR : RSHIFT_EXPR,
17843 TREE_TYPE (elt), elt, countt);
17844 if (!elt || TREE_CODE (elt) != INTEGER_CST)
17845 return NULL_TREE;
17846 if (rcode == LSHIFTRT)
17847 elt = fold_convert (type, elt);
17848 if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
17849 {
17850 elt = VECTOR_CST_ELT (args[n_args - 2], i);
17851 if (TREE_CODE (elt) != INTEGER_CST
17852 || TREE_OVERFLOW (elt))
17853 return NULL_TREE;
17854 }
17855 builder.quick_push (elt);
17856 }
17857 return builder.build ();
17858 }
17859 break;
17860
17861 default:
17862 break;
17863 }
17864 }
17865
17866 #ifdef SUBTARGET_FOLD_BUILTIN
17867 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
17868 #endif
17869
17870 return NULL_TREE;
17871 }
17872
17873 /* Fold an MD builtin in GIMPLE (use ix86_fold_builtin for folding
17874 into a constant). */
17875
17876 bool
17877 ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
17878 {
17879 gimple *stmt = gsi_stmt (*gsi);
17880 tree fndecl = gimple_call_fndecl (stmt);
17881 gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
17882 int n_args = gimple_call_num_args (stmt);
17883 enum ix86_builtins fn_code
17884 = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
17885 tree decl = NULL_TREE;
17886 tree arg0, arg1, arg2;
17887 enum rtx_code rcode;
17888 unsigned HOST_WIDE_INT count;
17889 bool is_vshift;
17890
17891 switch (fn_code)
17892 {
17893 case IX86_BUILTIN_TZCNT32:
17894 decl = builtin_decl_implicit (BUILT_IN_CTZ);
17895 goto fold_tzcnt_lzcnt;
17896
17897 case IX86_BUILTIN_TZCNT64:
17898 decl = builtin_decl_implicit (BUILT_IN_CTZLL);
17899 goto fold_tzcnt_lzcnt;
17900
17901 case IX86_BUILTIN_LZCNT32:
17902 decl = builtin_decl_implicit (BUILT_IN_CLZ);
17903 goto fold_tzcnt_lzcnt;
17904
17905 case IX86_BUILTIN_LZCNT64:
17906 decl = builtin_decl_implicit (BUILT_IN_CLZLL);
17907 goto fold_tzcnt_lzcnt;
17908
17909 fold_tzcnt_lzcnt:
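/* Example of the rewrite below (editor's note): when value-range
   information proves X != 0, a call such as __builtin_ia32_tzcnt_u32 (X)
   is replaced by (int) __builtin_ctz (X), which the middle end
   understands and optimizes better than the target builtin.  */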
17910 gcc_assert (n_args == 1);
17911 arg0 = gimple_call_arg (stmt, 0);
17912 if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
17913 {
17914 int prec = TYPE_PRECISION (TREE_TYPE (arg0));
17915 /* If arg0 is provably non-zero, optimize into the generic
17916 __builtin_c[tl]z{,ll} functions that the middle-end handles
17917 better. */
17918 if (!expr_not_equal_to (arg0, wi::zero (prec)))
17919 return false;
17920
17921 location_t loc = gimple_location (stmt);
17922 gimple *g = gimple_build_call (decl, 1, arg0);
17923 gimple_set_location (g, loc);
17924 tree lhs = make_ssa_name (integer_type_node);
17925 gimple_call_set_lhs (g, lhs);
17926 gsi_insert_before (gsi, g, GSI_SAME_STMT);
17927 g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
17928 gimple_set_location (g, loc);
17929 gsi_replace (gsi, g, false);
17930 return true;
17931 }
17932 break;
17933
17934 case IX86_BUILTIN_BZHI32:
17935 case IX86_BUILTIN_BZHI64:
17936 gcc_assert (n_args == 2);
17937 arg1 = gimple_call_arg (stmt, 1);
17938 if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
17939 {
17940 unsigned int idx = tree_to_uhwi (arg1) & 0xff;
17941 arg0 = gimple_call_arg (stmt, 0);
17942 if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
17943 break;
17944 location_t loc = gimple_location (stmt);
17945 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17946 gimple_set_location (g, loc);
17947 gsi_replace (gsi, g, false);
17948 return true;
17949 }
17950 break;
17951
17952 case IX86_BUILTIN_PDEP32:
17953 case IX86_BUILTIN_PDEP64:
17954 case IX86_BUILTIN_PEXT32:
17955 case IX86_BUILTIN_PEXT64:
17956 gcc_assert (n_args == 2);
17957 arg1 = gimple_call_arg (stmt, 1);
17958 if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
17959 {
17960 location_t loc = gimple_location (stmt);
17961 arg0 = gimple_call_arg (stmt, 0);
17962 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
17963 gimple_set_location (g, loc);
17964 gsi_replace (gsi, g, false);
17965 return true;
17966 }
17967 break;
17968
17969 case IX86_BUILTIN_PSLLD:
17970 case IX86_BUILTIN_PSLLD128:
17971 case IX86_BUILTIN_PSLLD128_MASK:
17972 case IX86_BUILTIN_PSLLD256:
17973 case IX86_BUILTIN_PSLLD256_MASK:
17974 case IX86_BUILTIN_PSLLD512:
17975 case IX86_BUILTIN_PSLLDI:
17976 case IX86_BUILTIN_PSLLDI128:
17977 case IX86_BUILTIN_PSLLDI128_MASK:
17978 case IX86_BUILTIN_PSLLDI256:
17979 case IX86_BUILTIN_PSLLDI256_MASK:
17980 case IX86_BUILTIN_PSLLDI512:
17981 case IX86_BUILTIN_PSLLQ:
17982 case IX86_BUILTIN_PSLLQ128:
17983 case IX86_BUILTIN_PSLLQ128_MASK:
17984 case IX86_BUILTIN_PSLLQ256:
17985 case IX86_BUILTIN_PSLLQ256_MASK:
17986 case IX86_BUILTIN_PSLLQ512:
17987 case IX86_BUILTIN_PSLLQI:
17988 case IX86_BUILTIN_PSLLQI128:
17989 case IX86_BUILTIN_PSLLQI128_MASK:
17990 case IX86_BUILTIN_PSLLQI256:
17991 case IX86_BUILTIN_PSLLQI256_MASK:
17992 case IX86_BUILTIN_PSLLQI512:
17993 case IX86_BUILTIN_PSLLW:
17994 case IX86_BUILTIN_PSLLW128:
17995 case IX86_BUILTIN_PSLLW128_MASK:
17996 case IX86_BUILTIN_PSLLW256:
17997 case IX86_BUILTIN_PSLLW256_MASK:
17998 case IX86_BUILTIN_PSLLW512_MASK:
17999 case IX86_BUILTIN_PSLLWI:
18000 case IX86_BUILTIN_PSLLWI128:
18001 case IX86_BUILTIN_PSLLWI128_MASK:
18002 case IX86_BUILTIN_PSLLWI256:
18003 case IX86_BUILTIN_PSLLWI256_MASK:
18004 case IX86_BUILTIN_PSLLWI512_MASK:
18005 rcode = ASHIFT;
18006 is_vshift = false;
18007 goto do_shift;
18008 case IX86_BUILTIN_PSRAD:
18009 case IX86_BUILTIN_PSRAD128:
18010 case IX86_BUILTIN_PSRAD128_MASK:
18011 case IX86_BUILTIN_PSRAD256:
18012 case IX86_BUILTIN_PSRAD256_MASK:
18013 case IX86_BUILTIN_PSRAD512:
18014 case IX86_BUILTIN_PSRADI:
18015 case IX86_BUILTIN_PSRADI128:
18016 case IX86_BUILTIN_PSRADI128_MASK:
18017 case IX86_BUILTIN_PSRADI256:
18018 case IX86_BUILTIN_PSRADI256_MASK:
18019 case IX86_BUILTIN_PSRADI512:
18020 case IX86_BUILTIN_PSRAQ128_MASK:
18021 case IX86_BUILTIN_PSRAQ256_MASK:
18022 case IX86_BUILTIN_PSRAQ512:
18023 case IX86_BUILTIN_PSRAQI128_MASK:
18024 case IX86_BUILTIN_PSRAQI256_MASK:
18025 case IX86_BUILTIN_PSRAQI512:
18026 case IX86_BUILTIN_PSRAW:
18027 case IX86_BUILTIN_PSRAW128:
18028 case IX86_BUILTIN_PSRAW128_MASK:
18029 case IX86_BUILTIN_PSRAW256:
18030 case IX86_BUILTIN_PSRAW256_MASK:
18031 case IX86_BUILTIN_PSRAW512:
18032 case IX86_BUILTIN_PSRAWI:
18033 case IX86_BUILTIN_PSRAWI128:
18034 case IX86_BUILTIN_PSRAWI128_MASK:
18035 case IX86_BUILTIN_PSRAWI256:
18036 case IX86_BUILTIN_PSRAWI256_MASK:
18037 case IX86_BUILTIN_PSRAWI512:
18038 rcode = ASHIFTRT;
18039 is_vshift = false;
18040 goto do_shift;
18041 case IX86_BUILTIN_PSRLD:
18042 case IX86_BUILTIN_PSRLD128:
18043 case IX86_BUILTIN_PSRLD128_MASK:
18044 case IX86_BUILTIN_PSRLD256:
18045 case IX86_BUILTIN_PSRLD256_MASK:
18046 case IX86_BUILTIN_PSRLD512:
18047 case IX86_BUILTIN_PSRLDI:
18048 case IX86_BUILTIN_PSRLDI128:
18049 case IX86_BUILTIN_PSRLDI128_MASK:
18050 case IX86_BUILTIN_PSRLDI256:
18051 case IX86_BUILTIN_PSRLDI256_MASK:
18052 case IX86_BUILTIN_PSRLDI512:
18053 case IX86_BUILTIN_PSRLQ:
18054 case IX86_BUILTIN_PSRLQ128:
18055 case IX86_BUILTIN_PSRLQ128_MASK:
18056 case IX86_BUILTIN_PSRLQ256:
18057 case IX86_BUILTIN_PSRLQ256_MASK:
18058 case IX86_BUILTIN_PSRLQ512:
18059 case IX86_BUILTIN_PSRLQI:
18060 case IX86_BUILTIN_PSRLQI128:
18061 case IX86_BUILTIN_PSRLQI128_MASK:
18062 case IX86_BUILTIN_PSRLQI256:
18063 case IX86_BUILTIN_PSRLQI256_MASK:
18064 case IX86_BUILTIN_PSRLQI512:
18065 case IX86_BUILTIN_PSRLW:
18066 case IX86_BUILTIN_PSRLW128:
18067 case IX86_BUILTIN_PSRLW128_MASK:
18068 case IX86_BUILTIN_PSRLW256:
18069 case IX86_BUILTIN_PSRLW256_MASK:
18070 case IX86_BUILTIN_PSRLW512:
18071 case IX86_BUILTIN_PSRLWI:
18072 case IX86_BUILTIN_PSRLWI128:
18073 case IX86_BUILTIN_PSRLWI128_MASK:
18074 case IX86_BUILTIN_PSRLWI256:
18075 case IX86_BUILTIN_PSRLWI256_MASK:
18076 case IX86_BUILTIN_PSRLWI512:
18077 rcode = LSHIFTRT;
18078 is_vshift = false;
18079 goto do_shift;
18080 case IX86_BUILTIN_PSLLVV16HI:
18081 case IX86_BUILTIN_PSLLVV16SI:
18082 case IX86_BUILTIN_PSLLVV2DI:
18083 case IX86_BUILTIN_PSLLVV2DI_MASK:
18084 case IX86_BUILTIN_PSLLVV32HI:
18085 case IX86_BUILTIN_PSLLVV4DI:
18086 case IX86_BUILTIN_PSLLVV4DI_MASK:
18087 case IX86_BUILTIN_PSLLVV4SI:
18088 case IX86_BUILTIN_PSLLVV4SI_MASK:
18089 case IX86_BUILTIN_PSLLVV8DI:
18090 case IX86_BUILTIN_PSLLVV8HI:
18091 case IX86_BUILTIN_PSLLVV8SI:
18092 case IX86_BUILTIN_PSLLVV8SI_MASK:
18093 rcode = ASHIFT;
18094 is_vshift = true;
18095 goto do_shift;
18096 case IX86_BUILTIN_PSRAVQ128:
18097 case IX86_BUILTIN_PSRAVQ256:
18098 case IX86_BUILTIN_PSRAVV16HI:
18099 case IX86_BUILTIN_PSRAVV16SI:
18100 case IX86_BUILTIN_PSRAVV32HI:
18101 case IX86_BUILTIN_PSRAVV4SI:
18102 case IX86_BUILTIN_PSRAVV4SI_MASK:
18103 case IX86_BUILTIN_PSRAVV8DI:
18104 case IX86_BUILTIN_PSRAVV8HI:
18105 case IX86_BUILTIN_PSRAVV8SI:
18106 case IX86_BUILTIN_PSRAVV8SI_MASK:
18107 rcode = ASHIFTRT;
18108 is_vshift = true;
18109 goto do_shift;
18110 case IX86_BUILTIN_PSRLVV16HI:
18111 case IX86_BUILTIN_PSRLVV16SI:
18112 case IX86_BUILTIN_PSRLVV2DI:
18113 case IX86_BUILTIN_PSRLVV2DI_MASK:
18114 case IX86_BUILTIN_PSRLVV32HI:
18115 case IX86_BUILTIN_PSRLVV4DI:
18116 case IX86_BUILTIN_PSRLVV4DI_MASK:
18117 case IX86_BUILTIN_PSRLVV4SI:
18118 case IX86_BUILTIN_PSRLVV4SI_MASK:
18119 case IX86_BUILTIN_PSRLVV8DI:
18120 case IX86_BUILTIN_PSRLVV8HI:
18121 case IX86_BUILTIN_PSRLVV8SI:
18122 case IX86_BUILTIN_PSRLVV8SI_MASK:
18123 rcode = LSHIFTRT;
18124 is_vshift = true;
18125 goto do_shift;
18126
18127 do_shift:
18128 gcc_assert (n_args >= 2);
18129 arg0 = gimple_call_arg (stmt, 0);
18130 arg1 = gimple_call_arg (stmt, 1);
18131 if (n_args > 2)
18132 {
18133 /* This is a masked shift. Only optimize if the mask is all ones. */
18134 tree argl = gimple_call_arg (stmt, n_args - 1);
18135 if (!tree_fits_uhwi_p (argl))
18136 break;
18137 unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
18138 unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
18139 if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
18140 break;
18141 }
18142 if (is_vshift)
18143 {
18144 if (TREE_CODE (arg1) != VECTOR_CST)
18145 break;
18146 count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
18147 if (integer_zerop (arg1))
18148 count = 0;
18149 else if (rcode == ASHIFTRT)
18150 break;
18151 else
18152 for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
18153 {
18154 tree elt = VECTOR_CST_ELT (arg1, i);
18155 if (!wi::neg_p (wi::to_wide (elt))
18156 && wi::to_widest (elt) < count)
18157 return false;
18158 }
18159 }
18160 else
18161 {
18162 arg1 = ix86_vector_shift_count (arg1);
18163 if (!arg1)
18164 break;
18165 count = tree_to_uhwi (arg1);
18166 }
18167 if (count == 0)
18168 {
18169 /* Just return the first argument for shift by 0. */
18170 location_t loc = gimple_location (stmt);
18171 gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
18172 gimple_set_location (g, loc);
18173 gsi_replace (gsi, g, false);
18174 return true;
18175 }
18176 if (rcode != ASHIFTRT
18177 && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
18178 {
18179 /* For shift counts equal to or greater than the precision, the
18180 result is zero, except for arithmetic right shifts. */
18181 location_t loc = gimple_location (stmt);
18182 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18183 build_zero_cst (TREE_TYPE (arg0)));
18184 gimple_set_location (g, loc);
18185 gsi_replace (gsi, g, false);
18186 return true;
18187 }
18188 break;
18189
18190 case IX86_BUILTIN_SHUFPD:
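/* Example of the lowering below (editor's note, hypothetical mask):
   __builtin_ia32_shufpd (a, b, 1) becomes a VEC_PERM_EXPR with the
   selector { 1, 2 }, i.e. element 1 of A followed by element 0 of B.  */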
18191 arg2 = gimple_call_arg (stmt, 2);
18192 if (TREE_CODE (arg2) == INTEGER_CST)
18193 {
18194 location_t loc = gimple_location (stmt);
18195 unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
18196 arg0 = gimple_call_arg (stmt, 0);
18197 arg1 = gimple_call_arg (stmt, 1);
18198 tree itype = long_long_integer_type_node;
18199 tree vtype = build_vector_type (itype, 2); /* V2DI */
18200 tree_vector_builder elts (vtype, 2, 1);
18201 /* Ignore bits other than the lowest 2. */
18202 elts.quick_push (build_int_cst (itype, imask & 1));
18203 imask >>= 1;
18204 elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
18205 tree omask = elts.build ();
18206 gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
18207 VEC_PERM_EXPR,
18208 arg0, arg1, omask);
18209 gimple_set_location (g, loc);
18210 gsi_replace (gsi, g, false);
18211 return true;
18212 }
18213 /* Do not error yet, the constant could be propagated later? */
18214 break;
18215
18216 default:
18217 break;
18218 }
18219
18220 return false;
18221 }
18222
18223 /* Handler for an SVML-style interface to
18224 a library with vectorized intrinsics. */
18225
18226 tree
18227 ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
18228 {
18229 char name[20];
18230 tree fntype, new_fndecl, args;
18231 unsigned arity;
18232 const char *bname;
18233 machine_mode el_mode, in_mode;
18234 int n, in_n;
18235
18236 /* The SVML library is suitable for unsafe math only. */
18237 if (!flag_unsafe_math_optimizations)
18238 return NULL_TREE;
18239
18240 el_mode = TYPE_MODE (TREE_TYPE (type_out));
18241 n = TYPE_VECTOR_SUBPARTS (type_out);
18242 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18243 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18244 if (el_mode != in_mode
18245 || n != in_n)
18246 return NULL_TREE;
18247
18248 switch (fn)
18249 {
18250 CASE_CFN_EXP:
18251 CASE_CFN_LOG:
18252 CASE_CFN_LOG10:
18253 CASE_CFN_POW:
18254 CASE_CFN_TANH:
18255 CASE_CFN_TAN:
18256 CASE_CFN_ATAN:
18257 CASE_CFN_ATAN2:
18258 CASE_CFN_ATANH:
18259 CASE_CFN_CBRT:
18260 CASE_CFN_SINH:
18261 CASE_CFN_SIN:
18262 CASE_CFN_ASINH:
18263 CASE_CFN_ASIN:
18264 CASE_CFN_COSH:
18265 CASE_CFN_COS:
18266 CASE_CFN_ACOSH:
18267 CASE_CFN_ACOS:
18268 if ((el_mode != DFmode || n != 2)
18269 && (el_mode != SFmode || n != 4))
18270 return NULL_TREE;
18271 break;
18272
18273 default:
18274 return NULL_TREE;
18275 }
18276
18277 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18278 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18279
18280 if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
18281 strcpy (name, "vmlsLn4");
18282 else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
18283 strcpy (name, "vmldLn2");
18284 else if (n == 4)
18285 {
18286 sprintf (name, "vmls%s", bname+10);
18287 name[strlen (name)-1] = '4';
18288 }
18289 else
18290 sprintf (name, "vmld%s2", bname+10);
18291
18292 /* Convert to uppercase. */
18293 name[4] &= ~0x20;
18294
18295 arity = 0;
18296 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18297 arity++;
18298
18299 if (arity == 1)
18300 fntype = build_function_type_list (type_out, type_in, NULL);
18301 else
18302 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18303
18304 /* Build a function declaration for the vectorized function. */
18305 new_fndecl = build_decl (BUILTINS_LOCATION,
18306 FUNCTION_DECL, get_identifier (name), fntype);
18307 TREE_PUBLIC (new_fndecl) = 1;
18308 DECL_EXTERNAL (new_fndecl) = 1;
18309 DECL_IS_NOVOPS (new_fndecl) = 1;
18310 TREE_READONLY (new_fndecl) = 1;
18311
18312 return new_fndecl;
18313 }
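/* Illustrative examples of the SVML name mangling above (editor's note):
   sinf vectorized over 4 floats yields "vmlsSin4", sin over 2 doubles
   yields "vmldSin2", and logf/log are special-cased to "vmlsLn4" and
   "vmldLn2" respectively.  */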
18314
18315 /* Handler for an ACML-style interface to
18316 a library with vectorized intrinsics. */
18317
18318 tree
18319 ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
18320 {
18321 char name[20] = "__vr.._";
18322 tree fntype, new_fndecl, args;
18323 unsigned arity;
18324 const char *bname;
18325 machine_mode el_mode, in_mode;
18326 int n, in_n;
18327
18328 /* The ACML library is 64-bit only and suitable for unsafe math only, as
18329 it does not correctly support parts of IEEE arithmetic with the required
18330 precision, such as denormals. */
18331 if (!TARGET_64BIT
18332 || !flag_unsafe_math_optimizations)
18333 return NULL_TREE;
18334
18335 el_mode = TYPE_MODE (TREE_TYPE (type_out));
18336 n = TYPE_VECTOR_SUBPARTS (type_out);
18337 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18338 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18339 if (el_mode != in_mode
18340 || n != in_n)
18341 return NULL_TREE;
18342
18343 switch (fn)
18344 {
18345 CASE_CFN_SIN:
18346 CASE_CFN_COS:
18347 CASE_CFN_EXP:
18348 CASE_CFN_LOG:
18349 CASE_CFN_LOG2:
18350 CASE_CFN_LOG10:
18351 if (el_mode == DFmode && n == 2)
18352 {
18353 name[4] = 'd';
18354 name[5] = '2';
18355 }
18356 else if (el_mode == SFmode && n == 4)
18357 {
18358 name[4] = 's';
18359 name[5] = '4';
18360 }
18361 else
18362 return NULL_TREE;
18363 break;
18364
18365 default:
18366 return NULL_TREE;
18367 }
18368
18369 tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn);
18370 bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
18371 sprintf (name + 7, "%s", bname+10);
18372
18373 arity = 0;
18374 for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
18375 arity++;
18376
18377 if (arity == 1)
18378 fntype = build_function_type_list (type_out, type_in, NULL);
18379 else
18380 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
18381
18382 /* Build a function declaration for the vectorized function. */
18383 new_fndecl = build_decl (BUILTINS_LOCATION,
18384 FUNCTION_DECL, get_identifier (name), fntype);
18385 TREE_PUBLIC (new_fndecl) = 1;
18386 DECL_EXTERNAL (new_fndecl) = 1;
18387 DECL_IS_NOVOPS (new_fndecl) = 1;
18388 TREE_READONLY (new_fndecl) = 1;
18389
18390 return new_fndecl;
18391 }
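/* Illustrative examples of the ACML name mangling above (editor's note):
   sin over 2 doubles yields "__vrd2_sin", and sinf over 4 floats yields
   "__vrs4_sinf".  */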
18392
18393 /* Return the decl of a function that implements a scatter store with
18394 register type VECTYPE, index type INDEX_TYPE and scale SCALE.
18395 Return NULL_TREE if it is not available. */
18396
18397 static tree
18398 ix86_vectorize_builtin_scatter (const_tree vectype,
18399 const_tree index_type, int scale)
18400 {
18401 bool si;
18402 enum ix86_builtins code;
18403
18404 if (!TARGET_AVX512F)
18405 return NULL_TREE;
18406
18407 if ((TREE_CODE (index_type) != INTEGER_TYPE
18408 && !POINTER_TYPE_P (index_type))
18409 || (TYPE_MODE (index_type) != SImode
18410 && TYPE_MODE (index_type) != DImode))
18411 return NULL_TREE;
18412
18413 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
18414 return NULL_TREE;
18415
18416 /* The v*scatter* insns sign-extend the index to pointer mode. */
18417 if (TYPE_PRECISION (index_type) < POINTER_SIZE
18418 && TYPE_UNSIGNED (index_type))
18419 return NULL_TREE;
18420
18421 /* Scale can be 1, 2, 4 or 8. */
18422 if (scale <= 0
18423 || scale > 8
18424 || (scale & (scale - 1)) != 0)
18425 return NULL_TREE;
18426
18427 si = TYPE_MODE (index_type) == SImode;
18428 switch (TYPE_MODE (vectype))
18429 {
18430 case E_V8DFmode:
18431 code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
18432 break;
18433 case E_V8DImode:
18434 code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
18435 break;
18436 case E_V16SFmode:
18437 code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
18438 break;
18439 case E_V16SImode:
18440 code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
18441 break;
18442 case E_V4DFmode:
18443 if (TARGET_AVX512VL)
18444 code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
18445 else
18446 return NULL_TREE;
18447 break;
18448 case E_V4DImode:
18449 if (TARGET_AVX512VL)
18450 code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
18451 else
18452 return NULL_TREE;
18453 break;
18454 case E_V8SFmode:
18455 if (TARGET_AVX512VL)
18456 code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
18457 else
18458 return NULL_TREE;
18459 break;
18460 case E_V8SImode:
18461 if (TARGET_AVX512VL)
18462 code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
18463 else
18464 return NULL_TREE;
18465 break;
18466 case E_V2DFmode:
18467 if (TARGET_AVX512VL)
18468 code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
18469 else
18470 return NULL_TREE;
18471 break;
18472 case E_V2DImode:
18473 if (TARGET_AVX512VL)
18474 code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
18475 else
18476 return NULL_TREE;
18477 break;
18478 case E_V4SFmode:
18479 if (TARGET_AVX512VL)
18480 code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
18481 else
18482 return NULL_TREE;
18483 break;
18484 case E_V4SImode:
18485 if (TARGET_AVX512VL)
18486 code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
18487 else
18488 return NULL_TREE;
18489 break;
18490 default:
18491 return NULL_TREE;
18492 }
18493
18494 return get_ix86_builtin (code);
18495 }
18496
18497 /* Return true if it is safe to use the rsqrt optabs to optimize
18498 1.0/sqrt. */
18499
18500 static bool
18501 use_rsqrt_p ()
18502 {
18503 return (TARGET_SSE && TARGET_SSE_MATH
18504 && flag_finite_math_only
18505 && !flag_trapping_math
18506 && flag_unsafe_math_optimizations);
18507 }
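/* Editor's note: the three flag_* conditions above all hold under
   -ffast-math, so with SSE math enabled 1.0/sqrt(x) may then be expanded
   via the rsqrt approximation.  */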
18508 \f
18509 /* Helper for avx_vpermilps256_operand et al. This is also used by
18510 the expansion functions to turn the parallel back into a mask.
18511 The return value is 0 for no match and the imm8+1 for a match. */
18512
18513 int
18514 avx_vpermilp_parallel (rtx par, machine_mode mode)
18515 {
18516 unsigned i, nelt = GET_MODE_NUNITS (mode);
18517 unsigned mask = 0;
18518 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
18519
18520 if (XVECLEN (par, 0) != (int) nelt)
18521 return 0;
18522
18523 /* Validate that all of the elements are constants, and not totally
18524 out of range. Copy the data into an integral array to make the
18525 subsequent checks easier. */
18526 for (i = 0; i < nelt; ++i)
18527 {
18528 rtx er = XVECEXP (par, 0, i);
18529 unsigned HOST_WIDE_INT ei;
18530
18531 if (!CONST_INT_P (er))
18532 return 0;
18533 ei = INTVAL (er);
18534 if (ei >= nelt)
18535 return 0;
18536 ipar[i] = ei;
18537 }
18538
18539 switch (mode)
18540 {
18541 case E_V8DFmode:
18542 /* In the 512-bit DFmode case, we can only move elements within
18543 a 128-bit lane. First fill the second part of the mask,
18544 then fallthru. */
18545 for (i = 4; i < 6; ++i)
18546 {
18547 if (ipar[i] < 4 || ipar[i] >= 6)
18548 return 0;
18549 mask |= (ipar[i] - 4) << i;
18550 }
18551 for (i = 6; i < 8; ++i)
18552 {
18553 if (ipar[i] < 6)
18554 return 0;
18555 mask |= (ipar[i] - 6) << i;
18556 }
18557 /* FALLTHRU */
18558
18559 case E_V4DFmode:
18560 /* In the 256-bit DFmode case, we can only move elements within
18561 a 128-bit lane. */
18562 for (i = 0; i < 2; ++i)
18563 {
18564 if (ipar[i] >= 2)
18565 return 0;
18566 mask |= ipar[i] << i;
18567 }
18568 for (i = 2; i < 4; ++i)
18569 {
18570 if (ipar[i] < 2)
18571 return 0;
18572 mask |= (ipar[i] - 2) << i;
18573 }
18574 break;
18575
18576 case E_V16SFmode:
18577 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
18578 must mirror the permutation in the lower 256 bits. */
18579 for (i = 0; i < 8; ++i)
18580 if (ipar[i] + 8 != ipar[i + 8])
18581 return 0;
18582 /* FALLTHRU */
18583
18584 case E_V8SFmode:
18585 /* In the 256-bit SFmode case, we have full freedom of
18586 movement within the low 128-bit lane, but the high 128-bit
18587 lane must mirror the exact same pattern. */
18588 for (i = 0; i < 4; ++i)
18589 if (ipar[i] + 4 != ipar[i + 4])
18590 return 0;
18591 nelt = 4;
18592 /* FALLTHRU */
18593
18594 case E_V2DFmode:
18595 case E_V4SFmode:
18596 /* In the 128-bit case, we have full freedom in the placement of
18597 the elements from the source operand. */
18598 for (i = 0; i < nelt; ++i)
18599 mask |= ipar[i] << (i * (nelt / 2));
18600 break;
18601
18602 default:
18603 gcc_unreachable ();
18604 }
18605
18606 /* Make sure success has a non-zero value by adding one. */
18607 return mask + 1;
18608 }
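/* Worked example for avx_vpermilp_parallel above (editor's note,
   hypothetical selector): for V4SFmode a parallel selecting elements
   (0 3 2 1) produces mask 0<<0 | 3<<2 | 2<<4 | 1<<6 = 0x6c, and the
   function returns 0x6d (imm8 + 1).  */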
18609
18610 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
18611 the expansion functions to turn the parallel back into a mask.
18612 The return value is 0 for no match and the imm8+1 for a match. */
18613
18614 int
18615 avx_vperm2f128_parallel (rtx par, machine_mode mode)
18616 {
18617 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
18618 unsigned mask = 0;
18619 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
18620
18621 if (XVECLEN (par, 0) != (int) nelt)
18622 return 0;
18623
18624 /* Validate that all of the elements are constants, and not totally
18625 out of range. Copy the data into an integral array to make the
18626 subsequent checks easier. */
18627 for (i = 0; i < nelt; ++i)
18628 {
18629 rtx er = XVECEXP (par, 0, i);
18630 unsigned HOST_WIDE_INT ei;
18631
18632 if (!CONST_INT_P (er))
18633 return 0;
18634 ei = INTVAL (er);
18635 if (ei >= 2 * nelt)
18636 return 0;
18637 ipar[i] = ei;
18638 }
18639
18640 /* Validate that the elements within each half of the permute are consecutive. */
18641 for (i = 0; i < nelt2 - 1; ++i)
18642 if (ipar[i] + 1 != ipar[i + 1])
18643 return 0;
18644 for (i = nelt2; i < nelt - 1; ++i)
18645 if (ipar[i] + 1 != ipar[i + 1])
18646 return 0;
18647
18648 /* Reconstruct the mask. */
18649 for (i = 0; i < 2; ++i)
18650 {
18651 unsigned e = ipar[i * nelt2];
18652 if (e % nelt2)
18653 return 0;
18654 e /= nelt2;
18655 mask |= e << (i * 4);
18656 }
18657
18658 /* Make sure success has a non-zero value by adding one. */
18659 return mask + 1;
18660 }
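/* Worked example for avx_vperm2f128_parallel above (editor's note,
   hypothetical selector): for V8SFmode the parallel (8 9 10 11 4 5 6 7)
   selects the low half of the second operand and the high half of the
   first, giving mask (2 << 0) | (1 << 4) = 0x12 and a return value of
   0x13.  */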
18661 \f
18662 /* Return a register priority for hard reg REGNO. */
18663 static int
18664 ix86_register_priority (int hard_regno)
18665 {
18666 /* ebp and r13 as the base always want a displacement, and r12 as the
18667 base always wants an index, so discourage their use in an
18668 address. */
18669 if (hard_regno == R12_REG || hard_regno == R13_REG)
18670 return 0;
18671 if (hard_regno == BP_REG)
18672 return 1;
18673 /* New x86-64 int registers result in bigger code size. Discourage
18674 them. */
18675 if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG))
18676 return 2;
18677 /* New x86-64 SSE registers result in bigger code size. Discourage
18678 them. */
18679 if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG))
18680 return 2;
18681 if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG))
18682 return 1;
18683 /* Usage of AX register results in smaller code. Prefer it. */
18684 if (hard_regno == AX_REG)
18685 return 4;
18686 return 3;
18687 }
18688
18689 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
18690
18691 Put float CONST_DOUBLE in the constant pool instead of fp regs.
18692 QImode must go into class Q_REGS.
18693 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18694 movdf to do mem-to-mem moves through integer regs. */
18695
18696 static reg_class_t
18697 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
18698 {
18699 machine_mode mode = GET_MODE (x);
18700
18701 /* We're only allowed to return a subclass of CLASS. Many of the
18702 following checks fail for NO_REGS, so eliminate that early. */
18703 if (regclass == NO_REGS)
18704 return NO_REGS;
18705
18706 /* All classes can load zeros. */
18707 if (x == CONST0_RTX (mode))
18708 return regclass;
18709
18710 /* Force constants into memory if we are loading a (nonzero) constant into
18711 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
18712 instructions to load from a constant. */
18713 if (CONSTANT_P (x)
18714 && (MAYBE_MMX_CLASS_P (regclass)
18715 || MAYBE_SSE_CLASS_P (regclass)
18716 || MAYBE_MASK_CLASS_P (regclass)))
18717 return NO_REGS;
18718
18719 /* Floating-point constants need more complex checks. */
18720 if (CONST_DOUBLE_P (x))
18721 {
18722 /* General regs can load everything. */
18723 if (INTEGER_CLASS_P (regclass))
18724 return regclass;
18725
18726 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18727 zero above. We only want to wind up preferring 80387 registers if
18728 we plan on doing computation with them. */
18729 if (IS_STACK_MODE (mode)
18730 && standard_80387_constant_p (x) > 0)
18731 {
18732 /* Limit class to FP regs. */
18733 if (FLOAT_CLASS_P (regclass))
18734 return FLOAT_REGS;
18735 }
18736
18737 return NO_REGS;
18738 }
18739
18740 /* Prefer SSE regs only, if we can use them for math. */
18741 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18742 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
18743
18744 /* Generally when we see PLUS here, it's the function invariant
18745      (plus soft-fp const_int), which can only be computed into general
18746 regs. */
18747 if (GET_CODE (x) == PLUS)
18748 return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
18749
18750 /* QImode constants are easy to load, but non-constant QImode data
18751 must go into Q_REGS or ALL_MASK_REGS. */
18752 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18753 {
18754 if (Q_CLASS_P (regclass))
18755 return regclass;
18756 else if (reg_class_subset_p (Q_REGS, regclass))
18757 return Q_REGS;
18758 else if (MASK_CLASS_P (regclass))
18759 return regclass;
18760 else
18761 return NO_REGS;
18762 }
18763
18764 return regclass;
18765 }
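/* Illustrative examples of the above behaviour (assuming typical target
   settings): reloading the XFmode constant 1.0 into FLOAT_REGS keeps
   FLOAT_REGS, since standard_80387_constant_p recognizes it (fld1);
   reloading any nonzero constant into an SSE, MMX or mask class yields
   NO_REGS, pushing the constant to memory; and a non-constant QImode
   value asked to go into GENERAL_REGS is narrowed to Q_REGS.  */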
18766
18767 /* Discourage putting floating-point values in SSE registers unless
18768 SSE math is being used, and likewise for the 387 registers. */
18769 static reg_class_t
18770 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
18771 {
18772 /* Restrict the output reload class to the register bank that we are doing
18773 math on. If we would like not to return a subset of CLASS, reject this
18774 alternative: if reload cannot do this, it will still use its choice. */
18775 machine_mode mode = GET_MODE (x);
18776 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
18777 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
18778
18779 if (IS_STACK_MODE (mode))
18780 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
18781
18782 return regclass;
18783 }
18784
18785 static reg_class_t
18786 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
18787 machine_mode mode, secondary_reload_info *sri)
18788 {
18789 /* Double-word spills from general registers to non-offsettable memory
18790 references (zero-extended addresses) require special handling. */
18791 if (TARGET_64BIT
18792 && MEM_P (x)
18793 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
18794 && INTEGER_CLASS_P (rclass)
18795 && !offsettable_memref_p (x))
18796 {
18797 sri->icode = (in_p
18798 ? CODE_FOR_reload_noff_load
18799 : CODE_FOR_reload_noff_store);
18800 /* Add the cost of moving address to a temporary. */
18801 sri->extra_cost = 1;
18802
18803 return NO_REGS;
18804 }
18805
18806 /* QImode spills from non-QI registers require
18807      an intermediate register on 32-bit targets.  */
18808 if (mode == QImode
18809 && ((!TARGET_64BIT && !in_p
18810 && INTEGER_CLASS_P (rclass)
18811 && MAYBE_NON_Q_CLASS_P (rclass))
18812 || (!TARGET_AVX512DQ
18813 && MAYBE_MASK_CLASS_P (rclass))))
18814 {
18815 int regno = true_regnum (x);
18816
18817 /* Return Q_REGS if the operand is in memory. */
18818 if (regno == -1)
18819 return Q_REGS;
18820
18821 return NO_REGS;
18822 }
18823
18824   /* This condition handles the corner case where an expression involving
18825 pointers gets vectorized. We're trying to use the address of a
18826 stack slot as a vector initializer.
18827
18828 (set (reg:V2DI 74 [ vect_cst_.2 ])
18829 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
18830
18831 Eventually frame gets turned into sp+offset like this:
18832
18833 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18834 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18835 (const_int 392 [0x188]))))
18836
18837 That later gets turned into:
18838
18839 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18840 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
18841 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
18842
18843 We'll have the following reload recorded:
18844
18845 Reload 0: reload_in (DI) =
18846 (plus:DI (reg/f:DI 7 sp)
18847 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
18848 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18849 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
18850 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
18851 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
18852 reload_reg_rtx: (reg:V2DI 22 xmm1)
18853
18854 Which isn't going to work since SSE instructions can't handle scalar
18855 additions. Returning GENERAL_REGS forces the addition into integer
18856 register and reload can handle subsequent reloads without problems. */
18857
18858 if (in_p && GET_CODE (x) == PLUS
18859 && SSE_CLASS_P (rclass)
18860 && SCALAR_INT_MODE_P (mode))
18861 return GENERAL_REGS;
18862
18863 return NO_REGS;
18864 }
18865
18866 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
18867
18868 static bool
18869 ix86_class_likely_spilled_p (reg_class_t rclass)
18870 {
18871 switch (rclass)
18872 {
18873 case AREG:
18874 case DREG:
18875 case CREG:
18876 case BREG:
18877 case AD_REGS:
18878 case SIREG:
18879 case DIREG:
18880 case SSE_FIRST_REG:
18881 case FP_TOP_REG:
18882 case FP_SECOND_REG:
18883 return true;
18884
18885 default:
18886 break;
18887 }
18888
18889 return false;
18890 }
18891
18892 /* Return true if a set of DST by the expression SRC should be allowed.
18893 This prevents complex sets of likely_spilled hard regs before reload. */
18894
18895 bool
18896 ix86_hardreg_mov_ok (rtx dst, rtx src)
18897 {
18898 /* Avoid complex sets of likely_spilled hard registers before reload. */
18899 if (REG_P (dst) && HARD_REGISTER_P (dst)
18900 && !REG_P (src) && !MEM_P (src)
18901 && !x86_64_immediate_operand (src, GET_MODE (dst))
18902 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
18903 && !reload_completed)
18904 return false;
18905 return true;
18906 }
18907
18908 /* If we are copying between registers from different register sets
18909 (e.g. FP and integer), we may need a memory location.
18910
18911 The function can't work reliably when one of the CLASSES is a class
18912 containing registers from multiple sets. We avoid this by never combining
18913 different sets in a single alternative in the machine description.
18914 Ensure that this constraint holds to avoid unexpected surprises.
18915
18916 When STRICT is false, we are being called from REGISTER_MOVE_COST,
18917 so do not enforce these sanity checks.
18918
18919 To optimize register_move_cost performance, define inline variant. */
18920
18921 static inline bool
18922 inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18923 reg_class_t class2, int strict)
18924 {
18925 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
18926 return false;
18927
18928 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18929 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18930 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18931 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18932 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18933 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
18934 || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
18935 || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
18936 {
18937 gcc_assert (!strict || lra_in_progress);
18938 return true;
18939 }
18940
18941 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18942 return true;
18943
18944   /* ??? This is a lie.  We do have moves between mmx/general and between
18945      mmx/sse2.  But by saying we need secondary memory we discourage the
18946 register allocator from using the mmx registers unless needed. */
18947 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18948 return true;
18949
18950 /* Between mask and general, we have moves no larger than word size. */
18951 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
18952 {
18953 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18954 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18955 return true;
18956 }
18957
18958 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18959 {
18960 /* SSE1 doesn't have any direct moves from other classes. */
18961 if (!TARGET_SSE2)
18962 return true;
18963
18964 /* Between SSE and general, we have moves no larger than word size. */
18965 if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
18966 || GET_MODE_SIZE (mode) < GET_MODE_SIZE (SImode)
18967 || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18968 return true;
18969
18970 /* If the target says that inter-unit moves are more expensive
18971 than moving through memory, then don't generate them. */
18972 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
18973 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
18974 return true;
18975 }
18976
18977 return false;
18978 }
18979
18980 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
18981
18982 static bool
18983 ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
18984 reg_class_t class2)
18985 {
18986 return inline_secondary_memory_needed (mode, class1, class2, true);
18987 }
18988
18989 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
18990
18991 get_secondary_mem widens integral modes to BITS_PER_WORD.
18992    There is no need to emit a full 64-bit move on 64-bit targets
18993    for integral modes that can be moved using a 32-bit move.  */
18994
18995 static machine_mode
18996 ix86_secondary_memory_needed_mode (machine_mode mode)
18997 {
18998 if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
18999 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
19000 return mode;
19001 }
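/* For example (illustrative): a QImode or HImode value spilled through
   secondary memory is widened to SImode here, so only a 32-bit move is
   emitted, while SImode and wider integral modes keep their own mode.  */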
19002
19003 /* Implement the TARGET_CLASS_MAX_NREGS hook.
19004
19005 On the 80386, this is the size of MODE in words,
19006 except in the FP regs, where a single reg is always enough. */
19007
19008 static unsigned char
19009 ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
19010 {
19011 if (MAYBE_INTEGER_CLASS_P (rclass))
19012 {
19013 if (mode == XFmode)
19014 return (TARGET_64BIT ? 2 : 3);
19015 else if (mode == XCmode)
19016 return (TARGET_64BIT ? 4 : 6);
19017 else
19018 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
19019 }
19020 else
19021 {
19022 if (COMPLEX_MODE_P (mode))
19023 return 2;
19024 else
19025 return 1;
19026 }
19027 }
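/* Worked example (illustrative): XFmode in an integer class needs 3
   registers on ia32 (2 on x86-64), XCmode needs 6 (4 on x86-64), and
   DImode needs CEIL (8, UNITS_PER_WORD), i.e. 2 on ia32 and 1 on x86-64.
   In a non-integer class a single register always suffices, except that
   complex modes take a register pair.  */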
19028
19029 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
19030
19031 static bool
19032 ix86_can_change_mode_class (machine_mode from, machine_mode to,
19033 reg_class_t regclass)
19034 {
19035 if (from == to)
19036 return true;
19037
19038 /* x87 registers can't do subreg at all, as all values are reformatted
19039 to extended precision. */
19040 if (MAYBE_FLOAT_CLASS_P (regclass))
19041 return false;
19042
19043 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
19044 {
19045 /* Vector registers do not support QI or HImode loads. If we don't
19046 disallow a change to these modes, reload will assume it's ok to
19047 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19048 the vec_dupv4hi pattern. */
19049 if (GET_MODE_SIZE (from) < 4)
19050 return false;
19051 }
19052
19053 return true;
19054 }
19055
19056 /* Return index of MODE in the sse load/store tables. */
19057
19058 static inline int
19059 sse_store_index (machine_mode mode)
19060 {
19061 switch (GET_MODE_SIZE (mode))
19062 {
19063 case 4:
19064 return 0;
19065 case 8:
19066 return 1;
19067 case 16:
19068 return 2;
19069 case 32:
19070 return 3;
19071 case 64:
19072 return 4;
19073 default:
19074 return -1;
19075 }
19076 }
19077
19078 /* Return the cost of moving data of mode M between a
19079 register and memory. A value of 2 is the default; this cost is
19080 relative to those in `REGISTER_MOVE_COST'.
19081
19082    This function is used extensively by register_move_cost, which is used
19083    to build tables at startup, so make it inline in this case.
19084    When IN is 2, return the maximum of the in and out move costs.
19085
19086 If moving between registers and memory is more expensive than
19087 between two registers, you should define this macro to express the
19088 relative cost.
19089
19090    Also model the increased cost of moving QImode registers in
19091    non-Q_REGS classes.
19092 */
19093 static inline int
19094 inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
19095 {
19096 int cost;
19097 if (FLOAT_CLASS_P (regclass))
19098 {
19099 int index;
19100 switch (mode)
19101 {
19102 case E_SFmode:
19103 index = 0;
19104 break;
19105 case E_DFmode:
19106 index = 1;
19107 break;
19108 case E_XFmode:
19109 index = 2;
19110 break;
19111 default:
19112 return 100;
19113 }
19114 if (in == 2)
19115 return MAX (ix86_cost->hard_register.fp_load [index],
19116 ix86_cost->hard_register.fp_store [index]);
19117 return in ? ix86_cost->hard_register.fp_load [index]
19118 : ix86_cost->hard_register.fp_store [index];
19119 }
19120 if (SSE_CLASS_P (regclass))
19121 {
19122 int index = sse_store_index (mode);
19123 if (index == -1)
19124 return 100;
19125 if (in == 2)
19126 return MAX (ix86_cost->hard_register.sse_load [index],
19127 ix86_cost->hard_register.sse_store [index]);
19128 return in ? ix86_cost->hard_register.sse_load [index]
19129 : ix86_cost->hard_register.sse_store [index];
19130 }
19131 if (MASK_CLASS_P (regclass))
19132 {
19133 int index;
19134 switch (GET_MODE_SIZE (mode))
19135 {
19136 case 1:
19137 index = 0;
19138 break;
19139 case 2:
19140 index = 1;
19141 break;
19142 /* DImode loads and stores assumed to cost the same as SImode. */
19143 default:
19144 index = 2;
19145 break;
19146 }
19147
19148 if (in == 2)
19149 return MAX (ix86_cost->hard_register.mask_load[index],
19150 ix86_cost->hard_register.mask_store[index]);
19151       return in ? ix86_cost->hard_register.mask_load[index]
19152                 : ix86_cost->hard_register.mask_store[index];
19153 }
19154 if (MMX_CLASS_P (regclass))
19155 {
19156 int index;
19157 switch (GET_MODE_SIZE (mode))
19158 {
19159 case 4:
19160 index = 0;
19161 break;
19162 case 8:
19163 index = 1;
19164 break;
19165 default:
19166 return 100;
19167 }
19168 if (in == 2)
19169 return MAX (ix86_cost->hard_register.mmx_load [index],
19170 ix86_cost->hard_register.mmx_store [index]);
19171 return in ? ix86_cost->hard_register.mmx_load [index]
19172 : ix86_cost->hard_register.mmx_store [index];
19173 }
19174 switch (GET_MODE_SIZE (mode))
19175 {
19176 case 1:
19177 if (Q_CLASS_P (regclass) || TARGET_64BIT)
19178 {
19179 if (!in)
19180 return ix86_cost->hard_register.int_store[0];
19181 if (TARGET_PARTIAL_REG_DEPENDENCY
19182 && optimize_function_for_speed_p (cfun))
19183 cost = ix86_cost->hard_register.movzbl_load;
19184 else
19185 cost = ix86_cost->hard_register.int_load[0];
19186 if (in == 2)
19187 return MAX (cost, ix86_cost->hard_register.int_store[0]);
19188 return cost;
19189 }
19190 else
19191 {
19192 if (in == 2)
19193 return MAX (ix86_cost->hard_register.movzbl_load,
19194 ix86_cost->hard_register.int_store[0] + 4);
19195 if (in)
19196 return ix86_cost->hard_register.movzbl_load;
19197 else
19198 return ix86_cost->hard_register.int_store[0] + 4;
19199 }
19200 break;
19201 case 2:
19202 if (in == 2)
19203 return MAX (ix86_cost->hard_register.int_load[1],
19204 ix86_cost->hard_register.int_store[1]);
19205 return in ? ix86_cost->hard_register.int_load[1]
19206 : ix86_cost->hard_register.int_store[1];
19207 default:
19208 if (in == 2)
19209 cost = MAX (ix86_cost->hard_register.int_load[2],
19210 ix86_cost->hard_register.int_store[2]);
19211 else if (in)
19212 cost = ix86_cost->hard_register.int_load[2];
19213 else
19214 cost = ix86_cost->hard_register.int_store[2];
19215 /* Multiply with the number of GPR moves needed. */
19216 return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
19217 }
19218 }
19219
19220 static int
19221 ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
19222 {
19223 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
19224 }
19225
19226
19227 /* Return the cost of moving data from a register in class CLASS1 to
19228 one in class CLASS2.
19229
19230 It is not required that the cost always equal 2 when FROM is the same as TO;
19231 on some machines it is expensive to move between registers if they are not
19232 general registers. */
19233
19234 static int
19235 ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
19236 reg_class_t class2_i)
19237 {
19238 enum reg_class class1 = (enum reg_class) class1_i;
19239 enum reg_class class2 = (enum reg_class) class2_i;
19240
19241 /* In case we require secondary memory, compute cost of the store followed
19242      by load.  In order to avoid bad register allocation choices, we need
19243      this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
19244
19245 if (inline_secondary_memory_needed (mode, class1, class2, false))
19246 {
19247 int cost = 1;
19248
19249 cost += inline_memory_move_cost (mode, class1, 2);
19250 cost += inline_memory_move_cost (mode, class2, 2);
19251
19252       /* In the case of copying from a general purpose register we may emit
19253          multiple stores followed by a single load, causing a memory size
19254          mismatch stall.  Count this with an arbitrarily high cost of 20.  */
19255 if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
19256 && TARGET_MEMORY_MISMATCH_STALL
19257 && targetm.class_max_nregs (class1, mode)
19258 > targetm.class_max_nregs (class2, mode))
19259 cost += 20;
19260
19261 /* In the case of FP/MMX moves, the registers actually overlap, and we
19262 have to switch modes in order to treat them differently. */
19263 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19264 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19265 cost += 20;
19266
19267 return cost;
19268 }
19269
19270 /* Moves between MMX and non-MMX units require secondary memory. */
19271 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19272 gcc_unreachable ();
19273
19274 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19275 return (SSE_CLASS_P (class1)
19276 ? ix86_cost->hard_register.sse_to_integer
19277 : ix86_cost->hard_register.integer_to_sse);
19278
19279 /* Moves between mask register and GPR. */
19280 if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
19281 {
19282 return (MASK_CLASS_P (class1)
19283 ? ix86_cost->hard_register.mask_to_integer
19284 : ix86_cost->hard_register.integer_to_mask);
19285 }
19286 /* Moving between mask registers. */
19287 if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
19288 return ix86_cost->hard_register.mask_move;
19289
19290 if (MAYBE_FLOAT_CLASS_P (class1))
19291 return ix86_cost->hard_register.fp_move;
19292 if (MAYBE_SSE_CLASS_P (class1))
19293 {
19294 if (GET_MODE_BITSIZE (mode) <= 128)
19295 return ix86_cost->hard_register.xmm_move;
19296 if (GET_MODE_BITSIZE (mode) <= 256)
19297 return ix86_cost->hard_register.ymm_move;
19298 return ix86_cost->hard_register.zmm_move;
19299 }
19300 if (MAYBE_MMX_CLASS_P (class1))
19301 return ix86_cost->hard_register.mmx_move;
19302 return 2;
19303 }
19304
19305 /* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in
19306 words of a value of mode MODE but can be less for certain modes in
19307 special long registers.
19308
19309 Actually there are no two word move instructions for consecutive
19310 registers. And only registers 0-3 may have mov byte instructions
19311 applied to them. */
19312
19313 static unsigned int
19314 ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
19315 {
19316 if (GENERAL_REGNO_P (regno))
19317 {
19318 if (mode == XFmode)
19319 return TARGET_64BIT ? 2 : 3;
19320 if (mode == XCmode)
19321 return TARGET_64BIT ? 4 : 6;
19322 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
19323 }
19324 if (COMPLEX_MODE_P (mode))
19325 return 2;
19326 /* Register pair for mask registers. */
19327 if (mode == P2QImode || mode == P2HImode)
19328 return 2;
19329 if (mode == V64SFmode || mode == V64SImode)
19330 return 4;
19331 return 1;
19332 }
19333
19334 /* Implement REGMODE_NATURAL_SIZE(MODE). */
19335 unsigned int
19336 ix86_regmode_natural_size (machine_mode mode)
19337 {
19338 if (mode == P2HImode || mode == P2QImode)
19339 return GET_MODE_SIZE (mode) / 2;
19340 return UNITS_PER_WORD;
19341 }
19342
19343 /* Implement TARGET_HARD_REGNO_MODE_OK. */
19344
19345 static bool
19346 ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
19347 {
19348 /* Flags and only flags can only hold CCmode values. */
19349 if (CC_REGNO_P (regno))
19350 return GET_MODE_CLASS (mode) == MODE_CC;
19351 if (GET_MODE_CLASS (mode) == MODE_CC
19352 || GET_MODE_CLASS (mode) == MODE_RANDOM)
19353 return false;
19354 if (STACK_REGNO_P (regno))
19355 return VALID_FP_MODE_P (mode);
19356 if (MASK_REGNO_P (regno))
19357 {
19358       /* A register pair only starts at an even register number.  */
19359 if ((mode == P2QImode || mode == P2HImode))
19360 	return MASK_PAIR_REGNO_P (regno);
19361
19362 return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
19363 || (TARGET_AVX512BW
19364 && VALID_MASK_AVX512BW_MODE (mode)));
19365 }
19366
19367 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19368 return false;
19369
19370 if (SSE_REGNO_P (regno))
19371 {
19372 /* We implement the move patterns for all vector modes into and
19373 out of SSE registers, even when no operation instructions
19374 are available. */
19375
19376 /* For AVX-512 we allow, regardless of regno:
19377 - XI mode
19378 - any of 512-bit wide vector mode
19379 - any scalar mode. */
19380 if (TARGET_AVX512F
19381 && (mode == XImode
19382 || VALID_AVX512F_REG_MODE (mode)
19383 || VALID_AVX512F_SCALAR_MODE (mode)))
19384 return true;
19385
19386 /* For AVX-5124FMAPS or AVX-5124VNNIW
19387 allow V64SF and V64SI modes for special regnos. */
19388 if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW)
19389 && (mode == V64SFmode || mode == V64SImode)
19390 && MOD4_SSE_REGNO_P (regno))
19391 return true;
19392
19393 /* TODO check for QI/HI scalars. */
19394 /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
19395 if (TARGET_AVX512VL
19396 && (mode == OImode
19397 || mode == TImode
19398 || VALID_AVX256_REG_MODE (mode)
19399 || VALID_AVX512VL_128_REG_MODE (mode)))
19400 return true;
19401
19402 /* xmm16-xmm31 are only available for AVX-512. */
19403 if (EXT_REX_SSE_REGNO_P (regno))
19404 return false;
19405
19406 /* OImode and AVX modes are available only when AVX is enabled. */
19407 return ((TARGET_AVX
19408 && VALID_AVX256_REG_OR_OI_MODE (mode))
19409 || VALID_SSE_REG_MODE (mode)
19410 || VALID_SSE2_REG_MODE (mode)
19411 || VALID_MMX_REG_MODE (mode)
19412 || VALID_MMX_REG_MODE_3DNOW (mode));
19413 }
19414 if (MMX_REGNO_P (regno))
19415 {
19416 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19417 so if the register is available at all, then we can move data of
19418 the given mode into or out of it. */
19419 return (VALID_MMX_REG_MODE (mode)
19420 || VALID_MMX_REG_MODE_3DNOW (mode));
19421 }
19422
19423 if (mode == QImode)
19424 {
19425 /* Take care for QImode values - they can be in non-QI regs,
19426 but then they do cause partial register stalls. */
19427 if (ANY_QI_REGNO_P (regno))
19428 return true;
19429 if (!TARGET_PARTIAL_REG_STALL)
19430 return true;
19431 /* LRA checks if the hard register is OK for the given mode.
19432 QImode values can live in non-QI regs, so we allow all
19433 registers here. */
19434 if (lra_in_progress)
19435 return true;
19436 return !can_create_pseudo_p ();
19437 }
19438 /* We handle both integer and floats in the general purpose registers. */
19439 else if (VALID_INT_MODE_P (mode))
19440 return true;
19441 else if (VALID_FP_MODE_P (mode))
19442 return true;
19443 else if (VALID_DFP_MODE_P (mode))
19444 return true;
19445 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19446 on to use that value in smaller contexts, this can easily force a
19447 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19448 supporting DImode, allow it. */
19449 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19450 return true;
19451
19452 return false;
19453 }
19454
19455 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that
19456 saves SSE registers across calls is Win64 (thus no need to check the
19457 current ABI here), and with AVX enabled Win64 only guarantees that
19458 the low 16 bytes are saved. */
19459
19460 static bool
19461 ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
19462 machine_mode mode)
19463 {
19464 return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
19465 }
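/* Illustrative consequence (under the Win64 ABI described above): a
   32-byte V8SF value live in xmm6 across a call is treated as partially
   clobbered, since only the low 16 bytes of the register are preserved,
   whereas a 16-byte V4SF value in the same register is not.  */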
19466
19467 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19468 tieable integer mode. */
19469
19470 static bool
19471 ix86_tieable_integer_mode_p (machine_mode mode)
19472 {
19473 switch (mode)
19474 {
19475 case E_HImode:
19476 case E_SImode:
19477 return true;
19478
19479 case E_QImode:
19480 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19481
19482 case E_DImode:
19483 return TARGET_64BIT;
19484
19485 default:
19486 return false;
19487 }
19488 }
19489
19490 /* Implement TARGET_MODES_TIEABLE_P.
19491
19492 Return true if MODE1 is accessible in a register that can hold MODE2
19493 without copying. That is, all register classes that can hold MODE2
19494 can also hold MODE1. */
19495
19496 static bool
19497 ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
19498 {
19499 if (mode1 == mode2)
19500 return true;
19501
19502 if (ix86_tieable_integer_mode_p (mode1)
19503 && ix86_tieable_integer_mode_p (mode2))
19504 return true;
19505
19506 /* MODE2 being XFmode implies fp stack or general regs, which means we
19507 can tie any smaller floating point modes to it. Note that we do not
19508 tie this with TFmode. */
19509 if (mode2 == XFmode)
19510 return mode1 == SFmode || mode1 == DFmode;
19511
19512 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19513 that we can tie it with SFmode. */
19514 if (mode2 == DFmode)
19515 return mode1 == SFmode;
19516
19517 /* If MODE2 is only appropriate for an SSE register, then tie with
19518 any other mode acceptable to SSE registers. */
19519 if (GET_MODE_SIZE (mode2) == 64
19520 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19521 return (GET_MODE_SIZE (mode1) == 64
19522 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19523 if (GET_MODE_SIZE (mode2) == 32
19524 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19525 return (GET_MODE_SIZE (mode1) == 32
19526 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19527 if (GET_MODE_SIZE (mode2) == 16
19528 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19529 return (GET_MODE_SIZE (mode1) == 16
19530 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19531
19532 /* If MODE2 is appropriate for an MMX register, then tie
19533 with any other mode acceptable to MMX registers. */
19534 if (GET_MODE_SIZE (mode2) == 8
19535 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19536 return (GET_MODE_SIZE (mode1) == 8
19537 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19538
19539 return false;
19540 }
19541
19542 /* Return the cost of moving between two registers of mode MODE. */
19543
19544 static int
19545 ix86_set_reg_reg_cost (machine_mode mode)
19546 {
19547 unsigned int units = UNITS_PER_WORD;
19548
19549 switch (GET_MODE_CLASS (mode))
19550 {
19551 default:
19552 break;
19553
19554 case MODE_CC:
19555 units = GET_MODE_SIZE (CCmode);
19556 break;
19557
19558 case MODE_FLOAT:
19559 if ((TARGET_SSE && mode == TFmode)
19560 || (TARGET_80387 && mode == XFmode)
19561 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
19562 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
19563 units = GET_MODE_SIZE (mode);
19564 break;
19565
19566 case MODE_COMPLEX_FLOAT:
19567 if ((TARGET_SSE && mode == TCmode)
19568 || (TARGET_80387 && mode == XCmode)
19569 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
19570 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
19571 units = GET_MODE_SIZE (mode);
19572 break;
19573
19574 case MODE_VECTOR_INT:
19575 case MODE_VECTOR_FLOAT:
19576 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
19577 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
19578 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
19579 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
19580 || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
19581 && VALID_MMX_REG_MODE (mode)))
19582 units = GET_MODE_SIZE (mode);
19583 }
19584
19585 /* Return the cost of moving between two registers of mode MODE,
19586 assuming that the move will be in pieces of at most UNITS bytes. */
19587 return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
19588 }
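/* Worked example (illustrative): assuming UNITS_PER_WORD is 4 (ia32), a
   DImode register-to-register set falls through to the default case, so
   units stays 4 and the cost is COSTS_N_INSNS (CEIL (8, 4)) =
   COSTS_N_INSNS (2); on x86-64 the same set costs COSTS_N_INSNS (1).  */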
19589
19590 /* Return cost of vector operation in MODE given that scalar version has
19591 COST. */
19592
19593 static int
19594 ix86_vec_cost (machine_mode mode, int cost)
19595 {
19596 if (!VECTOR_MODE_P (mode))
19597 return cost;
19598
19599 if (GET_MODE_BITSIZE (mode) == 128
19600 && TARGET_SSE_SPLIT_REGS)
19601 return cost * 2;
19602 if (GET_MODE_BITSIZE (mode) > 128
19603 && TARGET_AVX256_SPLIT_REGS)
19604 return cost * GET_MODE_BITSIZE (mode) / 128;
19605 return cost;
19606 }
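/* Worked example (illustrative): on a target with TARGET_AVX256_SPLIT_REGS
   set, a V8SF operation whose scalar cost is COST is accounted as
   2 * COST (256 / 128 halves); with TARGET_SSE_SPLIT_REGS a 128-bit V4SF
   operation is likewise doubled.  Otherwise the scalar cost is used
   unchanged.  */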
19607
19608 /* Return cost of multiplication in MODE. */
19609
19610 static int
19611 ix86_multiplication_cost (const struct processor_costs *cost,
19612 enum machine_mode mode)
19613 {
19614 machine_mode inner_mode = mode;
19615 if (VECTOR_MODE_P (mode))
19616 inner_mode = GET_MODE_INNER (mode);
19617
19618 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19619 return inner_mode == DFmode ? cost->mulsd : cost->mulss;
19620 else if (X87_FLOAT_MODE_P (mode))
19621 return cost->fmul;
19622 else if (FLOAT_MODE_P (mode))
19623 return ix86_vec_cost (mode,
19624 inner_mode == DFmode ? cost->mulsd : cost->mulss);
19625 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19626 {
19627 /* vpmullq is used in this case. No emulation is needed. */
19628 if (TARGET_AVX512DQ)
19629 return ix86_vec_cost (mode, cost->mulss);
19630
19631 /* V*QImode is emulated with 7-13 insns. */
19632 if (mode == V16QImode || mode == V32QImode)
19633 {
19634 int extra = 11;
19635 if (TARGET_XOP && mode == V16QImode)
19636 extra = 5;
19637 else if (TARGET_SSSE3)
19638 extra = 6;
19639 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
19640 }
19641 /* V*DImode is emulated with 5-8 insns. */
19642 else if (mode == V2DImode || mode == V4DImode)
19643 {
19644 if (TARGET_XOP && mode == V2DImode)
19645 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
19646 else
19647 return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
19648 }
19649 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
19650 insns, including two PMULUDQ. */
19651 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
19652 return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
19653 else
19654 return ix86_vec_cost (mode, cost->mulss);
19655 }
19656 else
19657 return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
19658 }
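/* Worked example (illustrative): a V16QImode multiply on a plain SSE2
   target is costed as 2 * mulss + 11 * sse_op for the emulation sequence;
   with SSSE3 the extra insn count drops to 6 and with XOP to 5.  A
   V4SImode multiply without SSE4.1/AVX is costed as 2 * mulss +
   5 * sse_op for the PMULUDQ-based emulation.  */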
19659
19660 /* Return cost of division in MODE.  */
19661
19662 static int
19663 ix86_division_cost (const struct processor_costs *cost,
19664 enum machine_mode mode)
19665 {
19666 machine_mode inner_mode = mode;
19667 if (VECTOR_MODE_P (mode))
19668 inner_mode = GET_MODE_INNER (mode);
19669
19670 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19671 return inner_mode == DFmode ? cost->divsd : cost->divss;
19672 else if (X87_FLOAT_MODE_P (mode))
19673 return cost->fdiv;
19674 else if (FLOAT_MODE_P (mode))
19675 return ix86_vec_cost (mode,
19676 inner_mode == DFmode ? cost->divsd : cost->divss);
19677 else
19678 return cost->divide[MODE_INDEX (mode)];
19679 }
19680
19681 #define COSTS_N_BYTES(N) ((N) * 2)
19682
19683 /* Return cost of shift in MODE.
19684 If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
19685    AND_IN_OP1 specifies whether op1 is the result of an AND, and
19686    SHIFT_AND_TRUNCATE whether op1 is such an AND wrapped in a subreg.
19687
19688 SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */
19689
19690 static int
19691 ix86_shift_rotate_cost (const struct processor_costs *cost,
19692 enum machine_mode mode, bool constant_op1,
19693 HOST_WIDE_INT op1_val,
19694 bool speed,
19695 bool and_in_op1,
19696 bool shift_and_truncate,
19697 bool *skip_op0, bool *skip_op1)
19698 {
19699 if (skip_op0)
19700 *skip_op0 = *skip_op1 = false;
19701 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19702 {
19703 /* V*QImode is emulated with 1-11 insns. */
19704 if (mode == V16QImode || mode == V32QImode)
19705 {
19706 int count = 11;
19707 if (TARGET_XOP && mode == V16QImode)
19708 {
19709 /* For XOP we use vpshab, which requires a broadcast of the
19710 value to the variable shift insn. For constants this
19711 means a V16Q const in mem; even when we can perform the
19712 shift with one insn set the cost to prefer paddb. */
19713 if (constant_op1)
19714 {
19715 if (skip_op1)
19716 *skip_op1 = true;
19717 return ix86_vec_cost (mode,
19718 cost->sse_op
19719 + (speed
19720 ? 2
19721 : COSTS_N_BYTES
19722 (GET_MODE_UNIT_SIZE (mode))));
19723 }
19724 count = 3;
19725 }
19726 else if (TARGET_SSSE3)
19727 count = 7;
19728 return ix86_vec_cost (mode, cost->sse_op * count);
19729 }
19730 else
19731 return ix86_vec_cost (mode, cost->sse_op);
19732 }
19733 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19734 {
19735 if (constant_op1)
19736 {
19737 if (op1_val > 32)
19738 return cost->shift_const + COSTS_N_INSNS (2);
19739 else
19740 return cost->shift_const * 2;
19741 }
19742 else
19743 {
19744 if (and_in_op1)
19745 return cost->shift_var * 2;
19746 else
19747 return cost->shift_var * 6 + COSTS_N_INSNS (2);
19748 }
19749 }
19750 else
19751 {
19752 if (constant_op1)
19753 return cost->shift_const;
19754 else if (shift_and_truncate)
19755 {
19756 if (skip_op0)
19757 *skip_op0 = *skip_op1 = true;
19758 /* Return the cost after shift-and truncation. */
19759 return cost->shift_var;
19760 }
19761 else
19762 return cost->shift_var;
19763 }
19764 return cost->shift_const;
19765 }
19766
19767 /* Compute a (partial) cost for rtx X. Return true if the complete
19768 cost has been computed, and false if subexpressions should be
19769 scanned. In either case, *TOTAL contains the cost result. */
19770
19771 static bool
19772 ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
19773 int *total, bool speed)
19774 {
19775 rtx mask;
19776 enum rtx_code code = GET_CODE (x);
19777 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
19778 const struct processor_costs *cost
19779 = speed ? ix86_tune_cost : &ix86_size_cost;
19780 int src_cost;
19781
19782 switch (code)
19783 {
19784 case SET:
19785 if (register_operand (SET_DEST (x), VOIDmode)
19786 && register_operand (SET_SRC (x), VOIDmode))
19787 {
19788 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
19789 return true;
19790 }
19791
19792 if (register_operand (SET_SRC (x), VOIDmode))
19793 /* Avoid potentially incorrect high cost from rtx_costs
19794 for non-tieable SUBREGs. */
19795 src_cost = 0;
19796 else
19797 {
19798 src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
19799
19800 if (CONSTANT_P (SET_SRC (x)))
19801 /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
19802 a small value, possibly zero for cheap constants. */
19803 src_cost += COSTS_N_INSNS (1);
19804 }
19805
19806 *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
19807 return true;
19808
19809 case CONST_INT:
19810 case CONST:
19811 case LABEL_REF:
19812 case SYMBOL_REF:
19813 if (x86_64_immediate_operand (x, VOIDmode))
19814 *total = 0;
19815 else
19816 *total = 1;
19817 return true;
19818
19819 case CONST_DOUBLE:
19820 if (IS_STACK_MODE (mode))
19821 switch (standard_80387_constant_p (x))
19822 {
19823 case -1:
19824 case 0:
19825 break;
19826 case 1: /* 0.0 */
19827 *total = 1;
19828 return true;
19829 default: /* Other constants */
19830 *total = 2;
19831 return true;
19832 }
19833 /* FALLTHRU */
19834
19835 case CONST_VECTOR:
19836 switch (standard_sse_constant_p (x, mode))
19837 {
19838 case 0:
19839 break;
19840 case 1: /* 0: xor eliminates false dependency */
19841 *total = 0;
19842 return true;
19843 default: /* -1: cmp contains false dependency */
19844 *total = 1;
19845 return true;
19846 }
19847 /* FALLTHRU */
19848
19849 case CONST_WIDE_INT:
19850 /* Fall back to (MEM (SYMBOL_REF)), since that's where
19851 it'll probably end up. Add a penalty for size. */
19852 *total = (COSTS_N_INSNS (1)
19853 + (!TARGET_64BIT && flag_pic)
19854 + (GET_MODE_SIZE (mode) <= 4
19855 ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
19856 return true;
19857
19858 case ZERO_EXTEND:
19859     case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
19860 it as cheap as possible. */
19861 if (TARGET_64BIT && mode == DImode
19862 && GET_MODE (XEXP (x, 0)) == SImode)
19863 *total = 1;
19864 else if (TARGET_ZERO_EXTEND_WITH_AND)
19865 *total = cost->add;
19866 else
19867 *total = cost->movzx;
19868 return false;
19869
19870 case SIGN_EXTEND:
19871 *total = cost->movsx;
19872 return false;
19873
19874 case ASHIFT:
19875 if (SCALAR_INT_MODE_P (mode)
19876 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
19877 && CONST_INT_P (XEXP (x, 1)))
19878 {
19879 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19880 if (value == 1)
19881 {
19882 *total = cost->add;
19883 return false;
19884 }
19885 if ((value == 2 || value == 3)
19886 && cost->lea <= cost->shift_const)
19887 {
19888 *total = cost->lea;
19889 return false;
19890 }
19891 }
19892 /* FALLTHRU */
19893
19894 case ROTATE:
19895 case ASHIFTRT:
19896 case LSHIFTRT:
19897 case ROTATERT:
19898 bool skip_op0, skip_op1;
19899 *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)),
19900 CONST_INT_P (XEXP (x, 1))
19901 ? INTVAL (XEXP (x, 1)) : -1,
19902 speed,
19903 GET_CODE (XEXP (x, 1)) == AND,
19904 SUBREG_P (XEXP (x, 1))
19905 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND,
19906 &skip_op0, &skip_op1);
19907 if (skip_op0 || skip_op1)
19908 {
19909 if (!skip_op0)
19910 *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
19911 if (!skip_op1)
19912 *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
19913 return true;
19914 }
19915 return false;
19916
19917 case FMA:
19918 {
19919 rtx sub;
19920
19921 gcc_assert (FLOAT_MODE_P (mode));
19922 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
19923
19924 *total = ix86_vec_cost (mode,
19925 GET_MODE_INNER (mode) == SFmode
19926 ? cost->fmass : cost->fmasd);
19927 *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
19928
19929 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
19930 sub = XEXP (x, 0);
19931 if (GET_CODE (sub) == NEG)
19932 sub = XEXP (sub, 0);
19933 *total += rtx_cost (sub, mode, FMA, 0, speed);
19934
19935 sub = XEXP (x, 2);
19936 if (GET_CODE (sub) == NEG)
19937 sub = XEXP (sub, 0);
19938 *total += rtx_cost (sub, mode, FMA, 2, speed);
19939 return true;
19940 }
19941
19942 case MULT:
19943 if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
19944 {
19945 rtx op0 = XEXP (x, 0);
19946 rtx op1 = XEXP (x, 1);
19947 int nbits;
19948 if (CONST_INT_P (XEXP (x, 1)))
19949 {
19950 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19951 for (nbits = 0; value != 0; value &= value - 1)
19952 nbits++;
19953 }
19954 else
19955 /* This is arbitrary. */
19956 nbits = 7;
19957
19958 /* Compute costs correctly for widening multiplication. */
19959 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19960 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19961 == GET_MODE_SIZE (mode))
19962 {
19963 int is_mulwiden = 0;
19964 machine_mode inner_mode = GET_MODE (op0);
19965
19966 if (GET_CODE (op0) == GET_CODE (op1))
19967 is_mulwiden = 1, op1 = XEXP (op1, 0);
19968 else if (CONST_INT_P (op1))
19969 {
19970 if (GET_CODE (op0) == SIGN_EXTEND)
19971 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19972 == INTVAL (op1);
19973 else
19974 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19975 }
19976
19977 if (is_mulwiden)
19978 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19979 }
19980
19981 *total = (cost->mult_init[MODE_INDEX (mode)]
19982 + nbits * cost->mult_bit
19983 + rtx_cost (op0, mode, outer_code, opno, speed)
19984 + rtx_cost (op1, mode, outer_code, opno, speed));
19985
19986 return true;
19987 }
19988 *total = ix86_multiplication_cost (cost, mode);
19989 return false;
19990
19991 case DIV:
19992 case UDIV:
19993 case MOD:
19994 case UMOD:
19995 *total = ix86_division_cost (cost, mode);
19996 return false;
19997
19998 case PLUS:
19999 if (GET_MODE_CLASS (mode) == MODE_INT
20000 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
20001 {
20002 if (GET_CODE (XEXP (x, 0)) == PLUS
20003 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20004 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20005 && CONSTANT_P (XEXP (x, 1)))
20006 {
20007 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20008 if (val == 2 || val == 4 || val == 8)
20009 {
20010 *total = cost->lea;
20011 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
20012 outer_code, opno, speed);
20013 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
20014 outer_code, opno, speed);
20015 *total += rtx_cost (XEXP (x, 1), mode,
20016 outer_code, opno, speed);
20017 return true;
20018 }
20019 }
20020 else if (GET_CODE (XEXP (x, 0)) == MULT
20021 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20022 {
20023 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20024 if (val == 2 || val == 4 || val == 8)
20025 {
20026 *total = cost->lea;
20027 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
20028 outer_code, opno, speed);
20029 *total += rtx_cost (XEXP (x, 1), mode,
20030 outer_code, opno, speed);
20031 return true;
20032 }
20033 }
20034 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20035 {
20036 /* Add with carry, ignore the cost of adding a carry flag. */
20037 if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode))
20038 *total = cost->add;
20039 else
20040 {
20041 *total = cost->lea;
20042 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
20043 outer_code, opno, speed);
20044 }
20045
20046 *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
20047 outer_code, opno, speed);
20048 *total += rtx_cost (XEXP (x, 1), mode,
20049 outer_code, opno, speed);
20050 return true;
20051 }
20052 }
20053 /* FALLTHRU */
20054
20055 case MINUS:
20056 /* Subtract with borrow, ignore the cost of subtracting a carry flag. */
20057 if (GET_MODE_CLASS (mode) == MODE_INT
20058 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
20059 && GET_CODE (XEXP (x, 0)) == MINUS
20060 && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode))
20061 {
20062 *total = cost->add;
20063 *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
20064 outer_code, opno, speed);
20065 *total += rtx_cost (XEXP (x, 1), mode,
20066 outer_code, opno, speed);
20067 return true;
20068 }
20069
20070 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20071 {
20072 *total = cost->addss;
20073 return false;
20074 }
20075 else if (X87_FLOAT_MODE_P (mode))
20076 {
20077 *total = cost->fadd;
20078 return false;
20079 }
20080 else if (FLOAT_MODE_P (mode))
20081 {
20082 *total = ix86_vec_cost (mode, cost->addss);
20083 return false;
20084 }
20085 /* FALLTHRU */
20086
20087 case AND:
20088 case IOR:
20089 case XOR:
20090 if (GET_MODE_CLASS (mode) == MODE_INT
20091 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20092 {
20093 *total = (cost->add * 2
20094 + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
20095 << (GET_MODE (XEXP (x, 0)) != DImode))
20096 + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
20097 << (GET_MODE (XEXP (x, 1)) != DImode)));
20098 return true;
20099 }
20100 /* FALLTHRU */
20101
20102 case NEG:
20103 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20104 {
20105 *total = cost->sse_op;
20106 return false;
20107 }
20108 else if (X87_FLOAT_MODE_P (mode))
20109 {
20110 *total = cost->fchs;
20111 return false;
20112 }
20113 else if (FLOAT_MODE_P (mode))
20114 {
20115 *total = ix86_vec_cost (mode, cost->sse_op);
20116 return false;
20117 }
20118 /* FALLTHRU */
20119
20120 case NOT:
20121 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20122 *total = ix86_vec_cost (mode, cost->sse_op);
20123 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20124 *total = cost->add * 2;
20125 else
20126 *total = cost->add;
20127 return false;
20128
20129 case COMPARE:
20130 rtx op0, op1;
20131 op0 = XEXP (x, 0);
20132 op1 = XEXP (x, 1);
20133 if (GET_CODE (op0) == ZERO_EXTRACT
20134 && XEXP (op0, 1) == const1_rtx
20135 && CONST_INT_P (XEXP (op0, 2))
20136 && op1 == const0_rtx)
20137 {
20138 /* This kind of construct is implemented using test[bwl].
20139 Treat it as if we had an AND. */
20140 mode = GET_MODE (XEXP (op0, 0));
20141 *total = (cost->add
20142 + rtx_cost (XEXP (op0, 0), mode, outer_code,
20143 opno, speed)
20144 + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
20145 return true;
20146 }
20147
20148 if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
20149 {
20150 	  /* This is overflow detection; count it as a normal compare.  */
20151 *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
20152 return true;
20153 }
20154
20155 rtx geu;
20156 /* Match x
20157 (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
20158 (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */
20159 if (mode == CCCmode
20160 && GET_CODE (op0) == NEG
20161 && GET_CODE (geu = XEXP (op0, 0)) == GEU
20162 && REG_P (XEXP (geu, 0))
20163 && (GET_MODE (XEXP (geu, 0)) == CCCmode
20164 || GET_MODE (XEXP (geu, 0)) == CCmode)
20165 && REGNO (XEXP (geu, 0)) == FLAGS_REG
20166 && XEXP (geu, 1) == const0_rtx
20167 && GET_CODE (op1) == LTU
20168 && REG_P (XEXP (op1, 0))
20169 && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
20170 && REGNO (XEXP (op1, 0)) == FLAGS_REG
20171 && XEXP (op1, 1) == const0_rtx)
20172 {
20173 	  /* These are the *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop.  */
20174 *total = 0;
20175 return true;
20176 }
20177
20178 /* The embedded comparison operand is completely free. */
20179 if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
20180 *total = 0;
20181
20182 return false;
20183
20184 case FLOAT_EXTEND:
20185 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20186 *total = 0;
20187 else
20188 *total = ix86_vec_cost (mode, cost->addss);
20189 return false;
20190
20191 case FLOAT_TRUNCATE:
20192 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20193 *total = cost->fadd;
20194 else
20195 *total = ix86_vec_cost (mode, cost->addss);
20196 return false;
20197
20198 case ABS:
20199 /* SSE requires memory load for the constant operand. It may make
20200 sense to account for this. Of course the constant operand may or
20201 may not be reused. */
20202 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20203 *total = cost->sse_op;
20204 else if (X87_FLOAT_MODE_P (mode))
20205 *total = cost->fabs;
20206 else if (FLOAT_MODE_P (mode))
20207 *total = ix86_vec_cost (mode, cost->sse_op);
20208 return false;
20209
20210 case SQRT:
20211 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20212 *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
20213 else if (X87_FLOAT_MODE_P (mode))
20214 *total = cost->fsqrt;
20215 else if (FLOAT_MODE_P (mode))
20216 *total = ix86_vec_cost (mode,
20217 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
20218 return false;
20219
20220 case UNSPEC:
20221 if (XINT (x, 1) == UNSPEC_TP)
20222 *total = 0;
20223 return false;
20224
20225 case VEC_SELECT:
20226 case VEC_CONCAT:
20227 case VEC_DUPLICATE:
20228 /* ??? Assume all of these vector manipulation patterns are
20229 	 recognizable, in which case they all pretty much have the
20230 same cost. */
20231 *total = cost->sse_op;
20232 return true;
20233 case VEC_MERGE:
20234 mask = XEXP (x, 2);
20235       /* This is a masked instruction; assume the same cost
20236 	 as the non-masked variant.  */
20237 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
20238 *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed);
20239 else
20240 *total = cost->sse_op;
20241 return true;
20242
20243 default:
20244 return false;
20245 }
20246 }
20247
20248 #if TARGET_MACHO
20249
20250 static int current_machopic_label_num;
20251
20252 /* Given a symbol name and its associated stub, write out the
20253 definition of the stub. */
20254
20255 void
20256 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20257 {
20258 unsigned int length;
20259 char *binder_name, *symbol_name, lazy_ptr_name[32];
20260 int label = ++current_machopic_label_num;
20261
20262 /* For 64-bit we shouldn't get here. */
20263 gcc_assert (!TARGET_64BIT);
20264
20265 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20266 symb = targetm.strip_name_encoding (symb);
20267
20268 length = strlen (stub);
20269 binder_name = XALLOCAVEC (char, length + 32);
20270 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20271
20272 length = strlen (symb);
20273 symbol_name = XALLOCAVEC (char, length + 32);
20274 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20275
20276 sprintf (lazy_ptr_name, "L%d$lz", label);
20277
20278 if (MACHOPIC_ATT_STUB)
20279 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
20280 else if (MACHOPIC_PURE)
20281 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
20282 else
20283 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20284
20285 fprintf (file, "%s:\n", stub);
20286 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20287
20288 if (MACHOPIC_ATT_STUB)
20289 {
20290 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
20291 }
20292 else if (MACHOPIC_PURE)
20293 {
20294 /* PIC stub. */
20295 /* 25-byte PIC stub using "CALL get_pc_thunk". */
20296 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
20297 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
20298 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
20299 label, lazy_ptr_name, label);
20300 fprintf (file, "\tjmp\t*%%ecx\n");
20301 }
20302 else
20303 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20304
20305 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
20306 it needs no stub-binding-helper. */
20307 if (MACHOPIC_ATT_STUB)
20308 return;
20309
20310 fprintf (file, "%s:\n", binder_name);
20311
20312 if (MACHOPIC_PURE)
20313 {
20314 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
20315 fprintf (file, "\tpushl\t%%ecx\n");
20316 }
20317 else
20318 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20319
20320 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
20321
20322 /* N.B. Keep the correspondence of these
20323 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
20324 old-pic/new-pic/non-pic stubs; altering this will break
20325 compatibility with existing dylibs. */
20326 if (MACHOPIC_PURE)
20327 {
20328 /* 25-byte PIC stub using "CALL get_pc_thunk". */
20329 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
20330 }
20331 else
20332 /* 16-byte -mdynamic-no-pic stub. */
20333 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
20334
20335 fprintf (file, "%s:\n", lazy_ptr_name);
20336 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20337 fprintf (file, ASM_LONG "%s\n", binder_name);
20338 }
20339 #endif /* TARGET_MACHO */
20340
20341 /* Order the registers for register allocator. */
20342
20343 void
20344 x86_order_regs_for_local_alloc (void)
20345 {
20346 int pos = 0;
20347 int i;
20348
20349 /* First allocate the local general purpose registers. */
20350 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20351 if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
20352 reg_alloc_order [pos++] = i;
20353
20354 /* Global general purpose registers. */
20355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20356 if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
20357 reg_alloc_order [pos++] = i;
20358
20359 /* x87 registers come first in case we are doing FP math
20360 using them. */
20361 if (!TARGET_SSE_MATH)
20362 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20363 reg_alloc_order [pos++] = i;
20364
20365 /* SSE registers. */
20366 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20367 reg_alloc_order [pos++] = i;
20368 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20369 reg_alloc_order [pos++] = i;
20370
20371 /* Extended REX SSE registers. */
20372 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
20373 reg_alloc_order [pos++] = i;
20374
20375 /* Mask register. */
20376 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
20377 reg_alloc_order [pos++] = i;
20378
20379 /* x87 registers. */
20380 if (TARGET_SSE_MATH)
20381 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20382 reg_alloc_order [pos++] = i;
20383
20384 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20385 reg_alloc_order [pos++] = i;
20386
20387    /* Initialize the rest of the array, as we do not allocate some registers
20388 at all. */
20389 while (pos < FIRST_PSEUDO_REGISTER)
20390 reg_alloc_order [pos++] = 0;
20391 }
20392
20393 static bool
20394 ix86_ms_bitfield_layout_p (const_tree record_type)
20395 {
20396 return ((TARGET_MS_BITFIELD_LAYOUT
20397 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20398 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20399 }
20400
20401 /* Returns an expression indicating where the this parameter is
20402 located on entry to the FUNCTION. */
20403
20404 static rtx
20405 x86_this_parameter (tree function)
20406 {
20407 tree type = TREE_TYPE (function);
20408 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20409 int nregs;
20410
20411 if (TARGET_64BIT)
20412 {
20413 const int *parm_regs;
20414
20415 if (ix86_function_type_abi (type) == MS_ABI)
20416 parm_regs = x86_64_ms_abi_int_parameter_registers;
20417 else
20418 parm_regs = x86_64_int_parameter_registers;
20419 return gen_rtx_REG (Pmode, parm_regs[aggr]);
20420 }
20421
20422 nregs = ix86_function_regparm (type, function);
20423
20424 if (nregs > 0 && !stdarg_p (type))
20425 {
20426 int regno;
20427 unsigned int ccvt = ix86_get_callcvt (type);
20428
20429 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20430 regno = aggr ? DX_REG : CX_REG;
20431 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20432 {
20433 regno = CX_REG;
20434 if (aggr)
20435 return gen_rtx_MEM (SImode,
20436 plus_constant (Pmode, stack_pointer_rtx, 4));
20437 }
20438 else
20439 {
20440 regno = AX_REG;
20441 if (aggr)
20442 {
20443 regno = DX_REG;
20444 if (nregs == 1)
20445 return gen_rtx_MEM (SImode,
20446 plus_constant (Pmode,
20447 stack_pointer_rtx, 4));
20448 }
20449 }
20450 return gen_rtx_REG (SImode, regno);
20451 }
20452
20453 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
20454 aggr ? 8 : 4));
20455 }
20456
20457 /* Determine whether x86_output_mi_thunk can succeed. */
20458
20459 static bool
20460 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
20461 const_tree function)
20462 {
20463 /* 64-bit can handle anything. */
20464 if (TARGET_64BIT)
20465 return true;
20466
20467 /* For 32-bit, everything's fine if we have one free register. */
20468 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20469 return true;
20470
20471 /* Need a free register for vcall_offset. */
20472 if (vcall_offset)
20473 return false;
20474
20475 /* Need a free register for GOT references. */
20476 if (flag_pic && !targetm.binds_local_p (function))
20477 return false;
20478
20479 /* Otherwise ok. */
20480 return true;
20481 }
20482
20483 /* Output the assembler code for a thunk function. THUNK_DECL is the
20484 declaration for the thunk function itself, FUNCTION is the decl for
20485 the target function. DELTA is an immediate constant offset to be
20486 added to THIS. If VCALL_OFFSET is nonzero, the word at
20487 *(*this + vcall_offset) should be added to THIS. */
20488
20489 static void
20490 x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
20491 HOST_WIDE_INT vcall_offset, tree function)
20492 {
20493 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
20494 rtx this_param = x86_this_parameter (function);
20495 rtx this_reg, tmp, fnaddr;
20496 unsigned int tmp_regno;
20497 rtx_insn *insn;
20498
20499 if (TARGET_64BIT)
20500 tmp_regno = R10_REG;
20501 else
20502 {
20503 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
20504 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
20505 tmp_regno = AX_REG;
20506 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
20507 tmp_regno = DX_REG;
20508 else
20509 tmp_regno = CX_REG;
20510 }
20511
20512 emit_note (NOTE_INSN_PROLOGUE_END);
20513
20514   /* If CET is enabled, insert an ENDBR instruction.  */
20515 if ((flag_cf_protection & CF_BRANCH))
20516 emit_insn (gen_nop_endbr ());
20517
20518 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20519 pull it in now and let DELTA benefit. */
20520 if (REG_P (this_param))
20521 this_reg = this_param;
20522 else if (vcall_offset)
20523 {
20524 /* Put the this parameter into %eax. */
20525 this_reg = gen_rtx_REG (Pmode, AX_REG);
20526 emit_move_insn (this_reg, this_param);
20527 }
20528 else
20529 this_reg = NULL_RTX;
20530
20531 /* Adjust the this parameter by a fixed constant. */
20532 if (delta)
20533 {
20534 rtx delta_rtx = GEN_INT (delta);
20535 rtx delta_dst = this_reg ? this_reg : this_param;
20536
20537 if (TARGET_64BIT)
20538 {
20539 if (!x86_64_general_operand (delta_rtx, Pmode))
20540 {
20541 tmp = gen_rtx_REG (Pmode, tmp_regno);
20542 emit_move_insn (tmp, delta_rtx);
20543 delta_rtx = tmp;
20544 }
20545 }
20546
20547 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
20548 }
20549
20550 /* Adjust the this parameter by a value stored in the vtable. */
20551 if (vcall_offset)
20552 {
20553 rtx vcall_addr, vcall_mem, this_mem;
20554
20555 tmp = gen_rtx_REG (Pmode, tmp_regno);
20556
20557 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
20558 if (Pmode != ptr_mode)
20559 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
20560 emit_move_insn (tmp, this_mem);
20561
20562 /* Adjust the this parameter. */
20563 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
20564 if (TARGET_64BIT
20565 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
20566 {
20567 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
20568 emit_move_insn (tmp2, GEN_INT (vcall_offset));
20569 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
20570 }
20571
20572 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
20573 if (Pmode != ptr_mode)
20574 emit_insn (gen_addsi_1_zext (this_reg,
20575 gen_rtx_REG (ptr_mode,
20576 REGNO (this_reg)),
20577 vcall_mem));
20578 else
20579 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
20580 }
20581
20582 /* If necessary, drop THIS back to its stack slot. */
20583 if (this_reg && this_reg != this_param)
20584 emit_move_insn (this_param, this_reg);
20585
20586 fnaddr = XEXP (DECL_RTL (function), 0);
20587 if (TARGET_64BIT)
20588 {
20589 if (!flag_pic || targetm.binds_local_p (function)
20590 || TARGET_PECOFF)
20591 ;
20592 else
20593 {
20594 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
20595 tmp = gen_rtx_CONST (Pmode, tmp);
20596 fnaddr = gen_const_mem (Pmode, tmp);
20597 }
20598 }
20599 else
20600 {
20601 if (!flag_pic || targetm.binds_local_p (function))
20602 ;
20603 #if TARGET_MACHO
20604 else if (TARGET_MACHO)
20605 {
20606 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
20607 fnaddr = XEXP (fnaddr, 0);
20608 }
20609 #endif /* TARGET_MACHO */
20610 else
20611 {
20612 tmp = gen_rtx_REG (Pmode, CX_REG);
20613 output_set_got (tmp, NULL_RTX);
20614
20615 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
20616 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
20617 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
20618 fnaddr = gen_const_mem (Pmode, fnaddr);
20619 }
20620 }
20621
20622 /* Our sibling call patterns do not allow memories, because we have no
20623 predicate that can distinguish between frame and non-frame memory.
20624 For our purposes here, we can get away with (ab)using a jump pattern,
20625 because we're going to do no optimization. */
20626 if (MEM_P (fnaddr))
20627 {
20628 if (sibcall_insn_operand (fnaddr, word_mode))
20629 {
20630 fnaddr = XEXP (DECL_RTL (function), 0);
20631 tmp = gen_rtx_MEM (QImode, fnaddr);
20632 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20633 tmp = emit_call_insn (tmp);
20634 SIBLING_CALL_P (tmp) = 1;
20635 }
20636 else
20637 emit_jump_insn (gen_indirect_jump (fnaddr));
20638 }
20639 else
20640 {
20641 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
20642 {
20643 // CM_LARGE_PIC always uses a pseudo PIC register, which is
20644 // uninitialized. Since FUNCTION is local and calling it
20645 // doesn't go through the PLT, we use the scratch register %r11 as
20646 // the PIC register and initialize it here.
20647 pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
20648 ix86_init_large_pic_reg (tmp_regno);
20649 fnaddr = legitimize_pic_address (fnaddr,
20650 gen_rtx_REG (Pmode, tmp_regno));
20651 }
20652
20653 if (!sibcall_insn_operand (fnaddr, word_mode))
20654 {
20655 tmp = gen_rtx_REG (word_mode, tmp_regno);
20656 if (GET_MODE (fnaddr) != word_mode)
20657 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
20658 emit_move_insn (tmp, fnaddr);
20659 fnaddr = tmp;
20660 }
20661
20662 tmp = gen_rtx_MEM (QImode, fnaddr);
20663 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
20664 tmp = emit_call_insn (tmp);
20665 SIBLING_CALL_P (tmp) = 1;
20666 }
20667 emit_barrier ();
20668
20669 /* Emit just enough of rest_of_compilation to get the insns emitted. */
20670 insn = get_insns ();
20671 shorten_branches (insn);
20672 assemble_start_function (thunk_fndecl, fnname);
20673 final_start_function (insn, file, 1);
20674 final (insn, file, 1);
20675 final_end_function ();
20676 assemble_end_function (thunk_fndecl, fnname);
20677 }
20678
20679 static void
20680 x86_file_start (void)
20681 {
20682 default_file_start ();
20683 if (TARGET_16BIT)
20684 fputs ("\t.code16gcc\n", asm_out_file);
20685 #if TARGET_MACHO
20686 darwin_file_start ();
20687 #endif
20688 if (X86_FILE_START_VERSION_DIRECTIVE)
20689 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20690 if (X86_FILE_START_FLTUSED)
20691 fputs ("\t.global\t__fltused\n", asm_out_file);
20692 if (ix86_asm_dialect == ASM_INTEL)
20693 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
20694 }
20695
20696 int
20697 x86_field_alignment (tree type, int computed)
20698 {
20699 machine_mode mode;
20700
20701 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20702 return computed;
20703 if (TARGET_IAMCU)
20704 return iamcu_alignment (type, computed);
20705 type = strip_array_types (type);
20706 mode = TYPE_MODE (type);
20707 if (mode == DFmode || mode == DCmode
20708 || GET_MODE_CLASS (mode) == MODE_INT
20709 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20710 {
20711 if (TYPE_ATOMIC (type) && computed > 32)
20712 {
20713 static bool warned;
20714
20715 if (!warned && warn_psabi)
20716 {
20717 const char *url
20718 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
20719
20720 warned = true;
20721 inform (input_location, "the alignment of %<_Atomic %T%> "
20722 "fields changed in %{GCC 11.1%}",
20723 TYPE_MAIN_VARIANT (type), url);
20724 }
20725 }
20726 else
20727 return MIN (32, computed);
20728 }
20729 return computed;
20730 }
20731
20732 /* Print call to TARGET to FILE. */
20733
20734 static void
20735 x86_print_call_or_nop (FILE *file, const char *target)
20736 {
20737 if (flag_nop_mcount || !strcmp (target, "nop"))
20738 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
20739 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
20740 else
20741 fprintf (file, "1:\tcall\t%s\n", target);
20742 }
20743
20744 static bool
20745 current_fentry_name (const char **name)
20746 {
20747 tree attr = lookup_attribute ("fentry_name",
20748 DECL_ATTRIBUTES (current_function_decl));
20749 if (!attr)
20750 return false;
20751 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20752 return true;
20753 }
20754
20755 static bool
20756 current_fentry_section (const char **name)
20757 {
20758 tree attr = lookup_attribute ("fentry_section",
20759 DECL_ATTRIBUTES (current_function_decl));
20760 if (!attr)
20761 return false;
20762 *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
20763 return true;
20764 }
20765
20766 /* Output assembler code to FILE to increment profiler label # LABELNO
20767 for profiling a function entry. */
20768 void
20769 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20770 {
20771 if (cfun->machine->insn_queued_at_entrance)
20772 {
20773 if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
20774 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
20775 unsigned int patch_area_size
20776 = crtl->patch_area_size - crtl->patch_area_entry;
20777 if (patch_area_size)
20778 ix86_output_patchable_area (patch_area_size,
20779 crtl->patch_area_entry == 0);
20780 }
20781
20782 const char *mcount_name = MCOUNT_NAME;
20783
20784 if (current_fentry_name (&mcount_name))
20785 ;
20786 else if (fentry_name)
20787 mcount_name = fentry_name;
20788 else if (flag_fentry)
20789 mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
20790
20791 if (TARGET_64BIT)
20792 {
20793 #ifndef NO_PROFILE_COUNTERS
20794 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20795 #endif
20796
20797 if (!TARGET_PECOFF && flag_pic)
20798 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
20799 else
20800 x86_print_call_or_nop (file, mcount_name);
20801 }
20802 else if (flag_pic)
20803 {
20804 #ifndef NO_PROFILE_COUNTERS
20805 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
20806 LPREFIX, labelno);
20807 #endif
20808 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
20809 }
20810 else
20811 {
20812 #ifndef NO_PROFILE_COUNTERS
20813 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
20814 LPREFIX, labelno);
20815 #endif
20816 x86_print_call_or_nop (file, mcount_name);
20817 }
20818
20819 if (flag_record_mcount
20820 || lookup_attribute ("fentry_section",
20821 DECL_ATTRIBUTES (current_function_decl)))
20822 {
20823 const char *sname = "__mcount_loc";
20824
20825 if (current_fentry_section (&sname))
20826 ;
20827 else if (fentry_section)
20828 sname = fentry_section;
20829
20830 fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
20831 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
20832 fprintf (file, "\t.previous\n");
20833 }
20834 }
20835
20836 /* We don't have exact information about the insn sizes, but we may assume
20837 quite safely that we are informed about all 1 byte insns and memory
20838 address sizes. This is enough to eliminate unnecessary padding in
20839 99% of cases. */
20840
20841 int
20842 ix86_min_insn_size (rtx_insn *insn)
20843 {
20844 int l = 0, len;
20845
20846 if (!INSN_P (insn) || !active_insn_p (insn))
20847 return 0;
20848
20849 /* Discard alignments we've emitted, and jump instructions. */
20850 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20851 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20852 return 0;
20853
20854 /* Important case - calls are always 5 bytes.
20855 It is common to have many calls in a row. */
20856 if (CALL_P (insn)
20857 && symbolic_reference_mentioned_p (PATTERN (insn))
20858 && !SIBLING_CALL_P (insn))
20859 return 5;
20860 len = get_attr_length (insn);
20861 if (len <= 1)
20862 return 1;
20863
20864 /* For normal instructions we rely on get_attr_length being exact,
20865 with a few exceptions. */
20866 if (!JUMP_P (insn))
20867 {
20868 enum attr_type type = get_attr_type (insn);
20869
20870 switch (type)
20871 {
20872 case TYPE_MULTI:
20873 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
20874 || asm_noperands (PATTERN (insn)) >= 0)
20875 return 0;
20876 break;
20877 case TYPE_OTHER:
20878 case TYPE_FCMP:
20879 break;
20880 default:
20881 /* Otherwise trust get_attr_length. */
20882 return len;
20883 }
20884
20885 l = get_attr_length_address (insn);
20886 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20887 l = 4;
20888 }
20889 if (l)
20890 return 1+l;
20891 else
20892 return 2;
20893 }
20894
20895 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
20896
20897 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20898 window. */
20899
20900 static void
20901 ix86_avoid_jump_mispredicts (void)
20902 {
20903 rtx_insn *insn, *start = get_insns ();
20904 int nbytes = 0, njumps = 0;
20905 bool isjump = false;
20906
20907 /* Look for all minimal intervals of instructions containing 4 jumps.
20908 The intervals are bounded by START and INSN. NBYTES is the total
20909 size of instructions in the interval including INSN and not including
20910 START. When NBYTES is smaller than 16 bytes, it is possible that
20911 START and INSN end up in the same 16-byte window.
20912
20913 The smallest offset at which INSN can start in that window is the case where
20914 START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
20915 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
20916
20917 Don't consider an asm goto as a jump: while it can contain a jump, it need
20918 not, since control can reach its label(s) by other means, and we also
20919 estimate the minimum length of all asm stmts as 0. */
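/* For instance (illustrative numbers): if an interval holding four jumps has
   NBYTES == 12 and the final jump INSN is 2 bytes long, the pad emitted
   below is 15 - 12 + 2 = 5 bytes, which guarantees those four jumps cannot
   all share one 16-byte window. */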
20920 for (insn = start; insn; insn = NEXT_INSN (insn))
20921 {
20922 int min_size;
20923
20924 if (LABEL_P (insn))
20925 {
20926 align_flags alignment = label_to_alignment (insn);
20927 int align = alignment.levels[0].log;
20928 int max_skip = alignment.levels[0].maxskip;
20929
20930 if (max_skip > 15)
20931 max_skip = 15;
20932 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
20933 already in the current 16 byte page, because otherwise
20934 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
20935 bytes to reach 16 byte boundary. */
20936 if (align <= 0
20937 || (align <= 3 && max_skip != (1 << align) - 1))
20938 max_skip = 0;
20939 if (dump_file)
20940 fprintf (dump_file, "Label %i with max_skip %i\n",
20941 INSN_UID (insn), max_skip);
20942 if (max_skip)
20943 {
20944 while (nbytes + max_skip >= 16)
20945 {
20946 start = NEXT_INSN (start);
20947 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20948 || CALL_P (start))
20949 njumps--, isjump = true;
20950 else
20951 isjump = false;
20952 nbytes -= ix86_min_insn_size (start);
20953 }
20954 }
20955 continue;
20956 }
20957
20958 min_size = ix86_min_insn_size (insn);
20959 nbytes += min_size;
20960 if (dump_file)
20961 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
20962 INSN_UID (insn), min_size);
20963 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
20964 || CALL_P (insn))
20965 njumps++;
20966 else
20967 continue;
20968
20969 while (njumps > 3)
20970 {
20971 start = NEXT_INSN (start);
20972 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
20973 || CALL_P (start))
20974 njumps--, isjump = true;
20975 else
20976 isjump = false;
20977 nbytes -= ix86_min_insn_size (start);
20978 }
20979 gcc_assert (njumps >= 0);
20980 if (dump_file)
20981 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20982 INSN_UID (start), INSN_UID (insn), nbytes);
20983
20984 if (njumps == 3 && isjump && nbytes < 16)
20985 {
20986 int padsize = 15 - nbytes + ix86_min_insn_size (insn);
20987
20988 if (dump_file)
20989 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20990 INSN_UID (insn), padsize);
20991 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
20992 }
20993 }
20994 }
20995 #endif
20996
20997 /* AMD Athlon works faster
20998 when RET is not the destination of a conditional jump or directly preceded
20999 by another jump instruction. We avoid the penalty by inserting a NOP just
21000 before the RET instructions in such cases. */
21001 static void
21002 ix86_pad_returns (void)
21003 {
21004 edge e;
21005 edge_iterator ei;
21006
21007 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21008 {
21009 basic_block bb = e->src;
21010 rtx_insn *ret = BB_END (bb);
21011 rtx_insn *prev;
21012 bool replace = false;
21013
21014 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
21015 || optimize_bb_for_size_p (bb))
21016 continue;
21017 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21018 if (active_insn_p (prev) || LABEL_P (prev))
21019 break;
21020 if (prev && LABEL_P (prev))
21021 {
21022 edge e;
21023 edge_iterator ei;
21024
21025 FOR_EACH_EDGE (e, ei, bb->preds)
21026 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21027 && !(e->flags & EDGE_FALLTHRU))
21028 {
21029 replace = true;
21030 break;
21031 }
21032 }
21033 if (!replace)
21034 {
21035 prev = prev_active_insn (ret);
21036 if (prev
21037 && ((JUMP_P (prev) && any_condjump_p (prev))
21038 || CALL_P (prev)))
21039 replace = true;
21040 /* Empty functions get a branch mispredict even when
21041 the jump destination is not visible to us. */
21042 if (!prev && !optimize_function_for_size_p (cfun))
21043 replace = true;
21044 }
21045 if (replace)
21046 {
21047 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
21048 delete_insn (ret);
21049 }
21050 }
21051 }
21052
21053 /* Count the minimum number of instructions in BB. Return 4 if the
21054 number of instructions >= 4. */
21055
21056 static int
21057 ix86_count_insn_bb (basic_block bb)
21058 {
21059 rtx_insn *insn;
21060 int insn_count = 0;
21061
21062 /* Count number of instructions in this block. Return 4 if the number
21063 of instructions >= 4. */
21064 FOR_BB_INSNS (bb, insn)
21065 {
21066 /* This only happens in exit blocks. */
21067 if (JUMP_P (insn)
21068 && ANY_RETURN_P (PATTERN (insn)))
21069 break;
21070
21071 if (NONDEBUG_INSN_P (insn)
21072 && GET_CODE (PATTERN (insn)) != USE
21073 && GET_CODE (PATTERN (insn)) != CLOBBER)
21074 {
21075 insn_count++;
21076 if (insn_count >= 4)
21077 return insn_count;
21078 }
21079 }
21080
21081 return insn_count;
21082 }
21083
21084
21085 /* Count the minimum number of instructions in a code path ending in BB.
21086 Return 4 if the number of instructions >= 4. */
21087
21088 static int
21089 ix86_count_insn (basic_block bb)
21090 {
21091 edge e;
21092 edge_iterator ei;
21093 int min_prev_count;
21094
21095 /* Only bother counting instructions along paths with no
21096 more than 2 basic blocks between entry and exit. Given
21097 that BB has an edge to exit, determine if a predecessor
21098 of BB has an edge from entry. If so, compute the number
21099 of instructions in the predecessor block. If there
21100 happen to be multiple such blocks, compute the minimum. */
21101 min_prev_count = 4;
21102 FOR_EACH_EDGE (e, ei, bb->preds)
21103 {
21104 edge prev_e;
21105 edge_iterator prev_ei;
21106
21107 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
21108 {
21109 min_prev_count = 0;
21110 break;
21111 }
21112 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
21113 {
21114 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
21115 {
21116 int count = ix86_count_insn_bb (e->src);
21117 if (count < min_prev_count)
21118 min_prev_count = count;
21119 break;
21120 }
21121 }
21122 }
21123
21124 if (min_prev_count < 4)
21125 min_prev_count += ix86_count_insn_bb (bb);
21126
21127 return min_prev_count;
21128 }
21129
21130 /* Pad a short function to 4 instructions. */
21131
21132 static void
21133 ix86_pad_short_function (void)
21134 {
21135 edge e;
21136 edge_iterator ei;
21137
21138 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21139 {
21140 rtx_insn *ret = BB_END (e->src);
21141 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
21142 {
21143 int insn_count = ix86_count_insn (e->src);
21144
21145 /* Pad short function. */
21146 if (insn_count < 4)
21147 {
21148 rtx_insn *insn = ret;
21149
21150 /* Find epilogue. */
21151 while (insn
21152 && (!NOTE_P (insn)
21153 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
21154 insn = PREV_INSN (insn);
21155
21156 if (!insn)
21157 insn = ret;
21158
21159 /* Two NOPs count as one instruction. */
21160 insn_count = 2 * (4 - insn_count);
21161 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
21162 }
21163 }
21164 }
21165 }
21166
21167 /* Fix up a Windows system unwinder issue. If an EH region falls through into
21168 the epilogue, the Windows system unwinder will apply epilogue logic and
21169 produce incorrect offsets. This can be avoided by adding a nop between
21170 the last insn that can throw and the first insn of the epilogue. */
21171
21172 static void
21173 ix86_seh_fixup_eh_fallthru (void)
21174 {
21175 edge e;
21176 edge_iterator ei;
21177
21178 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21179 {
21180 rtx_insn *insn, *next;
21181
21182 /* Find the beginning of the epilogue. */
21183 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
21184 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
21185 break;
21186 if (insn == NULL)
21187 continue;
21188
21189 /* We only care about preceding insns that can throw. */
21190 insn = prev_active_insn (insn);
21191 if (insn == NULL || !can_throw_internal (insn))
21192 continue;
21193
21194 /* Do not separate calls from their debug information. */
21195 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
21196 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
21197 insn = next;
21198 else
21199 break;
21200
21201 emit_insn_after (gen_nops (const1_rtx), insn);
21202 }
21203 }
21204
21205 /* Implement machine specific optimizations. We implement padding of returns
21206 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
21207 static void
21208 ix86_reorg (void)
21209 {
21210 /* We are freeing block_for_insn in the toplev to keep compatibility
21211 with old MDEP_REORGS that are not CFG based. Recompute it now. */
21212 compute_bb_for_insn ();
21213
21214 if (TARGET_SEH && current_function_has_exception_handlers ())
21215 ix86_seh_fixup_eh_fallthru ();
21216
21217 if (optimize && optimize_function_for_speed_p (cfun))
21218 {
21219 if (TARGET_PAD_SHORT_FUNCTION)
21220 ix86_pad_short_function ();
21221 else if (TARGET_PAD_RETURNS)
21222 ix86_pad_returns ();
21223 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
21224 if (TARGET_FOUR_JUMP_LIMIT)
21225 ix86_avoid_jump_mispredicts ();
21226 #endif
21227 }
21228 }
21229
21230 /* Return nonzero when a QImode register that must be represented via a REX
21231 prefix is used. */
21232 bool
21233 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
21234 {
21235 int i;
21236 extract_insn_cached (insn);
21237 for (i = 0; i < recog_data.n_operands; i++)
21238 if (GENERAL_REG_P (recog_data.operand[i])
21239 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
21240 return true;
21241 return false;
21242 }
21243
21244 /* Return true when INSN mentions a register that must be encoded using a
21245 REX prefix. */
21246 bool
21247 x86_extended_reg_mentioned_p (rtx insn)
21248 {
21249 subrtx_iterator::array_type array;
21250 FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
21251 {
21252 const_rtx x = *iter;
21253 if (REG_P (x)
21254 && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
21255 return true;
21256 }
21257 return false;
21258 }
21259
21260 /* If profitable, negate (without causing overflow) integer constant
21261 of mode MODE at location LOC. Return true in this case. */
21262 bool
21263 x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
21264 {
21265 HOST_WIDE_INT val;
21266
21267 if (!CONST_INT_P (*loc))
21268 return false;
21269
21270 switch (mode)
21271 {
21272 case E_DImode:
21273 /* DImode x86_64 constants must fit in 32 bits. */
21274 gcc_assert (x86_64_immediate_operand (*loc, mode));
21275
21276 mode = SImode;
21277 break;
21278
21279 case E_SImode:
21280 case E_HImode:
21281 case E_QImode:
21282 break;
21283
21284 default:
21285 gcc_unreachable ();
21286 }
21287
21288 /* Avoid overflows. */
21289 if (mode_signbit_p (mode, *loc))
21290 return false;
21291
21292 val = INTVAL (*loc);
21293
21294 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
21295 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
21296 if ((val < 0 && val != -128)
21297 || val == 128)
21298 {
21299 *loc = GEN_INT (-val);
21300 return true;
21301 }
21302
21303 return false;
21304 }
21305
21306 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21307 optabs would emit if we didn't have TFmode patterns. */
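/* A minimal C sketch of the sequence emitted below, assuming a DImode input
   and a DFmode output (illustrative only):

     if ((int64_t) in >= 0)
       out = (double) in;
     else
       {
         uint64_t half = (in >> 1) | (in & 1);   (halve, keep the low bit sticky)
         out = (double) half;
         out = out + out;                        (scale back to the full value)
       }
*/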
21308
21309 void
21310 x86_emit_floatuns (rtx operands[2])
21311 {
21312 rtx_code_label *neglab, *donelab;
21313 rtx i0, i1, f0, in, out;
21314 machine_mode mode, inmode;
21315
21316 inmode = GET_MODE (operands[1]);
21317 gcc_assert (inmode == SImode || inmode == DImode);
21318
21319 out = operands[0];
21320 in = force_reg (inmode, operands[1]);
21321 mode = GET_MODE (out);
21322 neglab = gen_label_rtx ();
21323 donelab = gen_label_rtx ();
21324 f0 = gen_reg_rtx (mode);
21325
21326 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21327
21328 expand_float (out, in, 0);
21329
21330 emit_jump_insn (gen_jump (donelab));
21331 emit_barrier ();
21332
21333 emit_label (neglab);
21334
21335 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21336 1, OPTAB_DIRECT);
21337 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21338 1, OPTAB_DIRECT);
21339 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21340
21341 expand_float (f0, i0, 0);
21342
21343 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
21344
21345 emit_label (donelab);
21346 }
21347 \f
21348 /* Target hook for scalar_mode_supported_p. */
21349 static bool
21350 ix86_scalar_mode_supported_p (scalar_mode mode)
21351 {
21352 if (DECIMAL_FLOAT_MODE_P (mode))
21353 return default_decimal_float_supported_p ();
21354 else if (mode == TFmode)
21355 return true;
21356 else
21357 return default_scalar_mode_supported_p (mode);
21358 }
21359
21360 /* Implements target hook vector_mode_supported_p. */
21361 static bool
21362 ix86_vector_mode_supported_p (machine_mode mode)
21363 {
21364 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21365 return true;
21366 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21367 return true;
21368 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
21369 return true;
21370 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
21371 return true;
21372 if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
21373 && VALID_MMX_REG_MODE (mode))
21374 return true;
21375 if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
21376 && VALID_MMX_REG_MODE_3DNOW (mode))
21377 return true;
21378 return false;
21379 }
21380
21381 /* Target hook for c_mode_for_suffix. */
21382 static machine_mode
21383 ix86_c_mode_for_suffix (char suffix)
21384 {
21385 if (suffix == 'q')
21386 return TFmode;
21387 if (suffix == 'w')
21388 return XFmode;
21389
21390 return VOIDmode;
21391 }
21392
21393 /* Worker function for TARGET_MD_ASM_ADJUST.
21394
21395 We implement asm flag outputs, and maintain source compatibility
21396 with the old cc0-based compiler. */
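/* For reference, a typical use of the "=@cc<cond>" flag-output constraints
   handled below looks like this (illustrative example only):

     bool carry;
     asm ("btl %2, %1" : "=@ccc" (carry) : "r" (value), "r" (bit));

   Each such output is rewritten here into a comparison of the flags
   register against zero in the CC mode selected from <cond>.  */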
21397
21398 static rtx_insn *
21399 ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
21400 vec<const char *> &constraints,
21401 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
21402 {
21403 bool saw_asm_flag = false;
21404
21405 start_sequence ();
21406 for (unsigned i = 0, n = outputs.length (); i < n; ++i)
21407 {
21408 const char *con = constraints[i];
21409 if (strncmp (con, "=@cc", 4) != 0)
21410 continue;
21411 con += 4;
21412 if (strchr (con, ',') != NULL)
21413 {
21414 error ("alternatives not allowed in %<asm%> flag output");
21415 continue;
21416 }
21417
21418 bool invert = false;
21419 if (con[0] == 'n')
21420 invert = true, con++;
21421
21422 machine_mode mode = CCmode;
21423 rtx_code code = UNKNOWN;
21424
21425 switch (con[0])
21426 {
21427 case 'a':
21428 if (con[1] == 0)
21429 mode = CCAmode, code = EQ;
21430 else if (con[1] == 'e' && con[2] == 0)
21431 mode = CCCmode, code = NE;
21432 break;
21433 case 'b':
21434 if (con[1] == 0)
21435 mode = CCCmode, code = EQ;
21436 else if (con[1] == 'e' && con[2] == 0)
21437 mode = CCAmode, code = NE;
21438 break;
21439 case 'c':
21440 if (con[1] == 0)
21441 mode = CCCmode, code = EQ;
21442 break;
21443 case 'e':
21444 if (con[1] == 0)
21445 mode = CCZmode, code = EQ;
21446 break;
21447 case 'g':
21448 if (con[1] == 0)
21449 mode = CCGCmode, code = GT;
21450 else if (con[1] == 'e' && con[2] == 0)
21451 mode = CCGCmode, code = GE;
21452 break;
21453 case 'l':
21454 if (con[1] == 0)
21455 mode = CCGCmode, code = LT;
21456 else if (con[1] == 'e' && con[2] == 0)
21457 mode = CCGCmode, code = LE;
21458 break;
21459 case 'o':
21460 if (con[1] == 0)
21461 mode = CCOmode, code = EQ;
21462 break;
21463 case 'p':
21464 if (con[1] == 0)
21465 mode = CCPmode, code = EQ;
21466 break;
21467 case 's':
21468 if (con[1] == 0)
21469 mode = CCSmode, code = EQ;
21470 break;
21471 case 'z':
21472 if (con[1] == 0)
21473 mode = CCZmode, code = EQ;
21474 break;
21475 }
21476 if (code == UNKNOWN)
21477 {
21478 error ("unknown %<asm%> flag output %qs", constraints[i]);
21479 continue;
21480 }
21481 if (invert)
21482 code = reverse_condition (code);
21483
21484 rtx dest = outputs[i];
21485 if (!saw_asm_flag)
21486 {
21487 /* This is the first asm flag output. Here we put the flags
21488 register in as the real output and adjust the condition to
21489 allow it. */
21490 constraints[i] = "=Bf";
21491 outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
21492 saw_asm_flag = true;
21493 }
21494 else
21495 {
21496 /* We don't need the flags register as output twice. */
21497 constraints[i] = "=X";
21498 outputs[i] = gen_rtx_SCRATCH (SImode);
21499 }
21500
21501 rtx x = gen_rtx_REG (mode, FLAGS_REG);
21502 x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
21503
21504 machine_mode dest_mode = GET_MODE (dest);
21505 if (!SCALAR_INT_MODE_P (dest_mode))
21506 {
21507 error ("invalid type for %<asm%> flag output");
21508 continue;
21509 }
21510
21511 if (dest_mode == QImode)
21512 emit_insn (gen_rtx_SET (dest, x));
21513 else
21514 {
21515 rtx reg = gen_reg_rtx (QImode);
21516 emit_insn (gen_rtx_SET (reg, x));
21517
21518 reg = convert_to_mode (dest_mode, reg, 1);
21519 emit_move_insn (dest, reg);
21520 }
21521 }
21522
21523 rtx_insn *seq = get_insns ();
21524 end_sequence ();
21525
21526 if (saw_asm_flag)
21527 return seq;
21528 else
21529 {
21530 /* If we had no asm flag outputs, clobber the flags. */
21531 clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
21532 SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
21533 return NULL;
21534 }
21535 }
21536
21537 /* Implements the target hook targetm.asm.encode_section_info. */
21538
21539 static void ATTRIBUTE_UNUSED
21540 ix86_encode_section_info (tree decl, rtx rtl, int first)
21541 {
21542 default_encode_section_info (decl, rtl, first);
21543
21544 if (ix86_in_large_data_p (decl))
21545 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21546 }
21547
21548 /* Worker function for REVERSE_CONDITION. */
21549
21550 enum rtx_code
21551 ix86_reverse_condition (enum rtx_code code, machine_mode mode)
21552 {
21553 return (mode == CCFPmode
21554 ? reverse_condition_maybe_unordered (code)
21555 : reverse_condition (code));
21556 }
21557
21558 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21559 to OPERANDS[0]. */
21560
21561 const char *
21562 output_387_reg_move (rtx_insn *insn, rtx *operands)
21563 {
21564 if (REG_P (operands[0]))
21565 {
21566 if (REG_P (operands[1])
21567 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21568 {
21569 if (REGNO (operands[0]) == FIRST_STACK_REG)
21570 return output_387_ffreep (operands, 0);
21571 return "fstp\t%y0";
21572 }
21573 if (STACK_TOP_P (operands[0]))
21574 return "fld%Z1\t%y1";
21575 return "fst\t%y0";
21576 }
21577 else if (MEM_P (operands[0]))
21578 {
21579 gcc_assert (REG_P (operands[1]));
21580 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21581 return "fstp%Z0\t%y0";
21582 else
21583 {
21584 /* There is no non-popping store to memory for XFmode.
21585 So if we need one, follow the store with a load. */
21586 if (GET_MODE (operands[0]) == XFmode)
21587 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
21588 else
21589 return "fst%Z0\t%y0";
21590 }
21591 }
21592 else
21593 gcc_unreachable();
21594 }
21595 #ifdef TARGET_SOLARIS
21596 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21597
21598 static void
21599 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21600 tree decl)
21601 {
21602 /* With Binutils 2.15, the "@unwind" marker must be specified on
21603 every occurrence of the ".eh_frame" section, not just the first
21604 one. */
21605 if (TARGET_64BIT
21606 && strcmp (name, ".eh_frame") == 0)
21607 {
21608 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21609 flags & SECTION_WRITE ? "aw" : "a");
21610 return;
21611 }
21612
21613 #ifndef USE_GAS
21614 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
21615 {
21616 solaris_elf_asm_comdat_section (name, flags, decl);
21617 return;
21618 }
21619
21620 /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
21621 SPARC assembler. One cannot mix single-letter flags and #exclude, so
21622 only emit the latter here. */
21623 if (flags & SECTION_EXCLUDE)
21624 {
21625 fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
21626 return;
21627 }
21628 #endif
21629
21630 default_elf_asm_named_section (name, flags, decl);
21631 }
21632 #endif /* TARGET_SOLARIS */
21633
21634 /* Return the mangling of TYPE if it is an extended fundamental type. */
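/* For example (illustrative): with this hook, "void f (__float128)" mangles
   as "_Z1fg" and "void f (long double)" as "_Z1fe" on x86, where long double
   is the 80-bit XFmode type.  */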
21635
21636 static const char *
21637 ix86_mangle_type (const_tree type)
21638 {
21639 type = TYPE_MAIN_VARIANT (type);
21640
21641 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
21642 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
21643 return NULL;
21644
21645 switch (TYPE_MODE (type))
21646 {
21647 case E_TFmode:
21648 /* __float128 is "g". */
21649 return "g";
21650 case E_XFmode:
21651 /* "long double" or __float80 is "e". */
21652 return "e";
21653 default:
21654 return NULL;
21655 }
21656 }
21657
21658 static GTY(()) tree ix86_tls_stack_chk_guard_decl;
21659
21660 static tree
21661 ix86_stack_protect_guard (void)
21662 {
21663 if (TARGET_SSP_TLS_GUARD)
21664 {
21665 tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
21666 int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
21667 tree type = build_qualified_type (type_node, qual);
21668 tree t;
21669
21670 if (global_options_set.x_ix86_stack_protector_guard_symbol_str)
21671 {
21672 t = ix86_tls_stack_chk_guard_decl;
21673
21674 if (t == NULL)
21675 {
21676 rtx x;
21677
21678 t = build_decl
21679 (UNKNOWN_LOCATION, VAR_DECL,
21680 get_identifier (ix86_stack_protector_guard_symbol_str),
21681 type);
21682 TREE_STATIC (t) = 1;
21683 TREE_PUBLIC (t) = 1;
21684 DECL_EXTERNAL (t) = 1;
21685 TREE_USED (t) = 1;
21686 TREE_THIS_VOLATILE (t) = 1;
21687 DECL_ARTIFICIAL (t) = 1;
21688 DECL_IGNORED_P (t) = 1;
21689
21690 /* Do not share RTL as the declaration is visible outside of
21691 current function. */
21692 x = DECL_RTL (t);
21693 RTX_FLAG (x, used) = 1;
21694
21695 ix86_tls_stack_chk_guard_decl = t;
21696 }
21697 }
21698 else
21699 {
21700 tree asptrtype = build_pointer_type (type);
21701
21702 t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
21703 t = build2 (MEM_REF, asptrtype, t,
21704 build_int_cst (asptrtype, 0));
21705 TREE_THIS_VOLATILE (t) = 1;
21706 }
21707
21708 return t;
21709 }
21710
21711 return default_stack_protect_guard ();
21712 }
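/* As an illustration of the hook above: with -mstack-protector-guard=tls the
   guard is read from the segment register and offset given by
   -mstack-protector-guard-reg= and -mstack-protector-guard-offset=, and
   -mstack-protector-guard-symbol= names a TLS variable to use instead of the
   fixed offset; otherwise the generic __stack_chk_guard path is used.  */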
21713
21714 /* For 32-bit code we can save PIC register setup by using
21715 __stack_chk_fail_local hidden function instead of calling
21716 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21717 register, so it is better to call __stack_chk_fail directly. */
21718
21719 static tree ATTRIBUTE_UNUSED
21720 ix86_stack_protect_fail (void)
21721 {
21722 return TARGET_64BIT
21723 ? default_external_stack_protect_fail ()
21724 : default_hidden_stack_protect_fail ();
21725 }
21726
21727 /* Select a format to encode pointers in exception handling data. CODE
21728 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21729 true if the symbol may be affected by dynamic relocations.
21730
21731 ??? All x86 object file formats are capable of representing this.
21732 After all, the relocation needed is the same as for the call insn.
21733 Whether or not a particular assembler allows us to enter such, I
21734 guess we'll have to see. */
21735 int
21736 asm_preferred_eh_data_format (int code, int global)
21737 {
21738 if (flag_pic)
21739 {
21740 int type = DW_EH_PE_sdata8;
21741 if (!TARGET_64BIT
21742 || ix86_cmodel == CM_SMALL_PIC
21743 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21744 type = DW_EH_PE_sdata4;
21745 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21746 }
21747 if (ix86_cmodel == CM_SMALL
21748 || (ix86_cmodel == CM_MEDIUM && code))
21749 return DW_EH_PE_udata4;
21750 return DW_EH_PE_absptr;
21751 }
21752 \f
21753 /* Implement targetm.vectorize.builtin_vectorization_cost. */
21754 static int
21755 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
21756 tree vectype, int)
21757 {
21758 bool fp = false;
21759 machine_mode mode = TImode;
21760 int index;
21761 if (vectype != NULL)
21762 {
21763 fp = FLOAT_TYPE_P (vectype);
21764 mode = TYPE_MODE (vectype);
21765 }
21766
21767 switch (type_of_cost)
21768 {
21769 case scalar_stmt:
21770 return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
21771
21772 case scalar_load:
21773 /* Load/store costs are relative to a register move, which is 2. Recompute
21774 them in COSTS_N_INSNS units so everything has the same base. */
21775 return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
21776 : ix86_cost->int_load [2]) / 2;
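  /* E.g. (illustrative numbers): with int_load[2] == 6 and
     COSTS_N_INSNS (N) expanding to N * 4, a scalar integer load is
     reported as 6 * 4 / 2 == 12.  */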
21777
21778 case scalar_store:
21779 return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
21780 : ix86_cost->int_store [2]) / 2;
21781
21782 case vector_stmt:
21783 return ix86_vec_cost (mode,
21784 fp ? ix86_cost->addss : ix86_cost->sse_op);
21785
21786 case vector_load:
21787 index = sse_store_index (mode);
21788 /* See PR82713 - we may end up being called on non-vector type. */
21789 if (index < 0)
21790 index = 2;
21791 return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
21792
21793 case vector_store:
21794 index = sse_store_index (mode);
21795 /* See PR82713 - we may end up being called on non-vector type. */
21796 if (index < 0)
21797 index = 2;
21798 return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
21799
21800 case vec_to_scalar:
21801 case scalar_to_vec:
21802 return ix86_vec_cost (mode, ix86_cost->sse_op);
21803
21804 /* We should have separate costs for unaligned loads and gather/scatter.
21805 Do that incrementally. */
21806 case unaligned_load:
21807 index = sse_store_index (mode);
21808 /* See PR82713 - we may end up being called on non-vector type. */
21809 if (index < 0)
21810 index = 2;
21811 return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
21812
21813 case unaligned_store:
21814 index = sse_store_index (mode);
21815 /* See PR82713 - we may end up being called on non-vector type. */
21816 if (index < 0)
21817 index = 2;
21818 return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
21819
21820 case vector_gather_load:
21821 return ix86_vec_cost (mode,
21822 COSTS_N_INSNS
21823 (ix86_cost->gather_static
21824 + ix86_cost->gather_per_elt
21825 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21826
21827 case vector_scatter_store:
21828 return ix86_vec_cost (mode,
21829 COSTS_N_INSNS
21830 (ix86_cost->scatter_static
21831 + ix86_cost->scatter_per_elt
21832 * TYPE_VECTOR_SUBPARTS (vectype)) / 2);
21833
21834 case cond_branch_taken:
21835 return ix86_cost->cond_taken_branch_cost;
21836
21837 case cond_branch_not_taken:
21838 return ix86_cost->cond_not_taken_branch_cost;
21839
21840 case vec_perm:
21841 case vec_promote_demote:
21842 return ix86_vec_cost (mode, ix86_cost->sse_op);
21843
21844 case vec_construct:
21845 {
21846 /* N element inserts into SSE vectors. */
21847 int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
21848 /* One vinserti128 for combining two SSE vectors for AVX256. */
21849 if (GET_MODE_BITSIZE (mode) == 256)
21850 cost += ix86_vec_cost (mode, ix86_cost->addss);
21851 /* One vinserti64x4 and two vinserti128 for combining SSE
21852 and AVX256 vectors to AVX512. */
21853 else if (GET_MODE_BITSIZE (mode) == 512)
21854 cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
21855 return cost;
21856 }
21857
21858 default:
21859 gcc_unreachable ();
21860 }
21861 }
21862
21863 \f
21864 /* This function returns the calling-ABI-specific va_list type node.
21865 It returns the FNDECL-specific va_list type. */
21866
21867 static tree
21868 ix86_fn_abi_va_list (tree fndecl)
21869 {
21870 if (!TARGET_64BIT)
21871 return va_list_type_node;
21872 gcc_assert (fndecl != NULL_TREE);
21873
21874 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
21875 return ms_va_list_type_node;
21876 else
21877 return sysv_va_list_type_node;
21878 }
21879
21880 /* Returns the canonical va_list type specified by TYPE. If there
21881 is no valid TYPE provided, it returns NULL_TREE. */
21882
21883 static tree
21884 ix86_canonical_va_list_type (tree type)
21885 {
21886 if (TARGET_64BIT)
21887 {
21888 if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
21889 return ms_va_list_type_node;
21890
21891 if ((TREE_CODE (type) == ARRAY_TYPE
21892 && integer_zerop (array_type_nelts (type)))
21893 || POINTER_TYPE_P (type))
21894 {
21895 tree elem_type = TREE_TYPE (type);
21896 if (TREE_CODE (elem_type) == RECORD_TYPE
21897 && lookup_attribute ("sysv_abi va_list",
21898 TYPE_ATTRIBUTES (elem_type)))
21899 return sysv_va_list_type_node;
21900 }
21901
21902 return NULL_TREE;
21903 }
21904
21905 return std_canonical_va_list_type (type);
21906 }
21907
21908 /* Iterate through the target-specific builtin types for va_list.
21909 IDX denotes the iterator, *PTREE is set to the result type of
21910 the va_list builtin, and *PNAME to its internal type.
21911 Returns zero if there is no element for this index, otherwise
21912 IDX should be increased upon the next call.
21913 Note, do not iterate a base builtin's name like __builtin_va_list.
21914 Used from c_common_nodes_and_builtins. */
21915
21916 static int
21917 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
21918 {
21919 if (TARGET_64BIT)
21920 {
21921 switch (idx)
21922 {
21923 default:
21924 break;
21925
21926 case 0:
21927 *ptree = ms_va_list_type_node;
21928 *pname = "__builtin_ms_va_list";
21929 return 1;
21930
21931 case 1:
21932 *ptree = sysv_va_list_type_node;
21933 *pname = "__builtin_sysv_va_list";
21934 return 1;
21935 }
21936 }
21937
21938 return 0;
21939 }
21940
21941 #undef TARGET_SCHED_DISPATCH
21942 #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
21943 #undef TARGET_SCHED_DISPATCH_DO
21944 #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
21945 #undef TARGET_SCHED_REASSOCIATION_WIDTH
21946 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
21947 #undef TARGET_SCHED_REORDER
21948 #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
21949 #undef TARGET_SCHED_ADJUST_PRIORITY
21950 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
21951 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
21952 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
21953 ix86_dependencies_evaluation_hook
21954
21955
21956 /* Implementation of the reassociation_width target hook, used by the
21957 reassoc phase to identify the parallelism level in a reassociated
21958 tree. The statement's tree code is passed in OP. The argument's type
21959 is passed in MODE. */
21960
21961 static int
21962 ix86_reassociation_width (unsigned int op, machine_mode mode)
21963 {
21964 int width = 1;
21965 /* Vector part. */
21966 if (VECTOR_MODE_P (mode))
21967 {
21968 int div = 1;
21969 if (INTEGRAL_MODE_P (mode))
21970 width = ix86_cost->reassoc_vec_int;
21971 else if (FLOAT_MODE_P (mode))
21972 width = ix86_cost->reassoc_vec_fp;
21973
21974 if (width == 1)
21975 return 1;
21976
21977 /* Integer vector instructions execute in FP unit
21978 and can execute 3 additions and one multiplication per cycle. */
21979 if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
21980 || ix86_tune == PROCESSOR_ZNVER3)
21981 && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
21982 return 1;
21983
21984 /* Account for targets that split wide vectors into multiple parts. */
21985 if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
21986 div = GET_MODE_BITSIZE (mode) / 128;
21987 else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
21988 div = GET_MODE_BITSIZE (mode) / 64;
21989 width = (width + div - 1) / div;
21990 }
21991 /* Scalar part. */
21992 else if (INTEGRAL_MODE_P (mode))
21993 width = ix86_cost->reassoc_int;
21994 else if (FLOAT_MODE_P (mode))
21995 width = ix86_cost->reassoc_fp;
21996
21997 /* Avoid using too many registers in 32bit mode. */
21998 if (!TARGET_64BIT && width > 2)
21999 width = 2;
22000 return width;
22001 }
22002
22003 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
22004 place emms and femms instructions. */
22005
22006 static machine_mode
22007 ix86_preferred_simd_mode (scalar_mode mode)
22008 {
22009 if (!TARGET_SSE)
22010 return word_mode;
22011
22012 switch (mode)
22013 {
22014 case E_QImode:
22015 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
22016 return V64QImode;
22017 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22018 return V32QImode;
22019 else
22020 return V16QImode;
22021
22022 case E_HImode:
22023 if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
22024 return V32HImode;
22025 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22026 return V16HImode;
22027 else
22028 return V8HImode;
22029
22030 case E_SImode:
22031 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22032 return V16SImode;
22033 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22034 return V8SImode;
22035 else
22036 return V4SImode;
22037
22038 case E_DImode:
22039 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22040 return V8DImode;
22041 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22042 return V4DImode;
22043 else
22044 return V2DImode;
22045
22046 case E_SFmode:
22047 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22048 return V16SFmode;
22049 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22050 return V8SFmode;
22051 else
22052 return V4SFmode;
22053
22054 case E_DFmode:
22055 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22056 return V8DFmode;
22057 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22058 return V4DFmode;
22059 else if (TARGET_SSE2)
22060 return V2DFmode;
22061 /* FALLTHRU */
22062
22063 default:
22064 return word_mode;
22065 }
22066 }
22067
22068 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
22069 vectors. If AVX512F is enabled then try vectorizing with 512bit,
22070 256bit and 128bit vectors. */
22071
22072 static unsigned int
22073 ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
22074 {
22075 if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
22076 {
22077 modes->safe_push (V64QImode);
22078 modes->safe_push (V32QImode);
22079 modes->safe_push (V16QImode);
22080 }
22081 else if (TARGET_AVX512F && all)
22082 {
22083 modes->safe_push (V32QImode);
22084 modes->safe_push (V16QImode);
22085 modes->safe_push (V64QImode);
22086 }
22087 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
22088 {
22089 modes->safe_push (V32QImode);
22090 modes->safe_push (V16QImode);
22091 }
22092 else if (TARGET_AVX && all)
22093 {
22094 modes->safe_push (V16QImode);
22095 modes->safe_push (V32QImode);
22096 }
22097 else if (TARGET_MMX_WITH_SSE)
22098 modes->safe_push (V16QImode);
22099
22100 if (TARGET_MMX_WITH_SSE)
22101 modes->safe_push (V8QImode);
22102
22103 return 0;
22104 }
22105
22106 /* Implementation of targetm.vectorize.get_mask_mode. */
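/* For example (illustrative): with AVX512VL, a V8SImode vector (32 bytes,
   8 four-byte elements) gets a QImode scalar mask with one bit per element,
   whereas without AVX512VL the same vector falls back to a V8SImode vector
   mask.  */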
22107
22108 static opt_machine_mode
22109 ix86_get_mask_mode (machine_mode data_mode)
22110 {
22111 unsigned vector_size = GET_MODE_SIZE (data_mode);
22112 unsigned nunits = GET_MODE_NUNITS (data_mode);
22113 unsigned elem_size = vector_size / nunits;
22114
22115 /* Scalar mask case. */
22116 if ((TARGET_AVX512F && vector_size == 64)
22117 || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
22118 {
22119 if (elem_size == 4
22120 || elem_size == 8
22121 || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
22122 return smallest_int_mode_for_size (nunits);
22123 }
22124
22125 scalar_int_mode elem_mode
22126 = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
22127
22128 gcc_assert (elem_size * nunits == vector_size);
22129
22130 return mode_for_vector (elem_mode, nunits);
22131 }
22132
22133 \f
22134
22135 /* Return the class of registers which could be used for a pseudo of MODE
22136 and of class RCLASS for spilling instead of memory. Return NO_REGS
22137 if it is not possible or not profitable. */
22138
22139 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
22140
22141 static reg_class_t
22142 ix86_spill_class (reg_class_t rclass, machine_mode mode)
22143 {
22144 if (0 && TARGET_GENERAL_REGS_SSE_SPILL
22145 && TARGET_SSE2
22146 && TARGET_INTER_UNIT_MOVES_TO_VEC
22147 && TARGET_INTER_UNIT_MOVES_FROM_VEC
22148 && (mode == SImode || (TARGET_64BIT && mode == DImode))
22149 && INTEGER_CLASS_P (rclass))
22150 return ALL_SSE_REGS;
22151 return NO_REGS;
22152 }
22153
22154 /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST. Like the default implementation,
22155 but returns a lower bound. */
22156
22157 static unsigned int
22158 ix86_max_noce_ifcvt_seq_cost (edge e)
22159 {
22160 bool predictable_p = predictable_edge_p (e);
22161 if (predictable_p)
22162 {
22163 if (global_options_set.x_param_max_rtl_if_conversion_predictable_cost)
22164 return param_max_rtl_if_conversion_predictable_cost;
22165 }
22166 else
22167 {
22168 if (global_options_set.x_param_max_rtl_if_conversion_unpredictable_cost)
22169 return param_max_rtl_if_conversion_unpredictable_cost;
22170 }
22171
22172 return BRANCH_COST (true, predictable_p) * COSTS_N_INSNS (2);
22173 }
22174
22175 /* Return true if SEQ is a good candidate as a replacement for the
22176 if-convertible sequence described in IF_INFO. */
22177
22178 static bool
22179 ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
22180 {
22181 if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
22182 {
22183 int cmov_cnt = 0;
22184 /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
22185 Maybe we should allow even more conditional moves as long as they
22186 are used far enough not to stall the CPU, or also consider
22187 IF_INFO->TEST_BB succ edge probabilities. */
22188 for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
22189 {
22190 rtx set = single_set (insn);
22191 if (!set)
22192 continue;
22193 if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
22194 continue;
22195 rtx src = SET_SRC (set);
22196 machine_mode mode = GET_MODE (src);
22197 if (GET_MODE_CLASS (mode) != MODE_INT
22198 && GET_MODE_CLASS (mode) != MODE_FLOAT)
22199 continue;
22200 if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
22201 || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
22202 continue;
22203 /* insn is CMOV or FCMOV. */
22204 if (++cmov_cnt > 1)
22205 return false;
22206 }
22207 }
22208 return default_noce_conversion_profitable_p (seq, if_info);
22209 }
22210
22211 /* Implement targetm.vectorize.init_cost. */
22212
22213 static void *
22214 ix86_init_cost (class loop *)
22215 {
22216 unsigned *cost = XNEWVEC (unsigned, 3);
22217 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
22218 return cost;
22219 }
22220
22221 /* Implement targetm.vectorize.add_stmt_cost. */
22222
22223 static unsigned
22224 ix86_add_stmt_cost (class vec_info *vinfo, void *data, int count,
22225 enum vect_cost_for_stmt kind,
22226 class _stmt_vec_info *stmt_info, tree vectype,
22227 int misalign,
22228 enum vect_cost_model_location where)
22229 {
22230 unsigned *cost = (unsigned *) data;
22231 unsigned retval = 0;
22232 bool scalar_p
22233 = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
22234 int stmt_cost = - 1;
22235
22236 bool fp = false;
22237 machine_mode mode = scalar_p ? SImode : TImode;
22238
22239 if (vectype != NULL)
22240 {
22241 fp = FLOAT_TYPE_P (vectype);
22242 mode = TYPE_MODE (vectype);
22243 if (scalar_p)
22244 mode = TYPE_MODE (TREE_TYPE (vectype));
22245 }
22246
22247 if ((kind == vector_stmt || kind == scalar_stmt)
22248 && stmt_info
22249 && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
22250 {
22251 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
22252 /*machine_mode inner_mode = mode;
22253 if (VECTOR_MODE_P (mode))
22254 inner_mode = GET_MODE_INNER (mode);*/
22255
22256 switch (subcode)
22257 {
22258 case PLUS_EXPR:
22259 case POINTER_PLUS_EXPR:
22260 case MINUS_EXPR:
22261 if (kind == scalar_stmt)
22262 {
22263 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22264 stmt_cost = ix86_cost->addss;
22265 else if (X87_FLOAT_MODE_P (mode))
22266 stmt_cost = ix86_cost->fadd;
22267 else
22268 stmt_cost = ix86_cost->add;
22269 }
22270 else
22271 stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
22272 : ix86_cost->sse_op);
22273 break;
22274
22275 case MULT_EXPR:
22276 case WIDEN_MULT_EXPR:
22277 case MULT_HIGHPART_EXPR:
22278 stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
22279 break;
22280 case NEGATE_EXPR:
22281 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22282 stmt_cost = ix86_cost->sse_op;
22283 else if (X87_FLOAT_MODE_P (mode))
22284 stmt_cost = ix86_cost->fchs;
22285 else if (VECTOR_MODE_P (mode))
22286 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
22287 else
22288 stmt_cost = ix86_cost->add;
22289 break;
22290 case TRUNC_DIV_EXPR:
22291 case CEIL_DIV_EXPR:
22292 case FLOOR_DIV_EXPR:
22293 case ROUND_DIV_EXPR:
22294 case TRUNC_MOD_EXPR:
22295 case CEIL_MOD_EXPR:
22296 case FLOOR_MOD_EXPR:
22297 case RDIV_EXPR:
22298 case ROUND_MOD_EXPR:
22299 case EXACT_DIV_EXPR:
22300 stmt_cost = ix86_division_cost (ix86_cost, mode);
22301 break;
22302
22303 case RSHIFT_EXPR:
22304 case LSHIFT_EXPR:
22305 case LROTATE_EXPR:
22306 case RROTATE_EXPR:
22307 {
22308 tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
22309 stmt_cost = ix86_shift_rotate_cost
22310 (ix86_cost, mode,
22311 TREE_CODE (op2) == INTEGER_CST,
22312 cst_and_fits_in_hwi (op2) ? int_cst_value (op2) : -1,
22313 true, false, false, NULL, NULL);
22314 }
22315 break;
22316 case NOP_EXPR:
22317 /* Only sign-conversions are free. */
22318 if (tree_nop_conversion_p
22319 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
22320 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
22321 stmt_cost = 0;
22322 break;
22323
22324 case BIT_IOR_EXPR:
22325 case ABS_EXPR:
22326 case ABSU_EXPR:
22327 case MIN_EXPR:
22328 case MAX_EXPR:
22329 case BIT_XOR_EXPR:
22330 case BIT_AND_EXPR:
22331 case BIT_NOT_EXPR:
22332 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22333 stmt_cost = ix86_cost->sse_op;
22334 else if (VECTOR_MODE_P (mode))
22335 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
22336 else
22337 stmt_cost = ix86_cost->add;
22338 break;
22339 default:
22340 break;
22341 }
22342 }
22343
22344 combined_fn cfn;
22345 if ((kind == vector_stmt || kind == scalar_stmt)
22346 && stmt_info
22347 && stmt_info->stmt
22348 && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
22349 switch (cfn)
22350 {
22351 case CFN_FMA:
22352 stmt_cost = ix86_vec_cost (mode,
22353 mode == SFmode ? ix86_cost->fmass
22354 : ix86_cost->fmasd);
22355 break;
22356 default:
22357 break;
22358 }
22359
22360 /* If we do elementwise loads into a vector then we are bound by
22361 latency and execution resources for the many scalar loads
22362 (AGU and load ports). Try to account for this by scaling the
22363 construction cost by the number of elements involved. */
22364 if ((kind == vec_construct || kind == vec_to_scalar)
22365 && stmt_info
22366 && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
22367 || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
22368 && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
22369 && TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info))) != INTEGER_CST)
22370 {
22371 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
22372 stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
22373 }
22374 if (stmt_cost == -1)
22375 stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
22376
22377 /* Penalize DFmode vector operations for Bonnell. */
22378 if (TARGET_BONNELL && kind == vector_stmt
22379 && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
22380 stmt_cost *= 5; /* FIXME: The value here is arbitrary. */
22381
22382 /* Statements in an inner loop relative to the loop being
22383 vectorized are weighted more heavily. The value here is
22384 arbitrary and could potentially be improved with analysis. */
22385 if (where == vect_body && stmt_info
22386 && stmt_in_inner_loop_p (vinfo, stmt_info))
22387 count *= 50; /* FIXME. */
22388
22389 retval = (unsigned) (count * stmt_cost);
22390
22391 /* We need to multiply the cost of all vector stmts by 1.7 (estimated cost)
22392 for Silvermont, as it has an out-of-order integer pipeline and can execute
22393 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
22394 if ((TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS
22395 || TARGET_TREMONT || TARGET_INTEL) && stmt_info && stmt_info->stmt)
22396 {
22397 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
22398 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
22399 retval = (retval * 17) / 10;
22400 }
22401
22402 cost[where] += retval;
22403
22404 return retval;
22405 }
22406
22407 /* Implement targetm.vectorize.finish_cost. */
22408
22409 static void
22410 ix86_finish_cost (void *data, unsigned *prologue_cost,
22411 unsigned *body_cost, unsigned *epilogue_cost)
22412 {
22413 unsigned *cost = (unsigned *) data;
22414 *prologue_cost = cost[vect_prologue];
22415 *body_cost = cost[vect_body];
22416 *epilogue_cost = cost[vect_epilogue];
22417 }
22418
22419 /* Implement targetm.vectorize.destroy_cost_data. */
22420
22421 static void
22422 ix86_destroy_cost_data (void *data)
22423 {
22424 free (data);
22425 }
22426
22427 /* Validate target specific memory model bits in VAL. */
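/* A typical source-level use of the HLE bits validated below (illustrative):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
*/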
22428
22429 static unsigned HOST_WIDE_INT
22430 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
22431 {
22432 enum memmodel model = memmodel_from_int (val);
22433 bool strong;
22434
22435 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
22436 |MEMMODEL_MASK)
22437 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
22438 {
22439 warning (OPT_Winvalid_memory_model,
22440 "unknown architecture specific memory model");
22441 return MEMMODEL_SEQ_CST;
22442 }
22443 strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
22444 if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
22445 {
22446 warning (OPT_Winvalid_memory_model,
22447 "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
22448 "memory model");
22449 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
22450 }
22451 if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
22452 {
22453 warning (OPT_Winvalid_memory_model,
22454 "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
22455 "memory model");
22456 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
22457 }
22458 return val;
22459 }
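/* Illustration only, not part of GCC: user code combines the HLE bits with
   a C11 memory model, and ix86_memmodel_check validates the combination.
   A sketch of the documented lock-elision pattern:

     // acquire the lock, requesting hardware lock elision
     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     // ... critical section ...
     __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Mismatched combinations, e.g. __ATOMIC_HLE_RELEASE on an acquire
   operation, trigger the warnings above and fall back to SEQ_CST.  */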
22460
22461 /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
22462 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
22463 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
22464 or number of vecsize_mangle variants that should be emitted. */
22465
22466 static int
22467 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
22468 struct cgraph_simd_clone *clonei,
22469 tree base_type, int num)
22470 {
22471 int ret = 1;
22472
22473 if (clonei->simdlen
22474 && (clonei->simdlen < 2
22475 || clonei->simdlen > 1024
22476 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
22477 {
22478 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22479 "unsupported simdlen %wd", clonei->simdlen.to_constant ());
22480 return 0;
22481 }
22482
22483 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
22484 if (TREE_CODE (ret_type) != VOID_TYPE)
22485 switch (TYPE_MODE (ret_type))
22486 {
22487 case E_QImode:
22488 case E_HImode:
22489 case E_SImode:
22490 case E_DImode:
22491 case E_SFmode:
22492 case E_DFmode:
22493 /* case E_SCmode: */
22494 /* case E_DCmode: */
22495 if (!AGGREGATE_TYPE_P (ret_type))
22496 break;
22497 /* FALLTHRU */
22498 default:
22499 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22500 "unsupported return type %qT for simd", ret_type);
22501 return 0;
22502 }
22503
22504 tree t;
22505 int i;
22506 tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
22507 bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
22508
22509 for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
22510 t && t != void_list_node; t = TREE_CHAIN (t), i++)
22511 {
22512 tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
22513 switch (TYPE_MODE (arg_type))
22514 {
22515 case E_QImode:
22516 case E_HImode:
22517 case E_SImode:
22518 case E_DImode:
22519 case E_SFmode:
22520 case E_DFmode:
22521 /* case E_SCmode: */
22522 /* case E_DCmode: */
22523 if (!AGGREGATE_TYPE_P (arg_type))
22524 break;
22525 /* FALLTHRU */
22526 default:
22527 if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
22528 break;
22529 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22530 "unsupported argument type %qT for simd", arg_type);
22531 return 0;
22532 }
22533 }
22534
22535 if (!TREE_PUBLIC (node->decl))
22536 {
22537 /* If the function isn't exported, we can pick up just one ISA
22538 for the clones. */
22539 if (TARGET_AVX512F)
22540 clonei->vecsize_mangle = 'e';
22541 else if (TARGET_AVX2)
22542 clonei->vecsize_mangle = 'd';
22543 else if (TARGET_AVX)
22544 clonei->vecsize_mangle = 'c';
22545 else
22546 clonei->vecsize_mangle = 'b';
22547 ret = 1;
22548 }
22549 else
22550 {
22551 clonei->vecsize_mangle = "bcde"[num];
22552 ret = 4;
22553 }
22554 clonei->mask_mode = VOIDmode;
22555 switch (clonei->vecsize_mangle)
22556 {
22557 case 'b':
22558 clonei->vecsize_int = 128;
22559 clonei->vecsize_float = 128;
22560 break;
22561 case 'c':
22562 clonei->vecsize_int = 128;
22563 clonei->vecsize_float = 256;
22564 break;
22565 case 'd':
22566 clonei->vecsize_int = 256;
22567 clonei->vecsize_float = 256;
22568 break;
22569 case 'e':
22570 clonei->vecsize_int = 512;
22571 clonei->vecsize_float = 512;
22572 if (TYPE_MODE (base_type) == QImode)
22573 clonei->mask_mode = DImode;
22574 else
22575 clonei->mask_mode = SImode;
22576 break;
22577 }
22578 if (clonei->simdlen == 0)
22579 {
22580 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
22581 clonei->simdlen = clonei->vecsize_int;
22582 else
22583 clonei->simdlen = clonei->vecsize_float;
22584 clonei->simdlen = clonei->simdlen
22585 / GET_MODE_BITSIZE (TYPE_MODE (base_type));
22586 }
22587 else if (clonei->simdlen > 16)
22588 {
22589 /* For compatibility with ICC, use the same upper bounds
22590 for simdlen. In particular, for CTYPE below, use the return type,
22591 unless the function returns void, in which case use the characteristic
22592 type. If it is possible for the given SIMDLEN to pass a CTYPE value
22593 in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
22594 for 64-bit code), accept that SIMDLEN; otherwise warn and don't
22595 emit the corresponding clone. */
22596 tree ctype = ret_type;
22597 if (TREE_CODE (ret_type) == VOID_TYPE)
22598 ctype = base_type;
22599 int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
22600 if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
22601 cnt /= clonei->vecsize_int;
22602 else
22603 cnt /= clonei->vecsize_float;
22604 if (cnt > (TARGET_64BIT ? 16 : 8))
22605 {
22606 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
22607 "unsupported simdlen %wd",
22608 clonei->simdlen.to_constant ());
22609 return 0;
22610 }
22611 }
22612 return ret;
22613 }
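/* Illustration only, not part of GCC: for an exported function such as

     #pragma omp declare simd
     float scale (float x, float y) { return x * y; }

   the hook above requests four clones (ret == 4), one per vecsize_mangle
   letter: 'b' (SSE, 128-bit), 'c' (AVX, 128-bit int / 256-bit float),
   'd' (AVX2, 256-bit) and 'e' (AVX-512, 512-bit).  With a float
   characteristic type the computed simdlen values are 4, 8, 8 and 16
   respectively (vecsize_float divided by the 32-bit element size).  */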
22614
22615 /* If SIMD clone NODE can't be used in a vectorized loop
22616 in the current function, return -1; otherwise return the badness of
22617 using it (0 if it is the most desirable from the vecsize_mangle point
22618 of view, 1 slightly less desirable, etc.). */
22619
22620 static int
22621 ix86_simd_clone_usable (struct cgraph_node *node)
22622 {
22623 switch (node->simdclone->vecsize_mangle)
22624 {
22625 case 'b':
22626 if (!TARGET_SSE2)
22627 return -1;
22628 if (!TARGET_AVX)
22629 return 0;
22630 return TARGET_AVX2 ? 2 : 1;
22631 case 'c':
22632 if (!TARGET_AVX)
22633 return -1;
22634 return TARGET_AVX2 ? 1 : 0;
22635 case 'd':
22636 if (!TARGET_AVX2)
22637 return -1;
22638 return 0;
22639 case 'e':
22640 if (!TARGET_AVX512F)
22641 return -1;
22642 return 0;
22643 default:
22644 gcc_unreachable ();
22645 }
22646 }
22647
22648 /* This function adjusts the unroll factor based on
22649 the hardware capabilities. For example, bdver3 has
22650 a loop buffer which makes unrolling smaller
22651 loops less important. This function decides the
22652 unroll factor using the number of memory references
22653 (with 32 as the threshold) as a heuristic. */
22654
22655 static unsigned
22656 ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
22657 {
22658 basic_block *bbs;
22659 rtx_insn *insn;
22660 unsigned i;
22661 unsigned mem_count = 0;
22662
22663 if (!TARGET_ADJUST_UNROLL)
22664 return nunroll;
22665
22666 /* Count the number of memory references within the loop body.
22667 This value determines the unrolling factor for bdver3 and bdver4
22668 architectures. */
22669 subrtx_iterator::array_type array;
22670 bbs = get_loop_body (loop);
22671 for (i = 0; i < loop->num_nodes; i++)
22672 FOR_BB_INSNS (bbs[i], insn)
22673 if (NONDEBUG_INSN_P (insn))
22674 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
22675 if (const_rtx x = *iter)
22676 if (MEM_P (x))
22677 {
22678 machine_mode mode = GET_MODE (x);
22679 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22680 if (n_words > 4)
22681 mem_count += 2;
22682 else
22683 mem_count += 1;
22684 }
22685 free (bbs);
22686
22687 if (mem_count && mem_count <= 32)
22688 return MIN (nunroll, 32 / mem_count);
22689
22690 return nunroll;
22691 }
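/* Illustration only, not part of GCC: a worked example of the heuristic
   above.  For a loop body with eight word-sized memory references the
   requested unroll factor is capped at 32 / 8 == 4:

     unsigned nunroll = 8, mem_count = 8;
     if (mem_count && mem_count <= 32)
       nunroll = MIN (nunroll, 32 / mem_count);   // nunroll becomes 4

   References wider than four words count twice, so loops dominated by
   large accesses are unrolled even less aggressively.  */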
22692
22693
22694 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
22695
22696 static bool
22697 ix86_float_exceptions_rounding_supported_p (void)
22698 {
22699 /* For x87 floating point with standard excess precision handling,
22700 there is no adddf3 pattern (since x87 floating point only has
22701 XFmode operations) so the default hook implementation gets this
22702 wrong. */
22703 return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
22704 }
22705
22706 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
22707
22708 static void
22709 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
22710 {
22711 if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
22712 return;
22713 tree exceptions_var = create_tmp_var_raw (integer_type_node);
22714 if (TARGET_80387)
22715 {
22716 tree fenv_index_type = build_index_type (size_int (6));
22717 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
22718 tree fenv_var = create_tmp_var_raw (fenv_type);
22719 TREE_ADDRESSABLE (fenv_var) = 1;
22720 tree fenv_ptr = build_pointer_type (fenv_type);
22721 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
22722 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
22723 tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
22724 tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
22725 tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
22726 tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
22727 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
22728 tree hold_fnclex = build_call_expr (fnclex, 0);
22729 fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
22730 NULL_TREE, NULL_TREE);
22731 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
22732 hold_fnclex);
22733 *clear = build_call_expr (fnclex, 0);
22734 tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
22735 tree fnstsw_call = build_call_expr (fnstsw, 0);
22736 tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
22737 fnstsw_call, NULL_TREE, NULL_TREE);
22738 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
22739 tree update_mod = build4 (TARGET_EXPR, integer_type_node,
22740 exceptions_var, exceptions_x87,
22741 NULL_TREE, NULL_TREE);
22742 *update = build2 (COMPOUND_EXPR, integer_type_node,
22743 sw_mod, update_mod);
22744 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
22745 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
22746 }
22747 if (TARGET_SSE && TARGET_SSE_MATH)
22748 {
22749 tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
22750 tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
22751 tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
22752 tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
22753 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
22754 tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
22755 mxcsr_orig_var, stmxcsr_hold_call,
22756 NULL_TREE, NULL_TREE);
22757 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
22758 mxcsr_orig_var,
22759 build_int_cst (unsigned_type_node, 0x1f80));
22760 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
22761 build_int_cst (unsigned_type_node, 0xffffffc0));
22762 tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
22763 mxcsr_mod_var, hold_mod_val,
22764 NULL_TREE, NULL_TREE);
22765 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22766 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
22767 hold_assign_orig, hold_assign_mod);
22768 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
22769 ldmxcsr_hold_call);
22770 if (*hold)
22771 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
22772 else
22773 *hold = hold_all;
22774 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
22775 if (*clear)
22776 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
22777 ldmxcsr_clear_call);
22778 else
22779 *clear = ldmxcsr_clear_call;
22780 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
22781 tree exceptions_sse = fold_convert (integer_type_node,
22782 stxmcsr_update_call);
22783 if (*update)
22784 {
22785 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
22786 exceptions_var, exceptions_sse);
22787 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
22788 exceptions_var, exceptions_mod);
22789 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
22790 exceptions_assign);
22791 }
22792 else
22793 *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
22794 exceptions_sse, NULL_TREE, NULL_TREE);
22795 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
22796 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22797 ldmxcsr_update_call);
22798 }
22799 tree atomic_feraiseexcept
22800 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
22801 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
22802 1, exceptions_var);
22803 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
22804 atomic_feraiseexcept_call);
22805 }
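/* Illustration only, not part of GCC: the *hold, *clear and *update
   sequences built above are what the front end wraps around a C11 atomic
   floating-point compound assignment such as

     _Atomic float a;
     void add (float b) { a += b; }

   Conceptually, *hold saves the FP environment and masks exceptions
   before the compare-exchange loop, *clear discards exceptions raised by
   a failed iteration before retrying, and *update restores the
   environment and re-raises the exceptions of the successful iteration.
   On x86 this is done with FNSTENV/FNCLEX/FNSTSW/FLDENV for the 387 and
   STMXCSR/LDMXCSR for SSE, via the builtins called above.  */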
22806
22807 #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
22808 /* For i386, a common symbol is local only in non-PIE binaries. For
22809 x86-64, a common symbol is local only in non-PIE binaries, or when the
22810 linker supports copy relocations in PIE binaries. */
22811
22812 static bool
22813 ix86_binds_local_p (const_tree exp)
22814 {
22815 return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
22816 (!flag_pic
22817 || (TARGET_64BIT
22818 && HAVE_LD_PIE_COPYRELOC != 0)));
22819 }
22820 #endif
22821
22822 /* If MEM is in the form of [base+offset], extract the two parts
22823 of address and set to BASE and OFFSET, otherwise return false. */
22824
22825 static bool
22826 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
22827 {
22828 rtx addr;
22829
22830 gcc_assert (MEM_P (mem));
22831
22832 addr = XEXP (mem, 0);
22833
22834 if (GET_CODE (addr) == CONST)
22835 addr = XEXP (addr, 0);
22836
22837 if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
22838 {
22839 *base = addr;
22840 *offset = const0_rtx;
22841 return true;
22842 }
22843
22844 if (GET_CODE (addr) == PLUS
22845 && (REG_P (XEXP (addr, 0))
22846 || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
22847 && CONST_INT_P (XEXP (addr, 1)))
22848 {
22849 *base = XEXP (addr, 0);
22850 *offset = XEXP (addr, 1);
22851 return true;
22852 }
22853
22854 return false;
22855 }
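/* Illustration only, not part of GCC: address forms the helper above
   accepts and rejects, written as RTL:

     (mem:DI (reg:DI bx))                            base = bx, offset = 0
     (mem:DI (symbol_ref:DI ("x")))                  base = x,  offset = 0
     (mem:DI (plus:DI (reg:DI bx) (const_int 8)))    base = bx, offset = 8
     (mem:DI (plus:DI (reg:DI bx) (reg:DI cx)))      rejected: offset is
                                                     not a CONST_INT  */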
22856
22857 /* Given OPERANDS of consecutive load/store, check if we can merge
22858 them into move multiple. LOAD is true if they are load instructions.
22859 MODE is the mode of memory operands. */
22860
22861 bool
22862 ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
22863 machine_mode mode)
22864 {
22865 HOST_WIDE_INT offval_1, offval_2, msize;
22866 rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
22867
22868 if (load)
22869 {
22870 mem_1 = operands[1];
22871 mem_2 = operands[3];
22872 reg_1 = operands[0];
22873 reg_2 = operands[2];
22874 }
22875 else
22876 {
22877 mem_1 = operands[0];
22878 mem_2 = operands[2];
22879 reg_1 = operands[1];
22880 reg_2 = operands[3];
22881 }
22882
22883 gcc_assert (REG_P (reg_1) && REG_P (reg_2));
22884
22885 if (REGNO (reg_1) != REGNO (reg_2))
22886 return false;
22887
22888 /* Check if the addresses are in the form of [base+offset]. */
22889 if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
22890 return false;
22891 if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
22892 return false;
22893
22894 /* Check if the bases are the same. */
22895 if (!rtx_equal_p (base_1, base_2))
22896 return false;
22897
22898 offval_1 = INTVAL (offset_1);
22899 offval_2 = INTVAL (offset_2);
22900 msize = GET_MODE_SIZE (mode);
22901 /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address. */
22902 if (offval_1 + msize != offval_2)
22903 return false;
22904
22905 return true;
22906 }
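/* Illustration only, not part of GCC: a pair of stores that passes the
   checks above, written as RTL.  Both addresses share the base register,
   the offsets differ by exactly the mode size (8 for DImode), the first
   store has the lower address, and both source operands are the same
   hard register:

     (set (mem:DI (reg:DI bx)) (reg:DI ax))
     (set (mem:DI (plus:DI (reg:DI bx) (const_int 8))) (reg:DI ax))  */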
22907
22908 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
22909
22910 static bool
22911 ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
22912 optimization_type opt_type)
22913 {
22914 switch (op)
22915 {
22916 case asin_optab:
22917 case acos_optab:
22918 case log1p_optab:
22919 case exp_optab:
22920 case exp10_optab:
22921 case exp2_optab:
22922 case expm1_optab:
22923 case ldexp_optab:
22924 case scalb_optab:
22925 case round_optab:
22926 return opt_type == OPTIMIZE_FOR_SPEED;
22927
22928 case rint_optab:
22929 if (SSE_FLOAT_MODE_P (mode1)
22930 && TARGET_SSE_MATH
22931 && !flag_trapping_math
22932 && !TARGET_SSE4_1)
22933 return opt_type == OPTIMIZE_FOR_SPEED;
22934 return true;
22935
22936 case floor_optab:
22937 case ceil_optab:
22938 case btrunc_optab:
22939 if (SSE_FLOAT_MODE_P (mode1)
22940 && TARGET_SSE_MATH
22941 && !flag_trapping_math
22942 && TARGET_SSE4_1)
22943 return true;
22944 return opt_type == OPTIMIZE_FOR_SPEED;
22945
22946 case rsqrt_optab:
22947 return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
22948
22949 default:
22950 return true;
22951 }
22952 }
22953
22954 /* Address space support.
22955
22956 This is not "far pointers" in the 16-bit sense, but an easy way
22957 to use %fs and %gs segment prefixes. Therefore:
22958
22959 (a) All address spaces have the same modes,
22960 (b) All address spaces have the same address forms,
22961 (c) While %fs and %gs are technically subsets of the generic
22962 address space, they are probably not subsets of each other.
22963 (d) Since we have no access to the segment base register values
22964 without resorting to a system call, we cannot convert a
22965 non-default address space to a default address space.
22966 Therefore we do not claim %fs or %gs are subsets of generic.
22967
22968 Therefore we can (mostly) use the default hooks. */
22969
22970 /* All use of segmentation is assumed to make address 0 valid. */
22971
22972 static bool
22973 ix86_addr_space_zero_address_valid (addr_space_t as)
22974 {
22975 return as != ADDR_SPACE_GENERIC;
22976 }
22977
22978 static void
22979 ix86_init_libfuncs (void)
22980 {
22981 if (TARGET_64BIT)
22982 {
22983 set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
22984 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
22985 }
22986 else
22987 {
22988 set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
22989 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
22990 }
22991
22992 #if TARGET_MACHO
22993 darwin_rename_builtins ();
22994 #endif
22995 }
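/* Illustration only, not part of GCC: the optab libfuncs registered above
   are the combined division/modulus routines from libgcc.  They can be
   used when both the quotient and the remainder of the same operands are
   needed, e.g.

     __int128 q = a / b;   // 64-bit target
     __int128 r = a % b;   // both may fold into one __divmodti4 call

   and correspondingly __divmoddi4 / __udivmoddi4 for 64-bit division on
   32-bit targets.  */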
22996
22997 /* Set the value of FLT_EVAL_METHOD in float.h. When using only the
22998 FPU, assume that the fpcw is set to extended precision; when using
22999 only SSE, rounding is correct; when using both SSE and the FPU,
23000 the rounding precision is indeterminate, since either may be chosen
23001 apparently at random. */
23002
23003 static enum flt_eval_method
23004 ix86_excess_precision (enum excess_precision_type type)
23005 {
23006 switch (type)
23007 {
23008 case EXCESS_PRECISION_TYPE_FAST:
23009 /* The fastest type to promote to will always be the native type,
23010 whether that occurs with implicit excess precision or
23011 otherwise. */
23012 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
23013 case EXCESS_PRECISION_TYPE_STANDARD:
23014 case EXCESS_PRECISION_TYPE_IMPLICIT:
23015 /* Otherwise, the excess precision we want when we are
23016 in a standards compliant mode, and the implicit precision we
23017 provide would be identical were it not for the unpredictable
23018 cases. */
23019 if (!TARGET_80387)
23020 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
23021 else if (!TARGET_MIX_SSE_I387)
23022 {
23023 if (!(TARGET_SSE && TARGET_SSE_MATH))
23024 return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
23025 else if (TARGET_SSE2)
23026 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
23027 }
23028
23029 /* If we are in standards compliant mode, but we know we will
23030 calculate in unpredictable precision, return
23031 FLT_EVAL_METHOD_FLOAT. There is no reason to introduce explicit
23032 excess precision if the target can't guarantee it will honor
23033 it. */
23034 return (type == EXCESS_PRECISION_TYPE_STANDARD
23035 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
23036 : FLT_EVAL_METHOD_UNPREDICTABLE);
23037 default:
23038 gcc_unreachable ();
23039 }
23040
23041 return FLT_EVAL_METHOD_UNPREDICTABLE;
23042 }
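/* Illustration only, not part of GCC: the mapping above is what a user
   observes through FLT_EVAL_METHOD in <float.h>, e.g.

     #include <float.h>
     #include <stdio.h>

     int
     main (void)
     {
       // Typically 2 with x87 math (promote to long double), 0 with
       // -mfpmath=sse -msse2 (no excess precision), and -1 when x87 and
       // SSE math are mixed (indeterminate).
       printf ("%d\n", FLT_EVAL_METHOD);
       return 0;
     }  */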
23043
23044 /* Implement PUSH_ROUNDING. On 386, we have a pushw instruction that
23045 decrements by exactly 2 no matter what the position was; there is no pushb.
23046
23047 But as the CIE data alignment factor on this arch is -4 for 32-bit targets
23048 and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
23049 are in multiples of 4 for 32-bit targets and 8 for 64-bit targets. */
23050
23051 poly_int64
23052 ix86_push_rounding (poly_int64 bytes)
23053 {
23054 return ROUND_UP (bytes, UNITS_PER_WORD);
23055 }
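/* Illustration only, not part of GCC: pushing a 2-byte operand still
   adjusts the stack pointer by a full word, e.g.

     ix86_push_rounding (2) == 4   on 32-bit targets (UNITS_PER_WORD == 4)
     ix86_push_rounding (2) == 8   on 64-bit targets (UNITS_PER_WORD == 8)

   which keeps every adjustment a multiple of the CIE data alignment
   factor described above.  */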
23056
23057 /* Target-specific selftests. */
23058
23059 #if CHECKING_P
23060
23061 namespace selftest {
23062
23063 /* Verify that hard regs are dumped as expected (in compact mode). */
23064
23065 static void
23066 ix86_test_dumping_hard_regs ()
23067 {
23068 ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
23069 ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
23070 }
23071
23072 /* Test dumping an insn with repeated references to the same SCRATCH,
23073 to verify the rtx_reuse code. */
23074
23075 static void
23076 ix86_test_dumping_memory_blockage ()
23077 {
23078 set_new_first_and_last_insn (NULL, NULL);
23079
23080 rtx pat = gen_memory_blockage ();
23081 rtx_reuse_manager r;
23082 r.preprocess (pat);
23083
23084 /* Verify that the repeated references to the SCRATCH use
23085 reuse IDs. The first should be prefixed with a reuse ID,
23086 and the second should be dumped as a "reuse_rtx" of that ID.
23087 The expected string assumes Pmode == DImode. */
23088 if (Pmode == DImode)
23089 ASSERT_RTL_DUMP_EQ_WITH_REUSE
23090 ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0 A8])\n"
23091 " (unspec:BLK [\n"
23092 " (mem/v:BLK (reuse_rtx 0) [0 A8])\n"
23093 " ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
23094 }
23095
23096 /* Verify loading an RTL dump; specifically a dump of copying
23097 a param on x86_64 from a hard reg into the frame.
23098 This test is target-specific since the dump contains target-specific
23099 hard reg names. */
23100
23101 static void
23102 ix86_test_loading_dump_fragment_1 ()
23103 {
23104 rtl_dump_test t (SELFTEST_LOCATION,
23105 locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
23106
23107 rtx_insn *insn = get_insn_by_uid (1);
23108
23109 /* The block structure and indentation here are purely for
23110 readability; they mirror the structure of the rtx. */
23111 tree mem_expr;
23112 {
23113 rtx pat = PATTERN (insn);
23114 ASSERT_EQ (SET, GET_CODE (pat));
23115 {
23116 rtx dest = SET_DEST (pat);
23117 ASSERT_EQ (MEM, GET_CODE (dest));
23118 /* Verify the "/c" was parsed. */
23119 ASSERT_TRUE (RTX_FLAG (dest, call));
23120 ASSERT_EQ (SImode, GET_MODE (dest));
23121 {
23122 rtx addr = XEXP (dest, 0);
23123 ASSERT_EQ (PLUS, GET_CODE (addr));
23124 ASSERT_EQ (DImode, GET_MODE (addr));
23125 {
23126 rtx lhs = XEXP (addr, 0);
23127 /* Verify that the "frame" REG was consolidated. */
23128 ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
23129 }
23130 {
23131 rtx rhs = XEXP (addr, 1);
23132 ASSERT_EQ (CONST_INT, GET_CODE (rhs));
23133 ASSERT_EQ (-4, INTVAL (rhs));
23134 }
23135 }
23136 /* Verify the "[1 i+0 S4 A32]" was parsed. */
23137 ASSERT_EQ (1, MEM_ALIAS_SET (dest));
23138 /* "i" should have been handled by synthesizing a global int
23139 variable named "i". */
23140 mem_expr = MEM_EXPR (dest);
23141 ASSERT_NE (mem_expr, NULL);
23142 ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
23143 ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
23144 ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
23145 ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
23146 /* "+0". */
23147 ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
23148 ASSERT_EQ (0, MEM_OFFSET (dest));
23149 /* "S4". */
23150 ASSERT_EQ (4, MEM_SIZE (dest));
23151 /* "A32. */
23152 ASSERT_EQ (32, MEM_ALIGN (dest));
23153 }
23154 {
23155 rtx src = SET_SRC (pat);
23156 ASSERT_EQ (REG, GET_CODE (src));
23157 ASSERT_EQ (SImode, GET_MODE (src));
23158 ASSERT_EQ (5, REGNO (src));
23159 tree reg_expr = REG_EXPR (src);
23160 /* "i" here should point to the same var as for the MEM_EXPR. */
23161 ASSERT_EQ (reg_expr, mem_expr);
23162 }
23163 }
23164 }
23165
23166 /* Verify that the RTL loader copes with a call_insn dump.
23167 This test is target-specific since the dump contains a target-specific
23168 hard reg name. */
23169
23170 static void
23171 ix86_test_loading_call_insn ()
23172 {
23173 /* The test dump includes register "xmm0", which requires TARGET_SSE
23174 to exist. */
23175 if (!TARGET_SSE)
23176 return;
23177
23178 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
23179
23180 rtx_insn *insn = get_insns ();
23181 ASSERT_EQ (CALL_INSN, GET_CODE (insn));
23182
23183 /* "/j". */
23184 ASSERT_TRUE (RTX_FLAG (insn, jump));
23185
23186 rtx pat = PATTERN (insn);
23187 ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
23188
23189 /* Verify REG_NOTES. */
23190 {
23191 /* "(expr_list:REG_CALL_DECL". */
23192 ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
23193 rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
23194 ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
23195
23196 /* "(expr_list:REG_EH_REGION (const_int 0 [0])". */
23197 rtx_expr_list *note1 = note0->next ();
23198 ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
23199
23200 ASSERT_EQ (NULL, note1->next ());
23201 }
23202
23203 /* Verify CALL_INSN_FUNCTION_USAGE. */
23204 {
23205 /* "(expr_list:DF (use (reg:DF 21 xmm0))". */
23206 rtx_expr_list *usage
23207 = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
23208 ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
23209 ASSERT_EQ (DFmode, GET_MODE (usage));
23210 ASSERT_EQ (USE, GET_CODE (usage->element ()));
23211 ASSERT_EQ (NULL, usage->next ());
23212 }
23213 }
23214
23215 /* Verify that the RTL loader copes with a dump from print_rtx_function.
23216 This test is target-specific since the dump contains target-specific
23217 hard reg names. */
23218
23219 static void
23220 ix86_test_loading_full_dump ()
23221 {
23222 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
23223
23224 ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
23225
23226 rtx_insn *insn_1 = get_insn_by_uid (1);
23227 ASSERT_EQ (NOTE, GET_CODE (insn_1));
23228
23229 rtx_insn *insn_7 = get_insn_by_uid (7);
23230 ASSERT_EQ (INSN, GET_CODE (insn_7));
23231 ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
23232
23233 rtx_insn *insn_15 = get_insn_by_uid (15);
23234 ASSERT_EQ (INSN, GET_CODE (insn_15));
23235 ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
23236
23237 /* Verify crtl->return_rtx. */
23238 ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
23239 ASSERT_EQ (0, REGNO (crtl->return_rtx));
23240 ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
23241 }
23242
23243 /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
23244 In particular, verify that it correctly loads the 2nd operand.
23245 This test is target-specific since these are machine-specific
23246 operands (and enums). */
23247
23248 static void
23249 ix86_test_loading_unspec ()
23250 {
23251 rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
23252
23253 ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
23254
23255 ASSERT_TRUE (cfun);
23256
23257 /* Test of an UNSPEC. */
23258 rtx_insn *insn = get_insns ();
23259 ASSERT_EQ (INSN, GET_CODE (insn));
23260 rtx set = single_set (insn);
23261 ASSERT_NE (NULL, set);
23262 rtx dst = SET_DEST (set);
23263 ASSERT_EQ (MEM, GET_CODE (dst));
23264 rtx src = SET_SRC (set);
23265 ASSERT_EQ (UNSPEC, GET_CODE (src));
23266 ASSERT_EQ (BLKmode, GET_MODE (src));
23267 ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
23268
23269 rtx v0 = XVECEXP (src, 0, 0);
23270
23271 /* Verify that the two uses of the first SCRATCH have pointer
23272 equality. */
23273 rtx scratch_a = XEXP (dst, 0);
23274 ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
23275
23276 rtx scratch_b = XEXP (v0, 0);
23277 ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
23278
23279 ASSERT_EQ (scratch_a, scratch_b);
23280
23281 /* Verify that the two mems are thus treated as equal. */
23282 ASSERT_TRUE (rtx_equal_p (dst, v0));
23283
23284 /* Verify that the insn is recognized. */
23285 ASSERT_NE (-1, recog_memoized (insn));
23286
23287 /* Test of an UNSPEC_VOLATILE, which has its own enum values. */
23288 insn = NEXT_INSN (insn);
23289 ASSERT_EQ (INSN, GET_CODE (insn));
23290
23291 set = single_set (insn);
23292 ASSERT_NE (NULL, set);
23293
23294 src = SET_SRC (set);
23295 ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
23296 ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
23297 }
23298
23299 /* Run all target-specific selftests. */
23300
23301 static void
23302 ix86_run_selftests (void)
23303 {
23304 ix86_test_dumping_hard_regs ();
23305 ix86_test_dumping_memory_blockage ();
23306
23307 /* Various tests of loading RTL dumps, here because they contain
23308 ix86-isms (e.g. names of hard regs). */
23309 ix86_test_loading_dump_fragment_1 ();
23310 ix86_test_loading_call_insn ();
23311 ix86_test_loading_full_dump ();
23312 ix86_test_loading_unspec ();
23313 }
23314
23315 } // namespace selftest
23316
23317 #endif /* CHECKING_P */
23318
23319 /* Initialize the GCC target structure. */
23320 #undef TARGET_RETURN_IN_MEMORY
23321 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
23322
23323 #undef TARGET_LEGITIMIZE_ADDRESS
23324 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
23325
23326 #undef TARGET_ATTRIBUTE_TABLE
23327 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23328 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
23329 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
23330 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23331 # undef TARGET_MERGE_DECL_ATTRIBUTES
23332 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23333 #endif
23334
23335 #undef TARGET_COMP_TYPE_ATTRIBUTES
23336 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23337
23338 #undef TARGET_INIT_BUILTINS
23339 #define TARGET_INIT_BUILTINS ix86_init_builtins
23340 #undef TARGET_BUILTIN_DECL
23341 #define TARGET_BUILTIN_DECL ix86_builtin_decl
23342 #undef TARGET_EXPAND_BUILTIN
23343 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23344
23345 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23346 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23347 ix86_builtin_vectorized_function
23348
23349 #undef TARGET_VECTORIZE_BUILTIN_GATHER
23350 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
23351
23352 #undef TARGET_VECTORIZE_BUILTIN_SCATTER
23353 #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
23354
23355 #undef TARGET_BUILTIN_RECIPROCAL
23356 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23357
23358 #undef TARGET_ASM_FUNCTION_EPILOGUE
23359 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23360
23361 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
23362 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
23363 ix86_print_patchable_function_entry
23364
23365 #undef TARGET_ENCODE_SECTION_INFO
23366 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23367 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23368 #else
23369 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23370 #endif
23371
23372 #undef TARGET_ASM_OPEN_PAREN
23373 #define TARGET_ASM_OPEN_PAREN ""
23374 #undef TARGET_ASM_CLOSE_PAREN
23375 #define TARGET_ASM_CLOSE_PAREN ""
23376
23377 #undef TARGET_ASM_BYTE_OP
23378 #define TARGET_ASM_BYTE_OP ASM_BYTE
23379
23380 #undef TARGET_ASM_ALIGNED_HI_OP
23381 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23382 #undef TARGET_ASM_ALIGNED_SI_OP
23383 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23384 #ifdef ASM_QUAD
23385 #undef TARGET_ASM_ALIGNED_DI_OP
23386 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23387 #endif
23388
23389 #undef TARGET_PROFILE_BEFORE_PROLOGUE
23390 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
23391
23392 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
23393 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
23394
23395 #undef TARGET_ASM_UNALIGNED_HI_OP
23396 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23397 #undef TARGET_ASM_UNALIGNED_SI_OP
23398 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23399 #undef TARGET_ASM_UNALIGNED_DI_OP
23400 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23401
23402 #undef TARGET_PRINT_OPERAND
23403 #define TARGET_PRINT_OPERAND ix86_print_operand
23404 #undef TARGET_PRINT_OPERAND_ADDRESS
23405 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
23406 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
23407 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
23408 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
23409 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
23410
23411 #undef TARGET_SCHED_INIT_GLOBAL
23412 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
23413 #undef TARGET_SCHED_ADJUST_COST
23414 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23415 #undef TARGET_SCHED_ISSUE_RATE
23416 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23417 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23418 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23419 ia32_multipass_dfa_lookahead
23420 #undef TARGET_SCHED_MACRO_FUSION_P
23421 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
23422 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
23423 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
23424
23425 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23426 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23427
23428 #undef TARGET_MEMMODEL_CHECK
23429 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
23430
23431 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
23432 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
23433
23434 #ifdef HAVE_AS_TLS
23435 #undef TARGET_HAVE_TLS
23436 #define TARGET_HAVE_TLS true
23437 #endif
23438 #undef TARGET_CANNOT_FORCE_CONST_MEM
23439 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23440 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23441 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
23442
23443 #undef TARGET_DELEGITIMIZE_ADDRESS
23444 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23445
23446 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
23447 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
23448
23449 #undef TARGET_MS_BITFIELD_LAYOUT_P
23450 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23451
23452 #if TARGET_MACHO
23453 #undef TARGET_BINDS_LOCAL_P
23454 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23455 #else
23456 #undef TARGET_BINDS_LOCAL_P
23457 #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
23458 #endif
23459 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23460 #undef TARGET_BINDS_LOCAL_P
23461 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23462 #endif
23463
23464 #undef TARGET_ASM_OUTPUT_MI_THUNK
23465 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23466 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23467 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23468
23469 #undef TARGET_ASM_FILE_START
23470 #define TARGET_ASM_FILE_START x86_file_start
23471
23472 #undef TARGET_OPTION_OVERRIDE
23473 #define TARGET_OPTION_OVERRIDE ix86_option_override
23474
23475 #undef TARGET_REGISTER_MOVE_COST
23476 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
23477 #undef TARGET_MEMORY_MOVE_COST
23478 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
23479 #undef TARGET_RTX_COSTS
23480 #define TARGET_RTX_COSTS ix86_rtx_costs
23481 #undef TARGET_ADDRESS_COST
23482 #define TARGET_ADDRESS_COST ix86_address_cost
23483
23484 #undef TARGET_FLAGS_REGNUM
23485 #define TARGET_FLAGS_REGNUM FLAGS_REG
23486 #undef TARGET_FIXED_CONDITION_CODE_REGS
23487 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23488 #undef TARGET_CC_MODES_COMPATIBLE
23489 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23490
23491 #undef TARGET_MACHINE_DEPENDENT_REORG
23492 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23493
23494 #undef TARGET_BUILD_BUILTIN_VA_LIST
23495 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23496
23497 #undef TARGET_FOLD_BUILTIN
23498 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
23499
23500 #undef TARGET_GIMPLE_FOLD_BUILTIN
23501 #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
23502
23503 #undef TARGET_COMPARE_VERSION_PRIORITY
23504 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
23505
23506 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
23507 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
23508 ix86_generate_version_dispatcher_body
23509
23510 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
23511 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
23512 ix86_get_function_versions_dispatcher
23513
23514 #undef TARGET_ENUM_VA_LIST_P
23515 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
23516
23517 #undef TARGET_FN_ABI_VA_LIST
23518 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
23519
23520 #undef TARGET_CANONICAL_VA_LIST_TYPE
23521 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
23522
23523 #undef TARGET_EXPAND_BUILTIN_VA_START
23524 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
23525
23526 #undef TARGET_MD_ASM_ADJUST
23527 #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
23528
23529 #undef TARGET_C_EXCESS_PRECISION
23530 #define TARGET_C_EXCESS_PRECISION ix86_excess_precision
23531 #undef TARGET_PROMOTE_PROTOTYPES
23532 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23533 #undef TARGET_SETUP_INCOMING_VARARGS
23534 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23535 #undef TARGET_MUST_PASS_IN_STACK
23536 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23537 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
23538 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
23539 #undef TARGET_FUNCTION_ARG_ADVANCE
23540 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
23541 #undef TARGET_FUNCTION_ARG
23542 #define TARGET_FUNCTION_ARG ix86_function_arg
23543 #undef TARGET_INIT_PIC_REG
23544 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
23545 #undef TARGET_USE_PSEUDO_PIC_REG
23546 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
23547 #undef TARGET_FUNCTION_ARG_BOUNDARY
23548 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
23549 #undef TARGET_PASS_BY_REFERENCE
23550 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23551 #undef TARGET_INTERNAL_ARG_POINTER
23552 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23553 #undef TARGET_UPDATE_STACK_BOUNDARY
23554 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
23555 #undef TARGET_GET_DRAP_RTX
23556 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
23557 #undef TARGET_STRICT_ARGUMENT_NAMING
23558 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23559 #undef TARGET_STATIC_CHAIN
23560 #define TARGET_STATIC_CHAIN ix86_static_chain
23561 #undef TARGET_TRAMPOLINE_INIT
23562 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
23563 #undef TARGET_RETURN_POPS_ARGS
23564 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
23565
23566 #undef TARGET_WARN_FUNC_RETURN
23567 #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
23568
23569 #undef TARGET_LEGITIMATE_COMBINED_INSN
23570 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
23571
23572 #undef TARGET_ASAN_SHADOW_OFFSET
23573 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
23574
23575 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23576 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23577
23578 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23579 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23580
23581 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23582 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23583
23584 #undef TARGET_C_MODE_FOR_SUFFIX
23585 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23586
23587 #ifdef HAVE_AS_TLS
23588 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23589 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23590 #endif
23591
23592 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23593 #undef TARGET_INSERT_ATTRIBUTES
23594 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23595 #endif
23596
23597 #undef TARGET_MANGLE_TYPE
23598 #define TARGET_MANGLE_TYPE ix86_mangle_type
23599
23600 #undef TARGET_STACK_PROTECT_GUARD
23601 #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
23602
23603 #if !TARGET_MACHO
23604 #undef TARGET_STACK_PROTECT_FAIL
23605 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23606 #endif
23607
23608 #undef TARGET_FUNCTION_VALUE
23609 #define TARGET_FUNCTION_VALUE ix86_function_value
23610
23611 #undef TARGET_FUNCTION_VALUE_REGNO_P
23612 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
23613
23614 #undef TARGET_ZERO_CALL_USED_REGS
23615 #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
23616
23617 #undef TARGET_PROMOTE_FUNCTION_MODE
23618 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
23619
23620 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
23621 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
23622
23623 #undef TARGET_MEMBER_TYPE_FORCES_BLK
23624 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
23625
23626 #undef TARGET_INSTANTIATE_DECLS
23627 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
23628
23629 #undef TARGET_SECONDARY_RELOAD
23630 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
23631 #undef TARGET_SECONDARY_MEMORY_NEEDED
23632 #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
23633 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
23634 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
23635
23636 #undef TARGET_CLASS_MAX_NREGS
23637 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
23638
23639 #undef TARGET_PREFERRED_RELOAD_CLASS
23640 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
23641 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
23642 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
23643 #undef TARGET_CLASS_LIKELY_SPILLED_P
23644 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
23645
23646 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
23647 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
23648 ix86_builtin_vectorization_cost
23649 #undef TARGET_VECTORIZE_VEC_PERM_CONST
23650 #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
23651 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
23652 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
23653 ix86_preferred_simd_mode
23654 #undef TARGET_VECTORIZE_SPLIT_REDUCTION
23655 #define TARGET_VECTORIZE_SPLIT_REDUCTION \
23656 ix86_split_reduction
23657 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
23658 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
23659 ix86_autovectorize_vector_modes
23660 #undef TARGET_VECTORIZE_GET_MASK_MODE
23661 #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
23662 #undef TARGET_VECTORIZE_INIT_COST
23663 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
23664 #undef TARGET_VECTORIZE_ADD_STMT_COST
23665 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
23666 #undef TARGET_VECTORIZE_FINISH_COST
23667 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
23668 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
23669 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
23670
23671 #undef TARGET_SET_CURRENT_FUNCTION
23672 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
23673
23674 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
23675 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
23676
23677 #undef TARGET_OPTION_SAVE
23678 #define TARGET_OPTION_SAVE ix86_function_specific_save
23679
23680 #undef TARGET_OPTION_RESTORE
23681 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
23682
23683 #undef TARGET_OPTION_POST_STREAM_IN
23684 #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
23685
23686 #undef TARGET_OPTION_PRINT
23687 #define TARGET_OPTION_PRINT ix86_function_specific_print
23688
23689 #undef TARGET_OPTION_FUNCTION_VERSIONS
23690 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
23691
23692 #undef TARGET_CAN_INLINE_P
23693 #define TARGET_CAN_INLINE_P ix86_can_inline_p
23694
23695 #undef TARGET_LEGITIMATE_ADDRESS_P
23696 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
23697
23698 #undef TARGET_REGISTER_PRIORITY
23699 #define TARGET_REGISTER_PRIORITY ix86_register_priority
23700
23701 #undef TARGET_REGISTER_USAGE_LEVELING_P
23702 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
23703
23704 #undef TARGET_LEGITIMATE_CONSTANT_P
23705 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
23706
23707 #undef TARGET_COMPUTE_FRAME_LAYOUT
23708 #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
23709
23710 #undef TARGET_FRAME_POINTER_REQUIRED
23711 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
23712
23713 #undef TARGET_CAN_ELIMINATE
23714 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
23715
23716 #undef TARGET_EXTRA_LIVE_ON_ENTRY
23717 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
23718
23719 #undef TARGET_ASM_CODE_END
23720 #define TARGET_ASM_CODE_END ix86_code_end
23721
23722 #undef TARGET_CONDITIONAL_REGISTER_USAGE
23723 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
23724
23725 #undef TARGET_CANONICALIZE_COMPARISON
23726 #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
23727
23728 #undef TARGET_LOOP_UNROLL_ADJUST
23729 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
23730
23731 /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657. */
23732 #undef TARGET_SPILL_CLASS
23733 #define TARGET_SPILL_CLASS ix86_spill_class
23734
23735 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
23736 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
23737 ix86_simd_clone_compute_vecsize_and_simdlen
23738
23739 #undef TARGET_SIMD_CLONE_ADJUST
23740 #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
23741
23742 #undef TARGET_SIMD_CLONE_USABLE
23743 #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
23744
23745 #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
23746 #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
23747
23748 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
23749 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
23750 ix86_float_exceptions_rounding_supported_p
23751
23752 #undef TARGET_MODE_EMIT
23753 #define TARGET_MODE_EMIT ix86_emit_mode_set
23754
23755 #undef TARGET_MODE_NEEDED
23756 #define TARGET_MODE_NEEDED ix86_mode_needed
23757
23758 #undef TARGET_MODE_AFTER
23759 #define TARGET_MODE_AFTER ix86_mode_after
23760
23761 #undef TARGET_MODE_ENTRY
23762 #define TARGET_MODE_ENTRY ix86_mode_entry
23763
23764 #undef TARGET_MODE_EXIT
23765 #define TARGET_MODE_EXIT ix86_mode_exit
23766
23767 #undef TARGET_MODE_PRIORITY
23768 #define TARGET_MODE_PRIORITY ix86_mode_priority
23769
23770 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
23771 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
23772
23773 #undef TARGET_OFFLOAD_OPTIONS
23774 #define TARGET_OFFLOAD_OPTIONS \
23775 ix86_offload_options
23776
23777 #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
23778 #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
23779
23780 #undef TARGET_OPTAB_SUPPORTED_P
23781 #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
23782
23783 #undef TARGET_HARD_REGNO_SCRATCH_OK
23784 #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
23785
23786 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
23787 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
23788
23789 #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
23790 #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
23791
23792 #undef TARGET_INIT_LIBFUNCS
23793 #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
23794
23795 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
23796 #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
23797
23798 #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
23799 #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
23800
23801 #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
23802 #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
23803
23804 #undef TARGET_HARD_REGNO_NREGS
23805 #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
23806 #undef TARGET_HARD_REGNO_MODE_OK
23807 #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
23808
23809 #undef TARGET_MODES_TIEABLE_P
23810 #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
23811
23812 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
23813 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
23814 ix86_hard_regno_call_part_clobbered
23815
23816 #undef TARGET_CAN_CHANGE_MODE_CLASS
23817 #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
23818
23819 #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
23820 #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
23821
23822 #undef TARGET_STATIC_RTX_ALIGNMENT
23823 #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
23824 #undef TARGET_CONSTANT_ALIGNMENT
23825 #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
23826
23827 #undef TARGET_EMPTY_RECORD_P
23828 #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
23829
23830 #undef TARGET_WARN_PARAMETER_PASSING_ABI
23831 #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
23832
23833 #undef TARGET_GET_MULTILIB_ABI_NAME
23834 #define TARGET_GET_MULTILIB_ABI_NAME \
23835 ix86_get_multilib_abi_name
23836
23837 static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
23838 {
23839 #ifdef OPTION_GLIBC
23840 if (OPTION_GLIBC)
23841 return (built_in_function)fcode == BUILT_IN_MEMPCPY;
23842 else
23843 return false;
23844 #else
23845 return false;
23846 #endif
23847 }
23848
23849 #undef TARGET_LIBC_HAS_FAST_FUNCTION
23850 #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
23851
23852 #if CHECKING_P
23853 #undef TARGET_RUN_TARGET_SELFTESTS
23854 #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
23855 #endif /* #if CHECKING_P */
23856
23857 struct gcc_target targetm = TARGET_INITIALIZER;
23858 \f
23859 #include "gt-i386.h"